xref: /freebsd/contrib/llvm-project/lld/ELF/Symbols.h (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
10b57cec5SDimitry Andric //===- Symbols.h ------------------------------------------------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines various types of Symbols.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef LLD_ELF_SYMBOLS_H
140b57cec5SDimitry Andric #define LLD_ELF_SYMBOLS_H
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "InputFiles.h"
170b57cec5SDimitry Andric #include "InputSection.h"
180b57cec5SDimitry Andric #include "lld/Common/LLVM.h"
19*0eae32dcSDimitry Andric #include "lld/Common/Memory.h"
200b57cec5SDimitry Andric #include "lld/Common/Strings.h"
215ffd83dbSDimitry Andric #include "llvm/ADT/DenseMap.h"
220b57cec5SDimitry Andric #include "llvm/Object/Archive.h"
230b57cec5SDimitry Andric #include "llvm/Object/ELF.h"
24349cc55cSDimitry Andric #include <tuple>
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric namespace lld {
275ffd83dbSDimitry Andric // Returns a string representation for a symbol for diagnostics.
2885868e8aSDimitry Andric std::string toString(const elf::Symbol &);
2985868e8aSDimitry Andric 
3085868e8aSDimitry Andric // There are two different ways to convert an Archive::Symbol to a string:
3185868e8aSDimitry Andric // One for Microsoft name mangling and one for Itanium name mangling.
3285868e8aSDimitry Andric // Call the functions toCOFFString and toELFString, not just toString.
3385868e8aSDimitry Andric std::string toELFString(const llvm::object::Archive::Symbol &);
3485868e8aSDimitry Andric 
350b57cec5SDimitry Andric namespace elf {
360b57cec5SDimitry Andric class CommonSymbol;
370b57cec5SDimitry Andric class Defined;
380b57cec5SDimitry Andric class InputFile;
390b57cec5SDimitry Andric class LazyArchive;
400b57cec5SDimitry Andric class LazyObject;
410b57cec5SDimitry Andric class SharedSymbol;
420b57cec5SDimitry Andric class Symbol;
430b57cec5SDimitry Andric class Undefined;
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric // This is a StringRef-like container that doesn't run strlen().
460b57cec5SDimitry Andric //
470b57cec5SDimitry Andric // ELF string tables contain a lot of null-terminated strings. Most of them
480b57cec5SDimitry Andric // are not necessary for the linker because they are names of local symbols,
490b57cec5SDimitry Andric // and the linker doesn't use local symbol names for name resolution. So, we
500b57cec5SDimitry Andric // use this class to represents strings read from string tables.
510b57cec5SDimitry Andric struct StringRefZ {
520b57cec5SDimitry Andric   StringRefZ(const char *s) : data(s), size(-1) {}
530b57cec5SDimitry Andric   StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric   const char *data;
560b57cec5SDimitry Andric   const uint32_t size;
570b57cec5SDimitry Andric };
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric // The base class for real symbol classes.
600b57cec5SDimitry Andric class Symbol {
610b57cec5SDimitry Andric public:
620b57cec5SDimitry Andric   enum Kind {
630b57cec5SDimitry Andric     PlaceholderKind,
640b57cec5SDimitry Andric     DefinedKind,
650b57cec5SDimitry Andric     CommonKind,
660b57cec5SDimitry Andric     SharedKind,
670b57cec5SDimitry Andric     UndefinedKind,
680b57cec5SDimitry Andric     LazyArchiveKind,
690b57cec5SDimitry Andric     LazyObjectKind,
700b57cec5SDimitry Andric   };
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric   Kind kind() const { return static_cast<Kind>(symbolKind); }
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric   // The file from which this symbol was created.
750b57cec5SDimitry Andric   InputFile *file;
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric protected:
780b57cec5SDimitry Andric   const char *nameData;
790b57cec5SDimitry Andric   mutable uint32_t nameSize;
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric public:
820b57cec5SDimitry Andric   uint32_t dynsymIndex = 0;
830b57cec5SDimitry Andric   uint32_t gotIndex = -1;
840b57cec5SDimitry Andric   uint32_t pltIndex = -1;
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric   uint32_t globalDynIndex = -1;
870b57cec5SDimitry Andric 
880b57cec5SDimitry Andric   // This field is a index to the symbol's version definition.
89*0eae32dcSDimitry Andric   uint16_t verdefIndex = -1;
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric   // Version definition index.
920b57cec5SDimitry Andric   uint16_t versionId;
930b57cec5SDimitry Andric 
940b57cec5SDimitry Andric   // Symbol binding. This is not overwritten by replace() to track
950b57cec5SDimitry Andric   // changes during resolution. In particular:
960b57cec5SDimitry Andric   //  - An undefined weak is still weak when it resolves to a shared library.
974824e7fdSDimitry Andric   //  - An undefined weak will not extract archive members, but we have to
980b57cec5SDimitry Andric   //    remember it is weak.
990b57cec5SDimitry Andric   uint8_t binding;
1000b57cec5SDimitry Andric 
1010b57cec5SDimitry Andric   // The following fields have the same meaning as the ELF symbol attributes.
1020b57cec5SDimitry Andric   uint8_t type;    // symbol type
1030b57cec5SDimitry Andric   uint8_t stOther; // st_other field value
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric   uint8_t symbolKind;
1060b57cec5SDimitry Andric 
1070b57cec5SDimitry Andric   // Symbol visibility. This is the computed minimum visibility of all
1080b57cec5SDimitry Andric   // observed non-DSO symbols.
109480093f4SDimitry Andric   uint8_t visibility : 2;
1100b57cec5SDimitry Andric 
1110b57cec5SDimitry Andric   // True if the symbol was used for linking and thus need to be added to the
1120b57cec5SDimitry Andric   // output file's symbol table. This is true for all symbols except for
1130b57cec5SDimitry Andric   // unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
1140b57cec5SDimitry Andric   // are unreferenced except by other bitcode objects.
115480093f4SDimitry Andric   uint8_t isUsedInRegularObj : 1;
1160b57cec5SDimitry Andric 
11785868e8aSDimitry Andric   // Used by a Defined symbol with protected or default visibility, to record
11885868e8aSDimitry Andric   // whether it is required to be exported into .dynsym. This is set when any of
11985868e8aSDimitry Andric   // the following conditions hold:
12085868e8aSDimitry Andric   //
12185868e8aSDimitry Andric   // - If there is an interposable symbol from a DSO.
12285868e8aSDimitry Andric   // - If -shared or --export-dynamic is specified, any symbol in an object
12385868e8aSDimitry Andric   //   file/bitcode sets this property, unless suppressed by LTO
12485868e8aSDimitry Andric   //   canBeOmittedFromSymbolTable().
125480093f4SDimitry Andric   uint8_t exportDynamic : 1;
12685868e8aSDimitry Andric 
12785868e8aSDimitry Andric   // True if the symbol is in the --dynamic-list file. A Defined symbol with
12885868e8aSDimitry Andric   // protected or default visibility with this property is required to be
12985868e8aSDimitry Andric   // exported into .dynsym.
130480093f4SDimitry Andric   uint8_t inDynamicList : 1;
1310b57cec5SDimitry Andric 
1320b57cec5SDimitry Andric   // False if LTO shouldn't inline whatever this symbol points to. If a symbol
1330b57cec5SDimitry Andric   // is overwritten after LTO, LTO shouldn't inline the symbol because it
1340b57cec5SDimitry Andric   // doesn't know the final contents of the symbol.
135480093f4SDimitry Andric   uint8_t canInline : 1;
13685868e8aSDimitry Andric 
137e8d8bef9SDimitry Andric   // Used to track if there has been at least one undefined reference to the
138e8d8bef9SDimitry Andric   // symbol. For Undefined and SharedSymbol, the binding may change to STB_WEAK
139e8d8bef9SDimitry Andric   // if the first undefined reference from a non-shared object is weak.
140e8d8bef9SDimitry Andric   //
141e8d8bef9SDimitry Andric   // This is also used to retain __wrap_foo when foo is referenced.
142480093f4SDimitry Andric   uint8_t referenced : 1;
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric   // True if this symbol is specified by --trace-symbol option.
145480093f4SDimitry Andric   uint8_t traced : 1;
1460b57cec5SDimitry Andric 
14785868e8aSDimitry Andric   inline void replace(const Symbol &newSym);
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric   bool includeInDynsym() const;
1500b57cec5SDimitry Andric   uint8_t computeBinding() const;
1510b57cec5SDimitry Andric   bool isWeak() const { return binding == llvm::ELF::STB_WEAK; }
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric   bool isUndefined() const { return symbolKind == UndefinedKind; }
1540b57cec5SDimitry Andric   bool isCommon() const { return symbolKind == CommonKind; }
1550b57cec5SDimitry Andric   bool isDefined() const { return symbolKind == DefinedKind; }
1560b57cec5SDimitry Andric   bool isShared() const { return symbolKind == SharedKind; }
1570b57cec5SDimitry Andric   bool isPlaceholder() const { return symbolKind == PlaceholderKind; }
1580b57cec5SDimitry Andric 
1590b57cec5SDimitry Andric   bool isLocal() const { return binding == llvm::ELF::STB_LOCAL; }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric   bool isLazy() const {
1620b57cec5SDimitry Andric     return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;
1630b57cec5SDimitry Andric   }
1640b57cec5SDimitry Andric 
1650b57cec5SDimitry Andric   // True if this is an undefined weak symbol. This only works once
1660b57cec5SDimitry Andric   // all input files have been added.
167349cc55cSDimitry Andric   bool isUndefWeak() const { return isWeak() && isUndefined(); }
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric   StringRef getName() const {
1700b57cec5SDimitry Andric     if (nameSize == (uint32_t)-1)
1710b57cec5SDimitry Andric       nameSize = strlen(nameData);
1720b57cec5SDimitry Andric     return {nameData, nameSize};
1730b57cec5SDimitry Andric   }
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric   void setName(StringRef s) {
1760b57cec5SDimitry Andric     nameData = s.data();
1770b57cec5SDimitry Andric     nameSize = s.size();
1780b57cec5SDimitry Andric   }
1790b57cec5SDimitry Andric 
1800b57cec5SDimitry Andric   void parseSymbolVersion();
1810b57cec5SDimitry Andric 
182e8d8bef9SDimitry Andric   // Get the NUL-terminated version suffix ("", "@...", or "@@...").
183e8d8bef9SDimitry Andric   //
184e8d8bef9SDimitry Andric   // For @@, the name has been truncated by insert(). For @, the name has been
185e8d8bef9SDimitry Andric   // truncated by Symbol::parseSymbolVersion().
186e8d8bef9SDimitry Andric   const char *getVersionSuffix() const {
187e8d8bef9SDimitry Andric     (void)getName();
188e8d8bef9SDimitry Andric     return nameData + nameSize;
189e8d8bef9SDimitry Andric   }
190e8d8bef9SDimitry Andric 
1910b57cec5SDimitry Andric   bool isInGot() const { return gotIndex != -1U; }
1920b57cec5SDimitry Andric   bool isInPlt() const { return pltIndex != -1U; }
1930b57cec5SDimitry Andric 
1940b57cec5SDimitry Andric   uint64_t getVA(int64_t addend = 0) const;
1950b57cec5SDimitry Andric 
1960b57cec5SDimitry Andric   uint64_t getGotOffset() const;
1970b57cec5SDimitry Andric   uint64_t getGotVA() const;
1980b57cec5SDimitry Andric   uint64_t getGotPltOffset() const;
1990b57cec5SDimitry Andric   uint64_t getGotPltVA() const;
2000b57cec5SDimitry Andric   uint64_t getPltVA() const;
2010b57cec5SDimitry Andric   uint64_t getSize() const;
2020b57cec5SDimitry Andric   OutputSection *getOutputSection() const;
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric   // The following two functions are used for symbol resolution.
2050b57cec5SDimitry Andric   //
2060b57cec5SDimitry Andric   // You are expected to call mergeProperties for all symbols in input
2070b57cec5SDimitry Andric   // files so that attributes that are attached to names rather than
2080b57cec5SDimitry Andric   // indivisual symbol (such as visibility) are merged together.
2090b57cec5SDimitry Andric   //
2100b57cec5SDimitry Andric   // Every time you read a new symbol from an input, you are supposed
2110b57cec5SDimitry Andric   // to call resolve() with the new symbol. That function replaces
2120b57cec5SDimitry Andric   // "this" object as a result of name resolution if the new symbol is
2130b57cec5SDimitry Andric   // more appropriate to be included in the output.
2140b57cec5SDimitry Andric   //
2150b57cec5SDimitry Andric   // For example, if "this" is an undefined symbol and a new symbol is
2160b57cec5SDimitry Andric   // a defined symbol, "this" is replaced with the new symbol.
2170b57cec5SDimitry Andric   void mergeProperties(const Symbol &other);
2180b57cec5SDimitry Andric   void resolve(const Symbol &other);
2190b57cec5SDimitry Andric 
2204824e7fdSDimitry Andric   // If this is a lazy symbol, extract an input file and add the symbol
2210b57cec5SDimitry Andric   // in the file to the symbol table. Calling this function on
2220b57cec5SDimitry Andric   // non-lazy object causes a runtime error.
2234824e7fdSDimitry Andric   void extract() const;
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric   static bool isExportDynamic(Kind k, uint8_t visibility) {
2260b57cec5SDimitry Andric     if (k == SharedKind)
2270b57cec5SDimitry Andric       return visibility == llvm::ELF::STV_DEFAULT;
2280b57cec5SDimitry Andric     return config->shared || config->exportDynamic;
2290b57cec5SDimitry Andric   }
2300b57cec5SDimitry Andric 
231fe6060f1SDimitry Andric private:
2320b57cec5SDimitry Andric   void resolveUndefined(const Undefined &other);
2330b57cec5SDimitry Andric   void resolveCommon(const CommonSymbol &other);
2340b57cec5SDimitry Andric   void resolveDefined(const Defined &other);
2350b57cec5SDimitry Andric   template <class LazyT> void resolveLazy(const LazyT &other);
2360b57cec5SDimitry Andric   void resolveShared(const SharedSymbol &other);
2370b57cec5SDimitry Andric 
2380b57cec5SDimitry Andric   int compare(const Symbol *other) const;
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric   inline size_t getSymbolSize() const;
2410b57cec5SDimitry Andric 
2420b57cec5SDimitry Andric protected:
2430b57cec5SDimitry Andric   Symbol(Kind k, InputFile *file, StringRefZ name, uint8_t binding,
2440b57cec5SDimitry Andric          uint8_t stOther, uint8_t type)
2450b57cec5SDimitry Andric       : file(file), nameData(name.data), nameSize(name.size), binding(binding),
2460b57cec5SDimitry Andric         type(type), stOther(stOther), symbolKind(k), visibility(stOther & 3),
2470b57cec5SDimitry Andric         isUsedInRegularObj(!file || file->kind() == InputFile::ObjKind),
24885868e8aSDimitry Andric         exportDynamic(isExportDynamic(k, visibility)), inDynamicList(false),
249*0eae32dcSDimitry Andric         canInline(false), referenced(false), traced(false), isInIplt(false),
250*0eae32dcSDimitry Andric         gotInIgot(false), isPreemptible(false), used(!config->gcSections),
251*0eae32dcSDimitry Andric         folded(false), needsTocRestore(false), scriptDefined(false),
252*0eae32dcSDimitry Andric         needsCopy(false), needsGot(false), needsPlt(false), needsTlsDesc(false),
253*0eae32dcSDimitry Andric         needsTlsGd(false), needsTlsGdToIe(false), needsTlsLd(false),
254*0eae32dcSDimitry Andric         needsGotDtprel(false), needsTlsIe(false), hasDirectReloc(false) {}
2550b57cec5SDimitry Andric 
2560b57cec5SDimitry Andric public:
2570b57cec5SDimitry Andric   // True if this symbol is in the Iplt sub-section of the Plt and the Igot
2580b57cec5SDimitry Andric   // sub-section of the .got.plt or .got.
259480093f4SDimitry Andric   uint8_t isInIplt : 1;
2600b57cec5SDimitry Andric 
2610b57cec5SDimitry Andric   // True if this symbol needs a GOT entry and its GOT entry is actually in
2620b57cec5SDimitry Andric   // Igot. This will be true only for certain non-preemptible ifuncs.
263480093f4SDimitry Andric   uint8_t gotInIgot : 1;
2640b57cec5SDimitry Andric 
2650b57cec5SDimitry Andric   // True if this symbol is preemptible at load time.
266480093f4SDimitry Andric   uint8_t isPreemptible : 1;
2670b57cec5SDimitry Andric 
2680b57cec5SDimitry Andric   // True if an undefined or shared symbol is used from a live section.
2695ffd83dbSDimitry Andric   //
2705ffd83dbSDimitry Andric   // NOTE: In Writer.cpp the field is used to mark local defined symbols
2715ffd83dbSDimitry Andric   // which are referenced by relocations when -r or --emit-relocs is given.
272480093f4SDimitry Andric   uint8_t used : 1;
2730b57cec5SDimitry Andric 
274*0eae32dcSDimitry Andric   // True if defined relative to a section discarded by ICF.
275*0eae32dcSDimitry Andric   uint8_t folded : 1;
276*0eae32dcSDimitry Andric 
2770b57cec5SDimitry Andric   // True if a call to this symbol needs to be followed by a restore of the
2780b57cec5SDimitry Andric   // PPC64 toc pointer.
279480093f4SDimitry Andric   uint8_t needsTocRestore : 1;
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric   // True if this symbol is defined by a linker script.
282480093f4SDimitry Andric   uint8_t scriptDefined : 1;
2830b57cec5SDimitry Andric 
284*0eae32dcSDimitry Andric   // True if this symbol needs a canonical PLT entry, or (during
285*0eae32dcSDimitry Andric   // postScanRelocations) a copy relocation.
286*0eae32dcSDimitry Andric   uint8_t needsCopy : 1;
287*0eae32dcSDimitry Andric 
288*0eae32dcSDimitry Andric   // Temporary flags used to communicate which symbol entries need PLT and GOT
289*0eae32dcSDimitry Andric   // entries during postScanRelocations();
290*0eae32dcSDimitry Andric   uint8_t needsGot : 1;
291*0eae32dcSDimitry Andric   uint8_t needsPlt : 1;
292*0eae32dcSDimitry Andric   uint8_t needsTlsDesc : 1;
293*0eae32dcSDimitry Andric   uint8_t needsTlsGd : 1;
294*0eae32dcSDimitry Andric   uint8_t needsTlsGdToIe : 1;
295*0eae32dcSDimitry Andric   uint8_t needsTlsLd : 1;
296*0eae32dcSDimitry Andric   uint8_t needsGotDtprel : 1;
297*0eae32dcSDimitry Andric   uint8_t needsTlsIe : 1;
298*0eae32dcSDimitry Andric   uint8_t hasDirectReloc : 1;
299*0eae32dcSDimitry Andric 
3000b57cec5SDimitry Andric   // The partition whose dynamic symbol table contains this symbol's definition.
3010b57cec5SDimitry Andric   uint8_t partition = 1;
3020b57cec5SDimitry Andric 
3030b57cec5SDimitry Andric   bool isSection() const { return type == llvm::ELF::STT_SECTION; }
3040b57cec5SDimitry Andric   bool isTls() const { return type == llvm::ELF::STT_TLS; }
3050b57cec5SDimitry Andric   bool isFunc() const { return type == llvm::ELF::STT_FUNC; }
3060b57cec5SDimitry Andric   bool isGnuIFunc() const { return type == llvm::ELF::STT_GNU_IFUNC; }
3070b57cec5SDimitry Andric   bool isObject() const { return type == llvm::ELF::STT_OBJECT; }
3080b57cec5SDimitry Andric   bool isFile() const { return type == llvm::ELF::STT_FILE; }
3090b57cec5SDimitry Andric };
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric // Represents a symbol that is defined in the current output file.
3120b57cec5SDimitry Andric class Defined : public Symbol {
3130b57cec5SDimitry Andric public:
3140b57cec5SDimitry Andric   Defined(InputFile *file, StringRefZ name, uint8_t binding, uint8_t stOther,
3150b57cec5SDimitry Andric           uint8_t type, uint64_t value, uint64_t size, SectionBase *section)
3160b57cec5SDimitry Andric       : Symbol(DefinedKind, file, name, binding, stOther, type), value(value),
3170b57cec5SDimitry Andric         size(size), section(section) {}
3180b57cec5SDimitry Andric 
3190b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->isDefined(); }
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric   uint64_t value;
3220b57cec5SDimitry Andric   uint64_t size;
3230b57cec5SDimitry Andric   SectionBase *section;
3240b57cec5SDimitry Andric };
3250b57cec5SDimitry Andric 
3260b57cec5SDimitry Andric // Represents a common symbol.
3270b57cec5SDimitry Andric //
3280b57cec5SDimitry Andric // On Unix, it is traditionally allowed to write variable definitions
3290b57cec5SDimitry Andric // without initialization expressions (such as "int foo;") to header
3300b57cec5SDimitry Andric // files. Such definition is called "tentative definition".
3310b57cec5SDimitry Andric //
3320b57cec5SDimitry Andric // Using tentative definition is usually considered a bad practice
3330b57cec5SDimitry Andric // because you should write only declarations (such as "extern int
3340b57cec5SDimitry Andric // foo;") to header files. Nevertheless, the linker and the compiler
3350b57cec5SDimitry Andric // have to do something to support bad code by allowing duplicate
3360b57cec5SDimitry Andric // definitions for this particular case.
3370b57cec5SDimitry Andric //
3380b57cec5SDimitry Andric // Common symbols represent variable definitions without initializations.
339480093f4SDimitry Andric // The compiler creates common symbols when it sees variable definitions
3400b57cec5SDimitry Andric // without initialization (you can suppress this behavior and let the
3410b57cec5SDimitry Andric // compiler create a regular defined symbol by -fno-common).
3420b57cec5SDimitry Andric //
3430b57cec5SDimitry Andric // The linker allows common symbols to be replaced by regular defined
3440b57cec5SDimitry Andric // symbols. If there are remaining common symbols after name resolution is
3450b57cec5SDimitry Andric // complete, they are converted to regular defined symbols in a .bss
3460b57cec5SDimitry Andric // section. (Therefore, the later passes don't see any CommonSymbols.)
3470b57cec5SDimitry Andric class CommonSymbol : public Symbol {
3480b57cec5SDimitry Andric public:
3490b57cec5SDimitry Andric   CommonSymbol(InputFile *file, StringRefZ name, uint8_t binding,
3500b57cec5SDimitry Andric                uint8_t stOther, uint8_t type, uint64_t alignment, uint64_t size)
3510b57cec5SDimitry Andric       : Symbol(CommonKind, file, name, binding, stOther, type),
3520b57cec5SDimitry Andric         alignment(alignment), size(size) {}
3530b57cec5SDimitry Andric 
3540b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->isCommon(); }
3550b57cec5SDimitry Andric 
3560b57cec5SDimitry Andric   uint32_t alignment;
3570b57cec5SDimitry Andric   uint64_t size;
3580b57cec5SDimitry Andric };
3590b57cec5SDimitry Andric 
3600b57cec5SDimitry Andric class Undefined : public Symbol {
3610b57cec5SDimitry Andric public:
3620b57cec5SDimitry Andric   Undefined(InputFile *file, StringRefZ name, uint8_t binding, uint8_t stOther,
3630b57cec5SDimitry Andric             uint8_t type, uint32_t discardedSecIdx = 0)
3640b57cec5SDimitry Andric       : Symbol(UndefinedKind, file, name, binding, stOther, type),
3650b57cec5SDimitry Andric         discardedSecIdx(discardedSecIdx) {}
3660b57cec5SDimitry Andric 
3670b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
3680b57cec5SDimitry Andric 
3690b57cec5SDimitry Andric   // The section index if in a discarded section, 0 otherwise.
3700b57cec5SDimitry Andric   uint32_t discardedSecIdx;
3710b57cec5SDimitry Andric };
3720b57cec5SDimitry Andric 
3730b57cec5SDimitry Andric class SharedSymbol : public Symbol {
3740b57cec5SDimitry Andric public:
3750b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == SharedKind; }
3760b57cec5SDimitry Andric 
3770b57cec5SDimitry Andric   SharedSymbol(InputFile &file, StringRef name, uint8_t binding,
3780b57cec5SDimitry Andric                uint8_t stOther, uint8_t type, uint64_t value, uint64_t size,
379*0eae32dcSDimitry Andric                uint32_t alignment, uint16_t verdefIndex)
3800b57cec5SDimitry Andric       : Symbol(SharedKind, &file, name, binding, stOther, type), value(value),
3810b57cec5SDimitry Andric         size(size), alignment(alignment) {
3820b57cec5SDimitry Andric     this->verdefIndex = verdefIndex;
3830b57cec5SDimitry Andric     // GNU ifunc is a mechanism to allow user-supplied functions to
3840b57cec5SDimitry Andric     // resolve PLT slot values at load-time. This is contrary to the
3850b57cec5SDimitry Andric     // regular symbol resolution scheme in which symbols are resolved just
3860b57cec5SDimitry Andric     // by name. Using this hook, you can program how symbols are solved
3870b57cec5SDimitry Andric     // for you program. For example, you can make "memcpy" to be resolved
3880b57cec5SDimitry Andric     // to a SSE-enabled version of memcpy only when a machine running the
3890b57cec5SDimitry Andric     // program supports the SSE instruction set.
3900b57cec5SDimitry Andric     //
3910b57cec5SDimitry Andric     // Naturally, such symbols should always be called through their PLT
3920b57cec5SDimitry Andric     // slots. What GNU ifunc symbols point to are resolver functions, and
3930b57cec5SDimitry Andric     // calling them directly doesn't make sense (unless you are writing a
3940b57cec5SDimitry Andric     // loader).
3950b57cec5SDimitry Andric     //
3960b57cec5SDimitry Andric     // For DSO symbols, we always call them through PLT slots anyway.
3970b57cec5SDimitry Andric     // So there's no difference between GNU ifunc and regular function
3980b57cec5SDimitry Andric     // symbols if they are in DSOs. So we can handle GNU_IFUNC as FUNC.
3990b57cec5SDimitry Andric     if (this->type == llvm::ELF::STT_GNU_IFUNC)
4000b57cec5SDimitry Andric       this->type = llvm::ELF::STT_FUNC;
4010b57cec5SDimitry Andric   }
4020b57cec5SDimitry Andric 
4030b57cec5SDimitry Andric   SharedFile &getFile() const { return *cast<SharedFile>(file); }
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric   uint64_t value; // st_value
4060b57cec5SDimitry Andric   uint64_t size;  // st_size
4070b57cec5SDimitry Andric   uint32_t alignment;
4080b57cec5SDimitry Andric };
4090b57cec5SDimitry Andric 
// LazyArchive and LazyObject represent a symbol that is not yet in the link,
// but we know where to find it if needed. If the resolver finds both Undefined
// and Lazy for the same name, it will ask the Lazy to load a file.
4130b57cec5SDimitry Andric //
4140b57cec5SDimitry Andric // A special complication is the handling of weak undefined symbols. They should
4150b57cec5SDimitry Andric // not load a file, but we have to remember we have seen both the weak undefined
4160b57cec5SDimitry Andric // and the lazy. We represent that with a lazy symbol with a weak binding. This
4170b57cec5SDimitry Andric // means that code looking for undefined symbols normally also has to take lazy
4180b57cec5SDimitry Andric // symbols into consideration.
4190b57cec5SDimitry Andric 
4200b57cec5SDimitry Andric // This class represents a symbol defined in an archive file. It is
4210b57cec5SDimitry Andric // created from an archive file header, and it knows how to load an
4220b57cec5SDimitry Andric // object file from an archive to replace itself with a defined
4230b57cec5SDimitry Andric // symbol.
4240b57cec5SDimitry Andric class LazyArchive : public Symbol {
4250b57cec5SDimitry Andric public:
4260b57cec5SDimitry Andric   LazyArchive(InputFile &file, const llvm::object::Archive::Symbol s)
4270b57cec5SDimitry Andric       : Symbol(LazyArchiveKind, &file, s.getName(), llvm::ELF::STB_GLOBAL,
4280b57cec5SDimitry Andric                llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE),
4290b57cec5SDimitry Andric         sym(s) {}
4300b57cec5SDimitry Andric 
4310b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }
4320b57cec5SDimitry Andric 
4330b57cec5SDimitry Andric   MemoryBufferRef getMemberBuffer();
4340b57cec5SDimitry Andric 
4350b57cec5SDimitry Andric   const llvm::object::Archive::Symbol sym;
4360b57cec5SDimitry Andric };
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric // LazyObject symbols represents symbols in object files between
4390b57cec5SDimitry Andric // --start-lib and --end-lib options.
4400b57cec5SDimitry Andric class LazyObject : public Symbol {
4410b57cec5SDimitry Andric public:
4420b57cec5SDimitry Andric   LazyObject(InputFile &file, StringRef name)
4430b57cec5SDimitry Andric       : Symbol(LazyObjectKind, &file, name, llvm::ELF::STB_GLOBAL,
444*0eae32dcSDimitry Andric                llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {
445*0eae32dcSDimitry Andric     isUsedInRegularObj = false;
446*0eae32dcSDimitry Andric   }
4470b57cec5SDimitry Andric 
4480b57cec5SDimitry Andric   static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }
4490b57cec5SDimitry Andric };
4500b57cec5SDimitry Andric 
4510b57cec5SDimitry Andric // Some linker-generated symbols need to be created as
4520b57cec5SDimitry Andric // Defined symbols.
4530b57cec5SDimitry Andric struct ElfSym {
4540b57cec5SDimitry Andric   // __bss_start
4550b57cec5SDimitry Andric   static Defined *bss;
4560b57cec5SDimitry Andric 
4570b57cec5SDimitry Andric   // etext and _etext
4580b57cec5SDimitry Andric   static Defined *etext1;
4590b57cec5SDimitry Andric   static Defined *etext2;
4600b57cec5SDimitry Andric 
4610b57cec5SDimitry Andric   // edata and _edata
4620b57cec5SDimitry Andric   static Defined *edata1;
4630b57cec5SDimitry Andric   static Defined *edata2;
4640b57cec5SDimitry Andric 
4650b57cec5SDimitry Andric   // end and _end
4660b57cec5SDimitry Andric   static Defined *end1;
4670b57cec5SDimitry Andric   static Defined *end2;
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric   // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to
4700b57cec5SDimitry Andric   // be at some offset from the base of the .got section, usually 0 or
4710b57cec5SDimitry Andric   // the end of the .got.
4720b57cec5SDimitry Andric   static Defined *globalOffsetTable;
4730b57cec5SDimitry Andric 
4740b57cec5SDimitry Andric   // _gp, _gp_disp and __gnu_local_gp symbols. Only for MIPS.
4750b57cec5SDimitry Andric   static Defined *mipsGp;
4760b57cec5SDimitry Andric   static Defined *mipsGpDisp;
4770b57cec5SDimitry Andric   static Defined *mipsLocalGp;
4780b57cec5SDimitry Andric 
4790b57cec5SDimitry Andric   // __rel{,a}_iplt_{start,end} symbols.
4800b57cec5SDimitry Andric   static Defined *relaIpltStart;
4810b57cec5SDimitry Andric   static Defined *relaIpltEnd;
4820b57cec5SDimitry Andric 
4830b57cec5SDimitry Andric   // __global_pointer$ for RISC-V.
4840b57cec5SDimitry Andric   static Defined *riscvGlobalPointer;
4850b57cec5SDimitry Andric 
4860b57cec5SDimitry Andric   // _TLS_MODULE_BASE_ on targets that support TLSDESC.
4870b57cec5SDimitry Andric   static Defined *tlsModuleBase;
4880b57cec5SDimitry Andric };
4890b57cec5SDimitry Andric 
4900b57cec5SDimitry Andric // A buffer class that is large enough to hold any Symbol-derived
4910b57cec5SDimitry Andric // object. We allocate memory using this class and instantiate a symbol
4920b57cec5SDimitry Andric // using the placement new.
4930b57cec5SDimitry Andric union SymbolUnion {
4940b57cec5SDimitry Andric   alignas(Defined) char a[sizeof(Defined)];
4950b57cec5SDimitry Andric   alignas(CommonSymbol) char b[sizeof(CommonSymbol)];
4960b57cec5SDimitry Andric   alignas(Undefined) char c[sizeof(Undefined)];
4970b57cec5SDimitry Andric   alignas(SharedSymbol) char d[sizeof(SharedSymbol)];
4980b57cec5SDimitry Andric   alignas(LazyArchive) char e[sizeof(LazyArchive)];
4990b57cec5SDimitry Andric   alignas(LazyObject) char f[sizeof(LazyObject)];
5000b57cec5SDimitry Andric };
5010b57cec5SDimitry Andric 
5020b57cec5SDimitry Andric // It is important to keep the size of SymbolUnion small for performance and
5030b57cec5SDimitry Andric // memory usage reasons. 80 bytes is a soft limit based on the size of Defined
5040b57cec5SDimitry Andric // on a 64-bit system.
5050b57cec5SDimitry Andric static_assert(sizeof(SymbolUnion) <= 80, "SymbolUnion too large");
5060b57cec5SDimitry Andric 
5070b57cec5SDimitry Andric template <typename T> struct AssertSymbol {
5080b57cec5SDimitry Andric   static_assert(std::is_trivially_destructible<T>(),
5090b57cec5SDimitry Andric                 "Symbol types must be trivially destructible");
5100b57cec5SDimitry Andric   static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
5110b57cec5SDimitry Andric   static_assert(alignof(T) <= alignof(SymbolUnion),
5120b57cec5SDimitry Andric                 "SymbolUnion not aligned enough");
5130b57cec5SDimitry Andric };
5140b57cec5SDimitry Andric 
5150b57cec5SDimitry Andric static inline void assertSymbols() {
5160b57cec5SDimitry Andric   AssertSymbol<Defined>();
5170b57cec5SDimitry Andric   AssertSymbol<CommonSymbol>();
5180b57cec5SDimitry Andric   AssertSymbol<Undefined>();
5190b57cec5SDimitry Andric   AssertSymbol<SharedSymbol>();
5200b57cec5SDimitry Andric   AssertSymbol<LazyArchive>();
5210b57cec5SDimitry Andric   AssertSymbol<LazyObject>();
5220b57cec5SDimitry Andric }
5230b57cec5SDimitry Andric 
5240b57cec5SDimitry Andric void printTraceSymbol(const Symbol *sym);
5250b57cec5SDimitry Andric 
5260b57cec5SDimitry Andric size_t Symbol::getSymbolSize() const {
5270b57cec5SDimitry Andric   switch (kind()) {
5280b57cec5SDimitry Andric   case CommonKind:
5290b57cec5SDimitry Andric     return sizeof(CommonSymbol);
5300b57cec5SDimitry Andric   case DefinedKind:
5310b57cec5SDimitry Andric     return sizeof(Defined);
5320b57cec5SDimitry Andric   case LazyArchiveKind:
5330b57cec5SDimitry Andric     return sizeof(LazyArchive);
5340b57cec5SDimitry Andric   case LazyObjectKind:
5350b57cec5SDimitry Andric     return sizeof(LazyObject);
5360b57cec5SDimitry Andric   case SharedKind:
5370b57cec5SDimitry Andric     return sizeof(SharedSymbol);
5380b57cec5SDimitry Andric   case UndefinedKind:
5390b57cec5SDimitry Andric     return sizeof(Undefined);
5400b57cec5SDimitry Andric   case PlaceholderKind:
5410b57cec5SDimitry Andric     return sizeof(Symbol);
5420b57cec5SDimitry Andric   }
5430b57cec5SDimitry Andric   llvm_unreachable("unknown symbol kind");
5440b57cec5SDimitry Andric }
5450b57cec5SDimitry Andric 
5460b57cec5SDimitry Andric // replace() replaces "this" object with a given symbol by memcpy'ing
5470b57cec5SDimitry Andric // it over to "this". This function is called as a result of name
5480b57cec5SDimitry Andric // resolution, e.g. to replace an undefind symbol with a defined symbol.
54985868e8aSDimitry Andric void Symbol::replace(const Symbol &newSym) {
5500b57cec5SDimitry Andric   using llvm::ELF::STT_TLS;
5510b57cec5SDimitry Andric 
5525ffd83dbSDimitry Andric   // st_value of STT_TLS represents the assigned offset, not the actual address
5535ffd83dbSDimitry Andric   // which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can only be
5545ffd83dbSDimitry Andric   // referenced by special TLS relocations. It is usually an error if a STT_TLS
5555ffd83dbSDimitry Andric   // symbol is replaced by a non-STT_TLS symbol, vice versa. There are two
5565ffd83dbSDimitry Andric   // exceptions: (a) a STT_NOTYPE lazy/undefined symbol can be replaced by a
5575ffd83dbSDimitry Andric   // STT_TLS symbol, (b) a STT_TLS undefined symbol can be replaced by a
5585ffd83dbSDimitry Andric   // STT_NOTYPE lazy symbol.
5595ffd83dbSDimitry Andric   if (symbolKind != PlaceholderKind && !newSym.isLazy() &&
5605ffd83dbSDimitry Andric       (type == STT_TLS) != (newSym.type == STT_TLS) &&
5615ffd83dbSDimitry Andric       type != llvm::ELF::STT_NOTYPE)
5620b57cec5SDimitry Andric     error("TLS attribute mismatch: " + toString(*this) + "\n>>> defined in " +
56385868e8aSDimitry Andric           toString(newSym.file) + "\n>>> defined in " + toString(file));
5640b57cec5SDimitry Andric 
5650b57cec5SDimitry Andric   Symbol old = *this;
56685868e8aSDimitry Andric   memcpy(this, &newSym, newSym.getSymbolSize());
5670b57cec5SDimitry Andric 
56885868e8aSDimitry Andric   // old may be a placeholder. The referenced fields must be initialized in
56985868e8aSDimitry Andric   // SymbolTable::insert.
5700b57cec5SDimitry Andric   versionId = old.versionId;
5710b57cec5SDimitry Andric   visibility = old.visibility;
5720b57cec5SDimitry Andric   isUsedInRegularObj = old.isUsedInRegularObj;
5730b57cec5SDimitry Andric   exportDynamic = old.exportDynamic;
57485868e8aSDimitry Andric   inDynamicList = old.inDynamicList;
5750b57cec5SDimitry Andric   canInline = old.canInline;
57685868e8aSDimitry Andric   referenced = old.referenced;
5770b57cec5SDimitry Andric   traced = old.traced;
5780b57cec5SDimitry Andric   isPreemptible = old.isPreemptible;
5790b57cec5SDimitry Andric   scriptDefined = old.scriptDefined;
5800b57cec5SDimitry Andric   partition = old.partition;
5810b57cec5SDimitry Andric 
5820b57cec5SDimitry Andric   // Print out a log message if --trace-symbol was specified.
5830b57cec5SDimitry Andric   // This is for debugging.
5840b57cec5SDimitry Andric   if (traced)
5850b57cec5SDimitry Andric     printTraceSymbol(this);
5860b57cec5SDimitry Andric }
5870b57cec5SDimitry Andric 
588*0eae32dcSDimitry Andric template <typename... T> Defined *makeDefined(T &&...args) {
589*0eae32dcSDimitry Andric   return new (reinterpret_cast<Defined *>(
590*0eae32dcSDimitry Andric       getSpecificAllocSingleton<SymbolUnion>().Allocate()))
591*0eae32dcSDimitry Andric       Defined(std::forward<T>(args)...);
592*0eae32dcSDimitry Andric }
593*0eae32dcSDimitry Andric 
5940b57cec5SDimitry Andric void maybeWarnUnorderableSymbol(const Symbol *sym);
595480093f4SDimitry Andric bool computeIsPreemptible(const Symbol &sym);
5965ffd83dbSDimitry Andric void reportBackrefs();
5975ffd83dbSDimitry Andric 
5985ffd83dbSDimitry Andric // A mapping from a symbol to an InputFile referencing it backward. Used by
5995ffd83dbSDimitry Andric // --warn-backrefs.
600e8d8bef9SDimitry Andric extern llvm::DenseMap<const Symbol *,
601e8d8bef9SDimitry Andric                       std::pair<const InputFile *, const InputFile *>>
602e8d8bef9SDimitry Andric     backwardReferences;
603480093f4SDimitry Andric 
604349cc55cSDimitry Andric // A tuple of (reference, extractedFile, sym). Used by --why-extract=.
605349cc55cSDimitry Andric extern SmallVector<std::tuple<std::string, const InputFile *, const Symbol &>,
606349cc55cSDimitry Andric                    0>
607349cc55cSDimitry Andric     whyExtract;
608349cc55cSDimitry Andric 
6090b57cec5SDimitry Andric } // namespace elf
6100b57cec5SDimitry Andric } // namespace lld
6110b57cec5SDimitry Andric 
6120b57cec5SDimitry Andric #endif
613