xref: /freebsd/contrib/llvm-project/lld/ELF/LinkerScript.h (revision 16d6b3b3da62aa5baaf3c66c8d4e6f8c8f70aeb7)
10b57cec5SDimitry Andric //===- LinkerScript.h -------------------------------------------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #ifndef LLD_ELF_LINKER_SCRIPT_H
100b57cec5SDimitry Andric #define LLD_ELF_LINKER_SCRIPT_H
110b57cec5SDimitry Andric 
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
#include <vector>
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric namespace lld {
290b57cec5SDimitry Andric namespace elf {
300b57cec5SDimitry Andric 
// Forward declarations for types defined elsewhere. Wherever this header
// mentions them it is through a pointer or reference, so complete definitions
// are not required here.
class Defined;
class InputSection;
class InputSectionBase;
class OutputSection;
class SectionBase;
class Symbol;
class ThunkSection;
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric // This represents an r-value in the linker script.
400b57cec5SDimitry Andric struct ExprValue {
410b57cec5SDimitry Andric   ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
420b57cec5SDimitry Andric             const Twine &loc)
430b57cec5SDimitry Andric       : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {}
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric   ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric   bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
480b57cec5SDimitry Andric   uint64_t getValue() const;
490b57cec5SDimitry Andric   uint64_t getSecAddr() const;
500b57cec5SDimitry Andric   uint64_t getSectionOffset() const;
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric   // If a value is relative to a section, it has a non-null Sec.
530b57cec5SDimitry Andric   SectionBase *sec;
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric   // True if this expression is enclosed in ABSOLUTE().
560b57cec5SDimitry Andric   // This flag affects the return value of getValue().
570b57cec5SDimitry Andric   bool forceAbsolute;
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric   uint64_t val;
600b57cec5SDimitry Andric   uint64_t alignment = 1;
610b57cec5SDimitry Andric 
62*16d6b3b3SDimitry Andric   // The original st_type if the expression represents a symbol. Any operation
63*16d6b3b3SDimitry Andric   // resets type to STT_NOTYPE.
64*16d6b3b3SDimitry Andric   uint8_t type = llvm::ELF::STT_NOTYPE;
65*16d6b3b3SDimitry Andric 
660b57cec5SDimitry Andric   // Original source location. Used for error messages.
670b57cec5SDimitry Andric   std::string loc;
680b57cec5SDimitry Andric };
690b57cec5SDimitry Andric 
// This represents an expression in the linker script.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function; each call takes
// no arguments and yields an ExprValue.
using Expr = std::function<ExprValue()>;
740b57cec5SDimitry Andric 
// Tags for the commands of the linker script SECTIONS command. The value is
// what BaseCommand::kind holds and what each command's classof() tests.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  AssignmentKind = 0,    // . = expr or <sym> = expr
  OutputSectionKind = 1,
  InputSectionKind = 2,
  ByteKind = 3,          // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
};
830b57cec5SDimitry Andric 
// Common base of the SECTIONS command nodes. `kind` holds the tag that the
// derived types' classof() functions compare against.
struct BaseCommand {
  // explicit: a bare int must not silently convert into a command object.
  // Derived types pass their kind with direct initialization, which still
  // works.
  explicit BaseCommand(int k) : kind(k) {}
  int kind;
};
880b57cec5SDimitry Andric 
890b57cec5SDimitry Andric // This represents ". = <expr>" or "<symbol> = <expr>".
900b57cec5SDimitry Andric struct SymbolAssignment : BaseCommand {
910b57cec5SDimitry Andric   SymbolAssignment(StringRef name, Expr e, std::string loc)
920b57cec5SDimitry Andric       : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {}
930b57cec5SDimitry Andric 
940b57cec5SDimitry Andric   static bool classof(const BaseCommand *c) {
950b57cec5SDimitry Andric     return c->kind == AssignmentKind;
960b57cec5SDimitry Andric   }
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric   // The LHS of an expression. Name is either a symbol name or ".".
990b57cec5SDimitry Andric   StringRef name;
1000b57cec5SDimitry Andric   Defined *sym = nullptr;
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric   // The RHS of an expression.
1030b57cec5SDimitry Andric   Expr expression;
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric   // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
1060b57cec5SDimitry Andric   bool provide = false;
1070b57cec5SDimitry Andric   bool hidden = false;
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric   // Holds file name and line number for error reporting.
1100b57cec5SDimitry Andric   std::string location;
1110b57cec5SDimitry Andric 
1120b57cec5SDimitry Andric   // A string representation of this command. We use this for -Map.
1130b57cec5SDimitry Andric   std::string commandString;
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric   // Address of this assignment command.
1165ffd83dbSDimitry Andric   uint64_t addr;
1170b57cec5SDimitry Andric 
1180b57cec5SDimitry Andric   // Size of this assignment command. This is usually 0, but if
1190b57cec5SDimitry Andric   // you move '.' this may be greater than 0.
1205ffd83dbSDimitry Andric   uint64_t size;
1210b57cec5SDimitry Andric };
1220b57cec5SDimitry Andric 
// Extra conditions a linker script may attach to an output section.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; likewise, one marked ONLY_IF_RW is created if all input
// sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric // This struct is used to represent the location and size of regions of
1300b57cec5SDimitry Andric // target memory. Instances of the struct are created by parsing the
1310b57cec5SDimitry Andric // MEMORY command.
1320b57cec5SDimitry Andric struct MemoryRegion {
1335ffd83dbSDimitry Andric   MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
1340b57cec5SDimitry Andric                uint32_t negFlags)
1355ffd83dbSDimitry Andric       : name(std::string(name)), origin(origin), length(length), flags(flags),
1360b57cec5SDimitry Andric         negFlags(negFlags) {}
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric   std::string name;
1395ffd83dbSDimitry Andric   Expr origin;
1405ffd83dbSDimitry Andric   Expr length;
1410b57cec5SDimitry Andric   uint32_t flags;
1420b57cec5SDimitry Andric   uint32_t negFlags;
1430b57cec5SDimitry Andric   uint64_t curPos = 0;
1440b57cec5SDimitry Andric };
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric // This struct represents one section match pattern in SECTIONS() command.
1470b57cec5SDimitry Andric // It can optionally have negative match pattern for EXCLUDED_FILE command.
1480b57cec5SDimitry Andric // Also it may be surrounded with SORT() command, so contains sorting rules.
1490b57cec5SDimitry Andric struct SectionPattern {
1500b57cec5SDimitry Andric   SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
1510b57cec5SDimitry Andric       : excludedFilePat(pat1), sectionPat(pat2),
1520b57cec5SDimitry Andric         sortOuter(SortSectionPolicy::Default),
1530b57cec5SDimitry Andric         sortInner(SortSectionPolicy::Default) {}
1540b57cec5SDimitry Andric 
1550b57cec5SDimitry Andric   StringMatcher excludedFilePat;
1560b57cec5SDimitry Andric   StringMatcher sectionPat;
1570b57cec5SDimitry Andric   SortSectionPolicy sortOuter;
1580b57cec5SDimitry Andric   SortSectionPolicy sortInner;
1590b57cec5SDimitry Andric };
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric struct InputSectionDescription : BaseCommand {
1625ffd83dbSDimitry Andric   InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
1635ffd83dbSDimitry Andric                           uint64_t withoutFlags = 0)
1645ffd83dbSDimitry Andric       : BaseCommand(InputSectionKind), filePat(filePattern),
1655ffd83dbSDimitry Andric         withFlags(withFlags), withoutFlags(withoutFlags) {}
1660b57cec5SDimitry Andric 
1670b57cec5SDimitry Andric   static bool classof(const BaseCommand *c) {
1680b57cec5SDimitry Andric     return c->kind == InputSectionKind;
1690b57cec5SDimitry Andric   }
1700b57cec5SDimitry Andric 
1715ffd83dbSDimitry Andric   SingleStringMatcher filePat;
1720b57cec5SDimitry Andric 
1730b57cec5SDimitry Andric   // Input sections that matches at least one of SectionPatterns
1740b57cec5SDimitry Andric   // will be associated with this InputSectionDescription.
1750b57cec5SDimitry Andric   std::vector<SectionPattern> sectionPatterns;
1760b57cec5SDimitry Andric 
17785868e8aSDimitry Andric   // Includes InputSections and MergeInputSections. Used temporarily during
17885868e8aSDimitry Andric   // assignment of input sections to output sections.
17985868e8aSDimitry Andric   std::vector<InputSectionBase *> sectionBases;
18085868e8aSDimitry Andric 
18185868e8aSDimitry Andric   // Used after the finalizeInputSections() pass. MergeInputSections have been
18285868e8aSDimitry Andric   // merged into MergeSyntheticSections.
1830b57cec5SDimitry Andric   std::vector<InputSection *> sections;
1840b57cec5SDimitry Andric 
1850b57cec5SDimitry Andric   // Temporary record of synthetic ThunkSection instances and the pass that
1860b57cec5SDimitry Andric   // they were created in. This is used to insert newly created ThunkSections
1870b57cec5SDimitry Andric   // into Sections at the end of a createThunks() pass.
1880b57cec5SDimitry Andric   std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections;
1895ffd83dbSDimitry Andric 
1905ffd83dbSDimitry Andric   // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
1915ffd83dbSDimitry Andric   uint64_t withFlags;
1925ffd83dbSDimitry Andric   uint64_t withoutFlags;
1930b57cec5SDimitry Andric };
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric // Represents BYTE(), SHORT(), LONG(), or QUAD().
1960b57cec5SDimitry Andric struct ByteCommand : BaseCommand {
1970b57cec5SDimitry Andric   ByteCommand(Expr e, unsigned size, std::string commandString)
1980b57cec5SDimitry Andric       : BaseCommand(ByteKind), commandString(commandString), expression(e),
1990b57cec5SDimitry Andric         size(size) {}
2000b57cec5SDimitry Andric 
2010b57cec5SDimitry Andric   static bool classof(const BaseCommand *c) { return c->kind == ByteKind; }
2020b57cec5SDimitry Andric 
2030b57cec5SDimitry Andric   // Keeps string representing the command. Used for -Map" is perhaps better.
2040b57cec5SDimitry Andric   std::string commandString;
2050b57cec5SDimitry Andric 
2060b57cec5SDimitry Andric   Expr expression;
2070b57cec5SDimitry Andric 
2080b57cec5SDimitry Andric   // This is just an offset of this assignment command in the output section.
2090b57cec5SDimitry Andric   unsigned offset;
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric   // Size of this data command.
2120b57cec5SDimitry Andric   unsigned size;
2130b57cec5SDimitry Andric };
2140b57cec5SDimitry Andric 
// One recorded INSERT request. LinkerScript keeps these in `insertCommands`,
// which is used to implement INSERT [AFTER|BEFORE].
struct InsertCommand {
  // Output section the request applies to.
  OutputSection *os;
  // Presumably true for INSERT AFTER and false for INSERT BEFORE — confirm
  // against the parser.
  bool isAfter;
  // NOTE(review): looks like the name of the section that `os` is placed
  // relative to — confirm.
  StringRef where;
};
2205ffd83dbSDimitry Andric 
// One entry of the PHDRS command list (see LinkerScript::phdrsCommands).
struct PhdrsCommand {
  StringRef name;
  // Defaults to PT_NULL.
  unsigned type = llvm::ELF::PT_NULL;
  // NOTE(review): presumably set when the entry carries the FILEHDR / PHDRS
  // keywords — confirm against the parser.
  bool hasFilehdr = false;
  bool hasPhdrs = false;
  // Has no value unless flags were set for this entry.
  llvm::Optional<unsigned> flags;
  // Empty (not callable) by default.
  Expr lmaExpr = nullptr;
};
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric class LinkerScript final {
2310b57cec5SDimitry Andric   // Temporary state used in processSectionCommands() and assignAddresses()
2320b57cec5SDimitry Andric   // that must be reinitialized for each call to the above functions, and must
2330b57cec5SDimitry Andric   // not be used outside of the scope of a call to the above functions.
2340b57cec5SDimitry Andric   struct AddressState {
2350b57cec5SDimitry Andric     AddressState();
2360b57cec5SDimitry Andric     uint64_t threadBssOffset = 0;
2370b57cec5SDimitry Andric     OutputSection *outSec = nullptr;
2380b57cec5SDimitry Andric     MemoryRegion *memRegion = nullptr;
2390b57cec5SDimitry Andric     MemoryRegion *lmaRegion = nullptr;
2400b57cec5SDimitry Andric     uint64_t lmaOffset = 0;
2410b57cec5SDimitry Andric   };
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric   llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection;
2440b57cec5SDimitry Andric 
2450b57cec5SDimitry Andric   void addSymbol(SymbolAssignment *cmd);
2460b57cec5SDimitry Andric   void assignSymbol(SymbolAssignment *cmd, bool inSec);
2470b57cec5SDimitry Andric   void setDot(Expr e, const Twine &loc, bool inSec);
2480b57cec5SDimitry Andric   void expandOutputSection(uint64_t size);
2490b57cec5SDimitry Andric   void expandMemoryRegions(uint64_t size);
2500b57cec5SDimitry Andric 
25185868e8aSDimitry Andric   std::vector<InputSectionBase *>
2525ffd83dbSDimitry Andric   computeInputSections(const InputSectionDescription *,
2535ffd83dbSDimitry Andric                        ArrayRef<InputSectionBase *>);
2540b57cec5SDimitry Andric 
25585868e8aSDimitry Andric   std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd);
2560b57cec5SDimitry Andric 
2575ffd83dbSDimitry Andric   void discardSynthetic(OutputSection &);
2585ffd83dbSDimitry Andric 
2590b57cec5SDimitry Andric   std::vector<size_t> getPhdrIndices(OutputSection *sec);
2600b57cec5SDimitry Andric 
2610b57cec5SDimitry Andric   MemoryRegion *findMemoryRegion(OutputSection *sec);
2620b57cec5SDimitry Andric 
2630b57cec5SDimitry Andric   void switchTo(OutputSection *sec);
2640b57cec5SDimitry Andric   uint64_t advance(uint64_t size, unsigned align);
2650b57cec5SDimitry Andric   void output(InputSection *sec);
2660b57cec5SDimitry Andric 
2670b57cec5SDimitry Andric   void assignOffsets(OutputSection *sec);
2680b57cec5SDimitry Andric 
2690b57cec5SDimitry Andric   // Ctx captures the local AddressState and makes it accessible
2700b57cec5SDimitry Andric   // deliberately. This is needed as there are some cases where we cannot just
2710b57cec5SDimitry Andric   // thread the current state through to a lambda function created by the
2720b57cec5SDimitry Andric   // script parser.
2730b57cec5SDimitry Andric   // This should remain a plain pointer as its lifetime is smaller than
2740b57cec5SDimitry Andric   // LinkerScript.
2750b57cec5SDimitry Andric   AddressState *ctx = nullptr;
2760b57cec5SDimitry Andric 
2770b57cec5SDimitry Andric   OutputSection *aether;
2780b57cec5SDimitry Andric 
2790b57cec5SDimitry Andric   uint64_t dot;
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric public:
2820b57cec5SDimitry Andric   OutputSection *createOutputSection(StringRef name, StringRef location);
2830b57cec5SDimitry Andric   OutputSection *getOrCreateOutputSection(StringRef name);
2840b57cec5SDimitry Andric 
2850b57cec5SDimitry Andric   bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
2860b57cec5SDimitry Andric   uint64_t getDot() { return dot; }
28785868e8aSDimitry Andric   void discard(InputSectionBase *s);
2880b57cec5SDimitry Andric 
2890b57cec5SDimitry Andric   ExprValue getSymbolValue(StringRef name, const Twine &loc);
2900b57cec5SDimitry Andric 
2910b57cec5SDimitry Andric   void addOrphanSections();
2925ffd83dbSDimitry Andric   void diagnoseOrphanHandling() const;
2930b57cec5SDimitry Andric   void adjustSectionsBeforeSorting();
2940b57cec5SDimitry Andric   void adjustSectionsAfterSorting();
2950b57cec5SDimitry Andric 
2960b57cec5SDimitry Andric   std::vector<PhdrEntry *> createPhdrs();
2970b57cec5SDimitry Andric   bool needsInterpSection();
2980b57cec5SDimitry Andric 
2990b57cec5SDimitry Andric   bool shouldKeep(InputSectionBase *s);
30085868e8aSDimitry Andric   const Defined *assignAddresses();
3010b57cec5SDimitry Andric   void allocateHeaders(std::vector<PhdrEntry *> &phdrs);
3020b57cec5SDimitry Andric   void processSectionCommands();
30385868e8aSDimitry Andric   void processSymbolAssignments();
3040b57cec5SDimitry Andric   void declareSymbols();
3050b57cec5SDimitry Andric 
3060b57cec5SDimitry Andric   // Used to handle INSERT AFTER statements.
3070b57cec5SDimitry Andric   void processInsertCommands();
3080b57cec5SDimitry Andric 
3090b57cec5SDimitry Andric   // SECTIONS command list.
3100b57cec5SDimitry Andric   std::vector<BaseCommand *> sectionCommands;
3110b57cec5SDimitry Andric 
3120b57cec5SDimitry Andric   // PHDRS command list.
3130b57cec5SDimitry Andric   std::vector<PhdrsCommand> phdrsCommands;
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric   bool hasSectionsCommand = false;
3160b57cec5SDimitry Andric   bool errorOnMissingSection = false;
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric   // List of section patterns specified with KEEP commands. They will
3190b57cec5SDimitry Andric   // be kept even if they are unused and --gc-sections is specified.
3200b57cec5SDimitry Andric   std::vector<InputSectionDescription *> keptSections;
3210b57cec5SDimitry Andric 
3220b57cec5SDimitry Andric   // A map from memory region name to a memory region descriptor.
3230b57cec5SDimitry Andric   llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
3240b57cec5SDimitry Andric 
3250b57cec5SDimitry Andric   // A list of symbols referenced by the script.
3260b57cec5SDimitry Andric   std::vector<llvm::StringRef> referencedSymbols;
3270b57cec5SDimitry Andric 
3285ffd83dbSDimitry Andric   // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
3295ffd83dbSDimitry Andric   // to be reordered.
3305ffd83dbSDimitry Andric   std::vector<InsertCommand> insertCommands;
3315ffd83dbSDimitry Andric 
3325ffd83dbSDimitry Andric   // Sections that will be warned/errored by --orphan-handling.
3335ffd83dbSDimitry Andric   std::vector<const InputSectionBase *> orphanSections;
3340b57cec5SDimitry Andric };
3350b57cec5SDimitry Andric 
// Pointer to the LinkerScript instance shared by code that includes this
// header. This is only a declaration; the definition lives outside this header.
extern LinkerScript *script;
3370b57cec5SDimitry Andric 
3380b57cec5SDimitry Andric } // end namespace elf
3390b57cec5SDimitry Andric } // end namespace lld
3400b57cec5SDimitry Andric 
3410b57cec5SDimitry Andric #endif // LLD_ELF_LINKER_SCRIPT_H
342