//===- LinkerScript.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLD_ELF_LINKER_SCRIPT_H
#define LLD_ELF_LINKER_SCRIPT_H

#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace lld {
namespace elf {

class Defined;
class InputSection;
class InputSectionBase;
class OutputSection;
class SectionBase;
class Symbol;
class ThunkSection;

//
This represents an r-value in the linker script. 400b57cec5SDimitry Andric struct ExprValue { 410b57cec5SDimitry Andric ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, 420b57cec5SDimitry Andric const Twine &loc) 430b57cec5SDimitry Andric : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {} 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {} 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric bool isAbsolute() const { return forceAbsolute || sec == nullptr; } 480b57cec5SDimitry Andric uint64_t getValue() const; 490b57cec5SDimitry Andric uint64_t getSecAddr() const; 500b57cec5SDimitry Andric uint64_t getSectionOffset() const; 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric // If a value is relative to a section, it has a non-null Sec. 530b57cec5SDimitry Andric SectionBase *sec; 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric // True if this expression is enclosed in ABSOLUTE(). 560b57cec5SDimitry Andric // This flag affects the return value of getValue(). 570b57cec5SDimitry Andric bool forceAbsolute; 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric uint64_t val; 600b57cec5SDimitry Andric uint64_t alignment = 1; 610b57cec5SDimitry Andric 62*16d6b3b3SDimitry Andric // The original st_type if the expression represents a symbol. Any operation 63*16d6b3b3SDimitry Andric // resets type to STT_NOTYPE. 64*16d6b3b3SDimitry Andric uint8_t type = llvm::ELF::STT_NOTYPE; 65*16d6b3b3SDimitry Andric 660b57cec5SDimitry Andric // Original source location. Used for error messages. 670b57cec5SDimitry Andric std::string loc; 680b57cec5SDimitry Andric }; 690b57cec5SDimitry Andric 700b57cec5SDimitry Andric // This represents an expression in the linker script. 710b57cec5SDimitry Andric // ScriptParser::readExpr reads an expression and returns an Expr. 720b57cec5SDimitry Andric // Later, we evaluate the expression by calling the function. 
730b57cec5SDimitry Andric using Expr = std::function<ExprValue()>; 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric // This enum is used to implement linker script SECTIONS command. 760b57cec5SDimitry Andric // https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS 770b57cec5SDimitry Andric enum SectionsCommandKind { 780b57cec5SDimitry Andric AssignmentKind, // . = expr or <sym> = expr 790b57cec5SDimitry Andric OutputSectionKind, 800b57cec5SDimitry Andric InputSectionKind, 810b57cec5SDimitry Andric ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) 820b57cec5SDimitry Andric }; 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric struct BaseCommand { 850b57cec5SDimitry Andric BaseCommand(int k) : kind(k) {} 860b57cec5SDimitry Andric int kind; 870b57cec5SDimitry Andric }; 880b57cec5SDimitry Andric 890b57cec5SDimitry Andric // This represents ". = <expr>" or "<symbol> = <expr>". 900b57cec5SDimitry Andric struct SymbolAssignment : BaseCommand { 910b57cec5SDimitry Andric SymbolAssignment(StringRef name, Expr e, std::string loc) 920b57cec5SDimitry Andric : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {} 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric static bool classof(const BaseCommand *c) { 950b57cec5SDimitry Andric return c->kind == AssignmentKind; 960b57cec5SDimitry Andric } 970b57cec5SDimitry Andric 980b57cec5SDimitry Andric // The LHS of an expression. Name is either a symbol name or ".". 990b57cec5SDimitry Andric StringRef name; 1000b57cec5SDimitry Andric Defined *sym = nullptr; 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric // The RHS of an expression. 1030b57cec5SDimitry Andric Expr expression; 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. 
1060b57cec5SDimitry Andric bool provide = false; 1070b57cec5SDimitry Andric bool hidden = false; 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric // Holds file name and line number for error reporting. 1100b57cec5SDimitry Andric std::string location; 1110b57cec5SDimitry Andric 1120b57cec5SDimitry Andric // A string representation of this command. We use this for -Map. 1130b57cec5SDimitry Andric std::string commandString; 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric // Address of this assignment command. 1165ffd83dbSDimitry Andric uint64_t addr; 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric // Size of this assignment command. This is usually 0, but if 1190b57cec5SDimitry Andric // you move '.' this may be greater than 0. 1205ffd83dbSDimitry Andric uint64_t size; 1210b57cec5SDimitry Andric }; 1220b57cec5SDimitry Andric 123480093f4SDimitry Andric // Linker scripts allow additional constraints to be put on output sections. 1240b57cec5SDimitry Andric // If an output section is marked as ONLY_IF_RO, the section is created 1250b57cec5SDimitry Andric // only if its input sections are read-only. Likewise, an output section 1260b57cec5SDimitry Andric // with ONLY_IF_RW is created if all input sections are RW. 1270b57cec5SDimitry Andric enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric // This struct is used to represent the location and size of regions of 1300b57cec5SDimitry Andric // target memory. Instances of the struct are created by parsing the 1310b57cec5SDimitry Andric // MEMORY command. 
1320b57cec5SDimitry Andric struct MemoryRegion { 1335ffd83dbSDimitry Andric MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags, 1340b57cec5SDimitry Andric uint32_t negFlags) 1355ffd83dbSDimitry Andric : name(std::string(name)), origin(origin), length(length), flags(flags), 1360b57cec5SDimitry Andric negFlags(negFlags) {} 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric std::string name; 1395ffd83dbSDimitry Andric Expr origin; 1405ffd83dbSDimitry Andric Expr length; 1410b57cec5SDimitry Andric uint32_t flags; 1420b57cec5SDimitry Andric uint32_t negFlags; 1430b57cec5SDimitry Andric uint64_t curPos = 0; 1440b57cec5SDimitry Andric }; 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric // This struct represents one section match pattern in SECTIONS() command. 1470b57cec5SDimitry Andric // It can optionally have negative match pattern for EXCLUDED_FILE command. 1480b57cec5SDimitry Andric // Also it may be surrounded with SORT() command, so contains sorting rules. 1490b57cec5SDimitry Andric struct SectionPattern { 1500b57cec5SDimitry Andric SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) 1510b57cec5SDimitry Andric : excludedFilePat(pat1), sectionPat(pat2), 1520b57cec5SDimitry Andric sortOuter(SortSectionPolicy::Default), 1530b57cec5SDimitry Andric sortInner(SortSectionPolicy::Default) {} 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric StringMatcher excludedFilePat; 1560b57cec5SDimitry Andric StringMatcher sectionPat; 1570b57cec5SDimitry Andric SortSectionPolicy sortOuter; 1580b57cec5SDimitry Andric SortSectionPolicy sortInner; 1590b57cec5SDimitry Andric }; 1600b57cec5SDimitry Andric 1610b57cec5SDimitry Andric struct InputSectionDescription : BaseCommand { 1625ffd83dbSDimitry Andric InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0, 1635ffd83dbSDimitry Andric uint64_t withoutFlags = 0) 1645ffd83dbSDimitry Andric : BaseCommand(InputSectionKind), filePat(filePattern), 1655ffd83dbSDimitry Andric 
withFlags(withFlags), withoutFlags(withoutFlags) {} 1660b57cec5SDimitry Andric 1670b57cec5SDimitry Andric static bool classof(const BaseCommand *c) { 1680b57cec5SDimitry Andric return c->kind == InputSectionKind; 1690b57cec5SDimitry Andric } 1700b57cec5SDimitry Andric 1715ffd83dbSDimitry Andric SingleStringMatcher filePat; 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric // Input sections that matches at least one of SectionPatterns 1740b57cec5SDimitry Andric // will be associated with this InputSectionDescription. 1750b57cec5SDimitry Andric std::vector<SectionPattern> sectionPatterns; 1760b57cec5SDimitry Andric 17785868e8aSDimitry Andric // Includes InputSections and MergeInputSections. Used temporarily during 17885868e8aSDimitry Andric // assignment of input sections to output sections. 17985868e8aSDimitry Andric std::vector<InputSectionBase *> sectionBases; 18085868e8aSDimitry Andric 18185868e8aSDimitry Andric // Used after the finalizeInputSections() pass. MergeInputSections have been 18285868e8aSDimitry Andric // merged into MergeSyntheticSections. 1830b57cec5SDimitry Andric std::vector<InputSection *> sections; 1840b57cec5SDimitry Andric 1850b57cec5SDimitry Andric // Temporary record of synthetic ThunkSection instances and the pass that 1860b57cec5SDimitry Andric // they were created in. This is used to insert newly created ThunkSections 1870b57cec5SDimitry Andric // into Sections at the end of a createThunks() pass. 1880b57cec5SDimitry Andric std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections; 1895ffd83dbSDimitry Andric 1905ffd83dbSDimitry Andric // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command. 1915ffd83dbSDimitry Andric uint64_t withFlags; 1925ffd83dbSDimitry Andric uint64_t withoutFlags; 1930b57cec5SDimitry Andric }; 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric // Represents BYTE(), SHORT(), LONG(), or QUAD(). 
1960b57cec5SDimitry Andric struct ByteCommand : BaseCommand { 1970b57cec5SDimitry Andric ByteCommand(Expr e, unsigned size, std::string commandString) 1980b57cec5SDimitry Andric : BaseCommand(ByteKind), commandString(commandString), expression(e), 1990b57cec5SDimitry Andric size(size) {} 2000b57cec5SDimitry Andric 2010b57cec5SDimitry Andric static bool classof(const BaseCommand *c) { return c->kind == ByteKind; } 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric // Keeps string representing the command. Used for -Map" is perhaps better. 2040b57cec5SDimitry Andric std::string commandString; 2050b57cec5SDimitry Andric 2060b57cec5SDimitry Andric Expr expression; 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric // This is just an offset of this assignment command in the output section. 2090b57cec5SDimitry Andric unsigned offset; 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric // Size of this data command. 2120b57cec5SDimitry Andric unsigned size; 2130b57cec5SDimitry Andric }; 2140b57cec5SDimitry Andric 2155ffd83dbSDimitry Andric struct InsertCommand { 2165ffd83dbSDimitry Andric OutputSection *os; 2175ffd83dbSDimitry Andric bool isAfter; 2185ffd83dbSDimitry Andric StringRef where; 2195ffd83dbSDimitry Andric }; 2205ffd83dbSDimitry Andric 2210b57cec5SDimitry Andric struct PhdrsCommand { 2220b57cec5SDimitry Andric StringRef name; 2230b57cec5SDimitry Andric unsigned type = llvm::ELF::PT_NULL; 2240b57cec5SDimitry Andric bool hasFilehdr = false; 2250b57cec5SDimitry Andric bool hasPhdrs = false; 2260b57cec5SDimitry Andric llvm::Optional<unsigned> flags; 2270b57cec5SDimitry Andric Expr lmaExpr = nullptr; 2280b57cec5SDimitry Andric }; 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric class LinkerScript final { 2310b57cec5SDimitry Andric // Temporary state used in processSectionCommands() and assignAddresses() 2320b57cec5SDimitry Andric // that must be reinitialized for each call to the above functions, and must 2330b57cec5SDimitry Andric // not be used outside 
of the scope of a call to the above functions. 2340b57cec5SDimitry Andric struct AddressState { 2350b57cec5SDimitry Andric AddressState(); 2360b57cec5SDimitry Andric uint64_t threadBssOffset = 0; 2370b57cec5SDimitry Andric OutputSection *outSec = nullptr; 2380b57cec5SDimitry Andric MemoryRegion *memRegion = nullptr; 2390b57cec5SDimitry Andric MemoryRegion *lmaRegion = nullptr; 2400b57cec5SDimitry Andric uint64_t lmaOffset = 0; 2410b57cec5SDimitry Andric }; 2420b57cec5SDimitry Andric 2430b57cec5SDimitry Andric llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection; 2440b57cec5SDimitry Andric 2450b57cec5SDimitry Andric void addSymbol(SymbolAssignment *cmd); 2460b57cec5SDimitry Andric void assignSymbol(SymbolAssignment *cmd, bool inSec); 2470b57cec5SDimitry Andric void setDot(Expr e, const Twine &loc, bool inSec); 2480b57cec5SDimitry Andric void expandOutputSection(uint64_t size); 2490b57cec5SDimitry Andric void expandMemoryRegions(uint64_t size); 2500b57cec5SDimitry Andric 25185868e8aSDimitry Andric std::vector<InputSectionBase *> 2525ffd83dbSDimitry Andric computeInputSections(const InputSectionDescription *, 2535ffd83dbSDimitry Andric ArrayRef<InputSectionBase *>); 2540b57cec5SDimitry Andric 25585868e8aSDimitry Andric std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd); 2560b57cec5SDimitry Andric 2575ffd83dbSDimitry Andric void discardSynthetic(OutputSection &); 2585ffd83dbSDimitry Andric 2590b57cec5SDimitry Andric std::vector<size_t> getPhdrIndices(OutputSection *sec); 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric MemoryRegion *findMemoryRegion(OutputSection *sec); 2620b57cec5SDimitry Andric 2630b57cec5SDimitry Andric void switchTo(OutputSection *sec); 2640b57cec5SDimitry Andric uint64_t advance(uint64_t size, unsigned align); 2650b57cec5SDimitry Andric void output(InputSection *sec); 2660b57cec5SDimitry Andric 2670b57cec5SDimitry Andric void assignOffsets(OutputSection *sec); 2680b57cec5SDimitry Andric 2690b57cec5SDimitry 
Andric // Ctx captures the local AddressState and makes it accessible 2700b57cec5SDimitry Andric // deliberately. This is needed as there are some cases where we cannot just 2710b57cec5SDimitry Andric // thread the current state through to a lambda function created by the 2720b57cec5SDimitry Andric // script parser. 2730b57cec5SDimitry Andric // This should remain a plain pointer as its lifetime is smaller than 2740b57cec5SDimitry Andric // LinkerScript. 2750b57cec5SDimitry Andric AddressState *ctx = nullptr; 2760b57cec5SDimitry Andric 2770b57cec5SDimitry Andric OutputSection *aether; 2780b57cec5SDimitry Andric 2790b57cec5SDimitry Andric uint64_t dot; 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric public: 2820b57cec5SDimitry Andric OutputSection *createOutputSection(StringRef name, StringRef location); 2830b57cec5SDimitry Andric OutputSection *getOrCreateOutputSection(StringRef name); 2840b57cec5SDimitry Andric 2850b57cec5SDimitry Andric bool hasPhdrsCommands() { return !phdrsCommands.empty(); } 2860b57cec5SDimitry Andric uint64_t getDot() { return dot; } 28785868e8aSDimitry Andric void discard(InputSectionBase *s); 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric ExprValue getSymbolValue(StringRef name, const Twine &loc); 2900b57cec5SDimitry Andric 2910b57cec5SDimitry Andric void addOrphanSections(); 2925ffd83dbSDimitry Andric void diagnoseOrphanHandling() const; 2930b57cec5SDimitry Andric void adjustSectionsBeforeSorting(); 2940b57cec5SDimitry Andric void adjustSectionsAfterSorting(); 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric std::vector<PhdrEntry *> createPhdrs(); 2970b57cec5SDimitry Andric bool needsInterpSection(); 2980b57cec5SDimitry Andric 2990b57cec5SDimitry Andric bool shouldKeep(InputSectionBase *s); 30085868e8aSDimitry Andric const Defined *assignAddresses(); 3010b57cec5SDimitry Andric void allocateHeaders(std::vector<PhdrEntry *> &phdrs); 3020b57cec5SDimitry Andric void processSectionCommands(); 30385868e8aSDimitry Andric void 
processSymbolAssignments(); 3040b57cec5SDimitry Andric void declareSymbols(); 3050b57cec5SDimitry Andric 3060b57cec5SDimitry Andric // Used to handle INSERT AFTER statements. 3070b57cec5SDimitry Andric void processInsertCommands(); 3080b57cec5SDimitry Andric 3090b57cec5SDimitry Andric // SECTIONS command list. 3100b57cec5SDimitry Andric std::vector<BaseCommand *> sectionCommands; 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric // PHDRS command list. 3130b57cec5SDimitry Andric std::vector<PhdrsCommand> phdrsCommands; 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric bool hasSectionsCommand = false; 3160b57cec5SDimitry Andric bool errorOnMissingSection = false; 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric // List of section patterns specified with KEEP commands. They will 3190b57cec5SDimitry Andric // be kept even if they are unused and --gc-sections is specified. 3200b57cec5SDimitry Andric std::vector<InputSectionDescription *> keptSections; 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric // A map from memory region name to a memory region descriptor. 3230b57cec5SDimitry Andric llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions; 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric // A list of symbols referenced by the script. 3260b57cec5SDimitry Andric std::vector<llvm::StringRef> referencedSymbols; 3270b57cec5SDimitry Andric 3285ffd83dbSDimitry Andric // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need 3295ffd83dbSDimitry Andric // to be reordered. 3305ffd83dbSDimitry Andric std::vector<InsertCommand> insertCommands; 3315ffd83dbSDimitry Andric 3325ffd83dbSDimitry Andric // Sections that will be warned/errored by --orphan-handling. 
3335ffd83dbSDimitry Andric std::vector<const InputSectionBase *> orphanSections; 3340b57cec5SDimitry Andric }; 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric extern LinkerScript *script; 3370b57cec5SDimitry Andric 3380b57cec5SDimitry Andric } // end namespace elf 3390b57cec5SDimitry Andric } // end namespace lld 3400b57cec5SDimitry Andric 3410b57cec5SDimitry Andric #endif // LLD_ELF_LINKER_SCRIPT_H 342