1*0b57cec5SDimitry Andric //===- LinkerScript.h -------------------------------------------*- C++ -*-===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric 9*0b57cec5SDimitry Andric #ifndef LLD_ELF_LINKER_SCRIPT_H 10*0b57cec5SDimitry Andric #define LLD_ELF_LINKER_SCRIPT_H 11*0b57cec5SDimitry Andric 12*0b57cec5SDimitry Andric #include "Config.h" 13*0b57cec5SDimitry Andric #include "Writer.h" 14*0b57cec5SDimitry Andric #include "lld/Common/LLVM.h" 15*0b57cec5SDimitry Andric #include "lld/Common/Strings.h" 16*0b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 17*0b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 18*0b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h" 19*0b57cec5SDimitry Andric #include "llvm/ADT/MapVector.h" 20*0b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 21*0b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h" 22*0b57cec5SDimitry Andric #include <cstddef> 23*0b57cec5SDimitry Andric #include <cstdint> 24*0b57cec5SDimitry Andric #include <functional> 25*0b57cec5SDimitry Andric #include <memory> 26*0b57cec5SDimitry Andric #include <vector> 27*0b57cec5SDimitry Andric 28*0b57cec5SDimitry Andric namespace lld { 29*0b57cec5SDimitry Andric namespace elf { 30*0b57cec5SDimitry Andric 31*0b57cec5SDimitry Andric class Defined; 32*0b57cec5SDimitry Andric class InputSection; 33*0b57cec5SDimitry Andric class InputSectionBase; 34*0b57cec5SDimitry Andric class OutputSection; 35*0b57cec5SDimitry Andric class SectionBase; 36*0b57cec5SDimitry Andric class Symbol; 37*0b57cec5SDimitry Andric class ThunkSection; 38*0b57cec5SDimitry Andric 39*0b57cec5SDimitry Andric // This represents an r-value in the linker script. 40*0b57cec5SDimitry Andric struct ExprValue { 41*0b57cec5SDimitry Andric ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, 42*0b57cec5SDimitry Andric const Twine &loc) 43*0b57cec5SDimitry Andric : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {} 44*0b57cec5SDimitry Andric 45*0b57cec5SDimitry Andric ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {} 46*0b57cec5SDimitry Andric 47*0b57cec5SDimitry Andric bool isAbsolute() const { return forceAbsolute || sec == nullptr; } 48*0b57cec5SDimitry Andric uint64_t getValue() const; 49*0b57cec5SDimitry Andric uint64_t getSecAddr() const; 50*0b57cec5SDimitry Andric uint64_t getSectionOffset() const; 51*0b57cec5SDimitry Andric 52*0b57cec5SDimitry Andric // If a value is relative to a section, it has a non-null Sec. 53*0b57cec5SDimitry Andric SectionBase *sec; 54*0b57cec5SDimitry Andric 55*0b57cec5SDimitry Andric // True if this expression is enclosed in ABSOLUTE(). 56*0b57cec5SDimitry Andric // This flag affects the return value of getValue(). 57*0b57cec5SDimitry Andric bool forceAbsolute; 58*0b57cec5SDimitry Andric 59*0b57cec5SDimitry Andric uint64_t val; 60*0b57cec5SDimitry Andric uint64_t alignment = 1; 61*0b57cec5SDimitry Andric 62*0b57cec5SDimitry Andric // Original source location. Used for error messages. 63*0b57cec5SDimitry Andric std::string loc; 64*0b57cec5SDimitry Andric }; 65*0b57cec5SDimitry Andric 66*0b57cec5SDimitry Andric // This represents an expression in the linker script. 67*0b57cec5SDimitry Andric // ScriptParser::readExpr reads an expression and returns an Expr. 68*0b57cec5SDimitry Andric // Later, we evaluate the expression by calling the function. 69*0b57cec5SDimitry Andric using Expr = std::function<ExprValue()>; 70*0b57cec5SDimitry Andric 71*0b57cec5SDimitry Andric // This enum is used to implement linker script SECTIONS command. 72*0b57cec5SDimitry Andric // https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS 73*0b57cec5SDimitry Andric enum SectionsCommandKind { 74*0b57cec5SDimitry Andric AssignmentKind, // . = expr or <sym> = expr 75*0b57cec5SDimitry Andric OutputSectionKind, 76*0b57cec5SDimitry Andric InputSectionKind, 77*0b57cec5SDimitry Andric ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) 78*0b57cec5SDimitry Andric }; 79*0b57cec5SDimitry Andric 80*0b57cec5SDimitry Andric struct BaseCommand { 81*0b57cec5SDimitry Andric BaseCommand(int k) : kind(k) {} 82*0b57cec5SDimitry Andric int kind; 83*0b57cec5SDimitry Andric }; 84*0b57cec5SDimitry Andric 85*0b57cec5SDimitry Andric // This represents ". = <expr>" or "<symbol> = <expr>". 86*0b57cec5SDimitry Andric struct SymbolAssignment : BaseCommand { 87*0b57cec5SDimitry Andric SymbolAssignment(StringRef name, Expr e, std::string loc) 88*0b57cec5SDimitry Andric : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {} 89*0b57cec5SDimitry Andric 90*0b57cec5SDimitry Andric static bool classof(const BaseCommand *c) { 91*0b57cec5SDimitry Andric return c->kind == AssignmentKind; 92*0b57cec5SDimitry Andric } 93*0b57cec5SDimitry Andric 94*0b57cec5SDimitry Andric // The LHS of an expression. Name is either a symbol name or ".". 95*0b57cec5SDimitry Andric StringRef name; 96*0b57cec5SDimitry Andric Defined *sym = nullptr; 97*0b57cec5SDimitry Andric 98*0b57cec5SDimitry Andric // The RHS of an expression. 99*0b57cec5SDimitry Andric Expr expression; 100*0b57cec5SDimitry Andric 101*0b57cec5SDimitry Andric // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. 102*0b57cec5SDimitry Andric bool provide = false; 103*0b57cec5SDimitry Andric bool hidden = false; 104*0b57cec5SDimitry Andric 105*0b57cec5SDimitry Andric // Holds file name and line number for error reporting. 106*0b57cec5SDimitry Andric std::string location; 107*0b57cec5SDimitry Andric 108*0b57cec5SDimitry Andric // A string representation of this command. We use this for -Map. 109*0b57cec5SDimitry Andric std::string commandString; 110*0b57cec5SDimitry Andric 111*0b57cec5SDimitry Andric // Address of this assignment command. 112*0b57cec5SDimitry Andric unsigned addr; 113*0b57cec5SDimitry Andric 114*0b57cec5SDimitry Andric // Size of this assignment command. This is usually 0, but if 115*0b57cec5SDimitry Andric // you move '.' this may be greater than 0. 116*0b57cec5SDimitry Andric unsigned size; 117*0b57cec5SDimitry Andric }; 118*0b57cec5SDimitry Andric 119*0b57cec5SDimitry Andric // Linker scripts allow additional constraints to be put on ouput sections. 120*0b57cec5SDimitry Andric // If an output section is marked as ONLY_IF_RO, the section is created 121*0b57cec5SDimitry Andric // only if its input sections are read-only. Likewise, an output section 122*0b57cec5SDimitry Andric // with ONLY_IF_RW is created if all input sections are RW. 123*0b57cec5SDimitry Andric enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; 124*0b57cec5SDimitry Andric 125*0b57cec5SDimitry Andric // This struct is used to represent the location and size of regions of 126*0b57cec5SDimitry Andric // target memory. Instances of the struct are created by parsing the 127*0b57cec5SDimitry Andric // MEMORY command. 128*0b57cec5SDimitry Andric struct MemoryRegion { 129*0b57cec5SDimitry Andric MemoryRegion(StringRef name, uint64_t origin, uint64_t length, uint32_t flags, 130*0b57cec5SDimitry Andric uint32_t negFlags) 131*0b57cec5SDimitry Andric : name(name), origin(origin), length(length), flags(flags), 132*0b57cec5SDimitry Andric negFlags(negFlags) {} 133*0b57cec5SDimitry Andric 134*0b57cec5SDimitry Andric std::string name; 135*0b57cec5SDimitry Andric uint64_t origin; 136*0b57cec5SDimitry Andric uint64_t length; 137*0b57cec5SDimitry Andric uint32_t flags; 138*0b57cec5SDimitry Andric uint32_t negFlags; 139*0b57cec5SDimitry Andric uint64_t curPos = 0; 140*0b57cec5SDimitry Andric }; 141*0b57cec5SDimitry Andric 142*0b57cec5SDimitry Andric // This struct represents one section match pattern in SECTIONS() command. 143*0b57cec5SDimitry Andric // It can optionally have negative match pattern for EXCLUDED_FILE command. 144*0b57cec5SDimitry Andric // Also it may be surrounded with SORT() command, so contains sorting rules. 145*0b57cec5SDimitry Andric struct SectionPattern { 146*0b57cec5SDimitry Andric SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) 147*0b57cec5SDimitry Andric : excludedFilePat(pat1), sectionPat(pat2), 148*0b57cec5SDimitry Andric sortOuter(SortSectionPolicy::Default), 149*0b57cec5SDimitry Andric sortInner(SortSectionPolicy::Default) {} 150*0b57cec5SDimitry Andric 151*0b57cec5SDimitry Andric StringMatcher excludedFilePat; 152*0b57cec5SDimitry Andric StringMatcher sectionPat; 153*0b57cec5SDimitry Andric SortSectionPolicy sortOuter; 154*0b57cec5SDimitry Andric SortSectionPolicy sortInner; 155*0b57cec5SDimitry Andric }; 156*0b57cec5SDimitry Andric 157*0b57cec5SDimitry Andric struct InputSectionDescription : BaseCommand { 158*0b57cec5SDimitry Andric InputSectionDescription(StringRef filePattern) 159*0b57cec5SDimitry Andric : BaseCommand(InputSectionKind), filePat(filePattern) {} 160*0b57cec5SDimitry Andric 161*0b57cec5SDimitry Andric static bool classof(const BaseCommand *c) { 162*0b57cec5SDimitry Andric return c->kind == InputSectionKind; 163*0b57cec5SDimitry Andric } 164*0b57cec5SDimitry Andric 165*0b57cec5SDimitry Andric StringMatcher filePat; 166*0b57cec5SDimitry Andric 167*0b57cec5SDimitry Andric // Input sections that matches at least one of SectionPatterns 168*0b57cec5SDimitry Andric // will be associated with this InputSectionDescription. 169*0b57cec5SDimitry Andric std::vector<SectionPattern> sectionPatterns; 170*0b57cec5SDimitry Andric 171*0b57cec5SDimitry Andric std::vector<InputSection *> sections; 172*0b57cec5SDimitry Andric 173*0b57cec5SDimitry Andric // Temporary record of synthetic ThunkSection instances and the pass that 174*0b57cec5SDimitry Andric // they were created in. This is used to insert newly created ThunkSections 175*0b57cec5SDimitry Andric // into Sections at the end of a createThunks() pass. 176*0b57cec5SDimitry Andric std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections; 177*0b57cec5SDimitry Andric }; 178*0b57cec5SDimitry Andric 179*0b57cec5SDimitry Andric // Represents BYTE(), SHORT(), LONG(), or QUAD(). 180*0b57cec5SDimitry Andric struct ByteCommand : BaseCommand { 181*0b57cec5SDimitry Andric ByteCommand(Expr e, unsigned size, std::string commandString) 182*0b57cec5SDimitry Andric : BaseCommand(ByteKind), commandString(commandString), expression(e), 183*0b57cec5SDimitry Andric size(size) {} 184*0b57cec5SDimitry Andric 185*0b57cec5SDimitry Andric static bool classof(const BaseCommand *c) { return c->kind == ByteKind; } 186*0b57cec5SDimitry Andric 187*0b57cec5SDimitry Andric // Keeps string representing the command. Used for -Map" is perhaps better. 188*0b57cec5SDimitry Andric std::string commandString; 189*0b57cec5SDimitry Andric 190*0b57cec5SDimitry Andric Expr expression; 191*0b57cec5SDimitry Andric 192*0b57cec5SDimitry Andric // This is just an offset of this assignment command in the output section. 193*0b57cec5SDimitry Andric unsigned offset; 194*0b57cec5SDimitry Andric 195*0b57cec5SDimitry Andric // Size of this data command. 196*0b57cec5SDimitry Andric unsigned size; 197*0b57cec5SDimitry Andric }; 198*0b57cec5SDimitry Andric 199*0b57cec5SDimitry Andric struct PhdrsCommand { 200*0b57cec5SDimitry Andric StringRef name; 201*0b57cec5SDimitry Andric unsigned type = llvm::ELF::PT_NULL; 202*0b57cec5SDimitry Andric bool hasFilehdr = false; 203*0b57cec5SDimitry Andric bool hasPhdrs = false; 204*0b57cec5SDimitry Andric llvm::Optional<unsigned> flags; 205*0b57cec5SDimitry Andric Expr lmaExpr = nullptr; 206*0b57cec5SDimitry Andric }; 207*0b57cec5SDimitry Andric 208*0b57cec5SDimitry Andric class LinkerScript final { 209*0b57cec5SDimitry Andric // Temporary state used in processSectionCommands() and assignAddresses() 210*0b57cec5SDimitry Andric // that must be reinitialized for each call to the above functions, and must 211*0b57cec5SDimitry Andric // not be used outside of the scope of a call to the above functions. 212*0b57cec5SDimitry Andric struct AddressState { 213*0b57cec5SDimitry Andric AddressState(); 214*0b57cec5SDimitry Andric uint64_t threadBssOffset = 0; 215*0b57cec5SDimitry Andric OutputSection *outSec = nullptr; 216*0b57cec5SDimitry Andric MemoryRegion *memRegion = nullptr; 217*0b57cec5SDimitry Andric MemoryRegion *lmaRegion = nullptr; 218*0b57cec5SDimitry Andric uint64_t lmaOffset = 0; 219*0b57cec5SDimitry Andric }; 220*0b57cec5SDimitry Andric 221*0b57cec5SDimitry Andric llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection; 222*0b57cec5SDimitry Andric 223*0b57cec5SDimitry Andric void addSymbol(SymbolAssignment *cmd); 224*0b57cec5SDimitry Andric void assignSymbol(SymbolAssignment *cmd, bool inSec); 225*0b57cec5SDimitry Andric void setDot(Expr e, const Twine &loc, bool inSec); 226*0b57cec5SDimitry Andric void expandOutputSection(uint64_t size); 227*0b57cec5SDimitry Andric void expandMemoryRegions(uint64_t size); 228*0b57cec5SDimitry Andric 229*0b57cec5SDimitry Andric std::vector<InputSection *> 230*0b57cec5SDimitry Andric computeInputSections(const InputSectionDescription *); 231*0b57cec5SDimitry Andric 232*0b57cec5SDimitry Andric std::vector<InputSection *> createInputSectionList(OutputSection &cmd); 233*0b57cec5SDimitry Andric 234*0b57cec5SDimitry Andric std::vector<size_t> getPhdrIndices(OutputSection *sec); 235*0b57cec5SDimitry Andric 236*0b57cec5SDimitry Andric MemoryRegion *findMemoryRegion(OutputSection *sec); 237*0b57cec5SDimitry Andric 238*0b57cec5SDimitry Andric void switchTo(OutputSection *sec); 239*0b57cec5SDimitry Andric uint64_t advance(uint64_t size, unsigned align); 240*0b57cec5SDimitry Andric void output(InputSection *sec); 241*0b57cec5SDimitry Andric 242*0b57cec5SDimitry Andric void assignOffsets(OutputSection *sec); 243*0b57cec5SDimitry Andric 244*0b57cec5SDimitry Andric // Ctx captures the local AddressState and makes it accessible 245*0b57cec5SDimitry Andric // deliberately. This is needed as there are some cases where we cannot just 246*0b57cec5SDimitry Andric // thread the current state through to a lambda function created by the 247*0b57cec5SDimitry Andric // script parser. 248*0b57cec5SDimitry Andric // This should remain a plain pointer as its lifetime is smaller than 249*0b57cec5SDimitry Andric // LinkerScript. 250*0b57cec5SDimitry Andric AddressState *ctx = nullptr; 251*0b57cec5SDimitry Andric 252*0b57cec5SDimitry Andric OutputSection *aether; 253*0b57cec5SDimitry Andric 254*0b57cec5SDimitry Andric uint64_t dot; 255*0b57cec5SDimitry Andric 256*0b57cec5SDimitry Andric public: 257*0b57cec5SDimitry Andric OutputSection *createOutputSection(StringRef name, StringRef location); 258*0b57cec5SDimitry Andric OutputSection *getOrCreateOutputSection(StringRef name); 259*0b57cec5SDimitry Andric 260*0b57cec5SDimitry Andric bool hasPhdrsCommands() { return !phdrsCommands.empty(); } 261*0b57cec5SDimitry Andric uint64_t getDot() { return dot; } 262*0b57cec5SDimitry Andric void discard(ArrayRef<InputSection *> v); 263*0b57cec5SDimitry Andric 264*0b57cec5SDimitry Andric ExprValue getSymbolValue(StringRef name, const Twine &loc); 265*0b57cec5SDimitry Andric 266*0b57cec5SDimitry Andric void addOrphanSections(); 267*0b57cec5SDimitry Andric void adjustSectionsBeforeSorting(); 268*0b57cec5SDimitry Andric void adjustSectionsAfterSorting(); 269*0b57cec5SDimitry Andric 270*0b57cec5SDimitry Andric std::vector<PhdrEntry *> createPhdrs(); 271*0b57cec5SDimitry Andric bool needsInterpSection(); 272*0b57cec5SDimitry Andric 273*0b57cec5SDimitry Andric bool shouldKeep(InputSectionBase *s); 274*0b57cec5SDimitry Andric void assignAddresses(); 275*0b57cec5SDimitry Andric void allocateHeaders(std::vector<PhdrEntry *> &phdrs); 276*0b57cec5SDimitry Andric void processSectionCommands(); 277*0b57cec5SDimitry Andric void declareSymbols(); 278*0b57cec5SDimitry Andric 279*0b57cec5SDimitry Andric // Used to handle INSERT AFTER statements. 280*0b57cec5SDimitry Andric void processInsertCommands(); 281*0b57cec5SDimitry Andric 282*0b57cec5SDimitry Andric // SECTIONS command list. 283*0b57cec5SDimitry Andric std::vector<BaseCommand *> sectionCommands; 284*0b57cec5SDimitry Andric 285*0b57cec5SDimitry Andric // PHDRS command list. 286*0b57cec5SDimitry Andric std::vector<PhdrsCommand> phdrsCommands; 287*0b57cec5SDimitry Andric 288*0b57cec5SDimitry Andric bool hasSectionsCommand = false; 289*0b57cec5SDimitry Andric bool errorOnMissingSection = false; 290*0b57cec5SDimitry Andric 291*0b57cec5SDimitry Andric // List of section patterns specified with KEEP commands. They will 292*0b57cec5SDimitry Andric // be kept even if they are unused and --gc-sections is specified. 293*0b57cec5SDimitry Andric std::vector<InputSectionDescription *> keptSections; 294*0b57cec5SDimitry Andric 295*0b57cec5SDimitry Andric // A map from memory region name to a memory region descriptor. 296*0b57cec5SDimitry Andric llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions; 297*0b57cec5SDimitry Andric 298*0b57cec5SDimitry Andric // A list of symbols referenced by the script. 299*0b57cec5SDimitry Andric std::vector<llvm::StringRef> referencedSymbols; 300*0b57cec5SDimitry Andric 301*0b57cec5SDimitry Andric // Used to implement INSERT [AFTER|BEFORE]. Contains commands that need 302*0b57cec5SDimitry Andric // to be inserted into SECTIONS commands list. 303*0b57cec5SDimitry Andric llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertAfterCommands; 304*0b57cec5SDimitry Andric llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertBeforeCommands; 305*0b57cec5SDimitry Andric }; 306*0b57cec5SDimitry Andric 307*0b57cec5SDimitry Andric extern LinkerScript *script; 308*0b57cec5SDimitry Andric 309*0b57cec5SDimitry Andric } // end namespace elf 310*0b57cec5SDimitry Andric } // end namespace lld 311*0b57cec5SDimitry Andric 312*0b57cec5SDimitry Andric #endif // LLD_ELF_LINKER_SCRIPT_H 313