1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_LINKER_SCRIPT_H 10 #define LLD_ELF_LINKER_SCRIPT_H 11 12 #include "Config.h" 13 #include "Writer.h" 14 #include "lld/Common/LLVM.h" 15 #include "lld/Common/Strings.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/MapVector.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/MemoryBuffer.h" 22 #include <cstddef> 23 #include <cstdint> 24 #include <functional> 25 #include <memory> 26 #include <vector> 27 28 namespace lld { 29 namespace elf { 30 31 class Defined; 32 class InputSection; 33 class InputSectionBase; 34 class OutputSection; 35 class SectionBase; 36 class Symbol; 37 class ThunkSection; 38 39 // This represents an r-value in the linker script. 40 struct ExprValue { 41 ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, 42 const Twine &loc) 43 : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {} 44 45 ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {} 46 47 bool isAbsolute() const { return forceAbsolute || sec == nullptr; } 48 uint64_t getValue() const; 49 uint64_t getSecAddr() const; 50 uint64_t getSectionOffset() const; 51 52 // If a value is relative to a section, it has a non-null Sec. 53 SectionBase *sec; 54 55 // True if this expression is enclosed in ABSOLUTE(). 56 // This flag affects the return value of getValue(). 57 bool forceAbsolute; 58 59 uint64_t val; 60 uint64_t alignment = 1; 61 62 // Original source location. Used for error messages. 63 std::string loc; 64 }; 65 66 // This represents an expression in the linker script. 67 // ScriptParser::readExpr reads an expression and returns an Expr. 68 // Later, we evaluate the expression by calling the function. 69 using Expr = std::function<ExprValue()>; 70 71 // This enum is used to implement linker script SECTIONS command. 72 // https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS 73 enum SectionsCommandKind { 74 AssignmentKind, // . = expr or <sym> = expr 75 OutputSectionKind, 76 InputSectionKind, 77 ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) 78 }; 79 80 struct BaseCommand { 81 BaseCommand(int k) : kind(k) {} 82 int kind; 83 }; 84 85 // This represents ". = <expr>" or "<symbol> = <expr>". 86 struct SymbolAssignment : BaseCommand { 87 SymbolAssignment(StringRef name, Expr e, std::string loc) 88 : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {} 89 90 static bool classof(const BaseCommand *c) { 91 return c->kind == AssignmentKind; 92 } 93 94 // The LHS of an expression. Name is either a symbol name or ".". 95 StringRef name; 96 Defined *sym = nullptr; 97 98 // The RHS of an expression. 99 Expr expression; 100 101 // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. 102 bool provide = false; 103 bool hidden = false; 104 105 // Holds file name and line number for error reporting. 106 std::string location; 107 108 // A string representation of this command. We use this for -Map. 109 std::string commandString; 110 111 // Address of this assignment command. 112 unsigned addr; 113 114 // Size of this assignment command. This is usually 0, but if 115 // you move '.' this may be greater than 0. 116 unsigned size; 117 }; 118 119 // Linker scripts allow additional constraints to be put on ouput sections. 120 // If an output section is marked as ONLY_IF_RO, the section is created 121 // only if its input sections are read-only. Likewise, an output section 122 // with ONLY_IF_RW is created if all input sections are RW. 123 enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; 124 125 // This struct is used to represent the location and size of regions of 126 // target memory. Instances of the struct are created by parsing the 127 // MEMORY command. 128 struct MemoryRegion { 129 MemoryRegion(StringRef name, uint64_t origin, uint64_t length, uint32_t flags, 130 uint32_t negFlags) 131 : name(name), origin(origin), length(length), flags(flags), 132 negFlags(negFlags) {} 133 134 std::string name; 135 uint64_t origin; 136 uint64_t length; 137 uint32_t flags; 138 uint32_t negFlags; 139 uint64_t curPos = 0; 140 }; 141 142 // This struct represents one section match pattern in SECTIONS() command. 143 // It can optionally have negative match pattern for EXCLUDED_FILE command. 144 // Also it may be surrounded with SORT() command, so contains sorting rules. 145 struct SectionPattern { 146 SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) 147 : excludedFilePat(pat1), sectionPat(pat2), 148 sortOuter(SortSectionPolicy::Default), 149 sortInner(SortSectionPolicy::Default) {} 150 151 StringMatcher excludedFilePat; 152 StringMatcher sectionPat; 153 SortSectionPolicy sortOuter; 154 SortSectionPolicy sortInner; 155 }; 156 157 struct InputSectionDescription : BaseCommand { 158 InputSectionDescription(StringRef filePattern) 159 : BaseCommand(InputSectionKind), filePat(filePattern) {} 160 161 static bool classof(const BaseCommand *c) { 162 return c->kind == InputSectionKind; 163 } 164 165 StringMatcher filePat; 166 167 // Input sections that matches at least one of SectionPatterns 168 // will be associated with this InputSectionDescription. 169 std::vector<SectionPattern> sectionPatterns; 170 171 // Includes InputSections and MergeInputSections. Used temporarily during 172 // assignment of input sections to output sections. 173 std::vector<InputSectionBase *> sectionBases; 174 175 // Used after the finalizeInputSections() pass. MergeInputSections have been 176 // merged into MergeSyntheticSections. 177 std::vector<InputSection *> sections; 178 179 // Temporary record of synthetic ThunkSection instances and the pass that 180 // they were created in. This is used to insert newly created ThunkSections 181 // into Sections at the end of a createThunks() pass. 182 std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections; 183 }; 184 185 // Represents BYTE(), SHORT(), LONG(), or QUAD(). 186 struct ByteCommand : BaseCommand { 187 ByteCommand(Expr e, unsigned size, std::string commandString) 188 : BaseCommand(ByteKind), commandString(commandString), expression(e), 189 size(size) {} 190 191 static bool classof(const BaseCommand *c) { return c->kind == ByteKind; } 192 193 // Keeps string representing the command. Used for -Map" is perhaps better. 194 std::string commandString; 195 196 Expr expression; 197 198 // This is just an offset of this assignment command in the output section. 199 unsigned offset; 200 201 // Size of this data command. 202 unsigned size; 203 }; 204 205 struct PhdrsCommand { 206 StringRef name; 207 unsigned type = llvm::ELF::PT_NULL; 208 bool hasFilehdr = false; 209 bool hasPhdrs = false; 210 llvm::Optional<unsigned> flags; 211 Expr lmaExpr = nullptr; 212 }; 213 214 class LinkerScript final { 215 // Temporary state used in processSectionCommands() and assignAddresses() 216 // that must be reinitialized for each call to the above functions, and must 217 // not be used outside of the scope of a call to the above functions. 218 struct AddressState { 219 AddressState(); 220 uint64_t threadBssOffset = 0; 221 OutputSection *outSec = nullptr; 222 MemoryRegion *memRegion = nullptr; 223 MemoryRegion *lmaRegion = nullptr; 224 uint64_t lmaOffset = 0; 225 }; 226 227 llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection; 228 229 void addSymbol(SymbolAssignment *cmd); 230 void assignSymbol(SymbolAssignment *cmd, bool inSec); 231 void setDot(Expr e, const Twine &loc, bool inSec); 232 void expandOutputSection(uint64_t size); 233 void expandMemoryRegions(uint64_t size); 234 235 std::vector<InputSectionBase *> 236 computeInputSections(const InputSectionDescription *); 237 238 std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd); 239 240 std::vector<size_t> getPhdrIndices(OutputSection *sec); 241 242 MemoryRegion *findMemoryRegion(OutputSection *sec); 243 244 void switchTo(OutputSection *sec); 245 uint64_t advance(uint64_t size, unsigned align); 246 void output(InputSection *sec); 247 248 void assignOffsets(OutputSection *sec); 249 250 // Ctx captures the local AddressState and makes it accessible 251 // deliberately. This is needed as there are some cases where we cannot just 252 // thread the current state through to a lambda function created by the 253 // script parser. 254 // This should remain a plain pointer as its lifetime is smaller than 255 // LinkerScript. 256 AddressState *ctx = nullptr; 257 258 OutputSection *aether; 259 260 uint64_t dot; 261 262 public: 263 OutputSection *createOutputSection(StringRef name, StringRef location); 264 OutputSection *getOrCreateOutputSection(StringRef name); 265 266 bool hasPhdrsCommands() { return !phdrsCommands.empty(); } 267 uint64_t getDot() { return dot; } 268 void discard(InputSectionBase *s); 269 270 ExprValue getSymbolValue(StringRef name, const Twine &loc); 271 272 void addOrphanSections(); 273 void adjustSectionsBeforeSorting(); 274 void adjustSectionsAfterSorting(); 275 276 std::vector<PhdrEntry *> createPhdrs(); 277 bool needsInterpSection(); 278 279 bool shouldKeep(InputSectionBase *s); 280 const Defined *assignAddresses(); 281 void allocateHeaders(std::vector<PhdrEntry *> &phdrs); 282 void processSectionCommands(); 283 void processSymbolAssignments(); 284 void declareSymbols(); 285 286 // Used to handle INSERT AFTER statements. 287 void processInsertCommands(); 288 289 // SECTIONS command list. 290 std::vector<BaseCommand *> sectionCommands; 291 292 // PHDRS command list. 293 std::vector<PhdrsCommand> phdrsCommands; 294 295 bool hasSectionsCommand = false; 296 bool errorOnMissingSection = false; 297 298 // List of section patterns specified with KEEP commands. They will 299 // be kept even if they are unused and --gc-sections is specified. 300 std::vector<InputSectionDescription *> keptSections; 301 302 // A map from memory region name to a memory region descriptor. 303 llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions; 304 305 // A list of symbols referenced by the script. 306 std::vector<llvm::StringRef> referencedSymbols; 307 308 // Used to implement INSERT [AFTER|BEFORE]. Contains commands that need 309 // to be inserted into SECTIONS commands list. 310 llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertAfterCommands; 311 llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertBeforeCommands; 312 }; 313 314 extern LinkerScript *script; 315 316 } // end namespace elf 317 } // end namespace lld 318 319 #endif // LLD_ELF_LINKER_SCRIPT_H 320