1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_LINKER_SCRIPT_H 10 #define LLD_ELF_LINKER_SCRIPT_H 11 12 #include "Config.h" 13 #include "Writer.h" 14 #include "lld/Common/LLVM.h" 15 #include "lld/Common/Strings.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/MapVector.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/MemoryBuffer.h" 22 #include <cstddef> 23 #include <cstdint> 24 #include <functional> 25 #include <memory> 26 #include <vector> 27 28 namespace lld { 29 namespace elf { 30 31 class Defined; 32 class InputSection; 33 class InputSectionBase; 34 class OutputSection; 35 class SectionBase; 36 class Symbol; 37 class ThunkSection; 38 39 // This represents an r-value in the linker script. 40 struct ExprValue { 41 ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, 42 const Twine &loc) 43 : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {} 44 45 ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {} 46 47 bool isAbsolute() const { return forceAbsolute || sec == nullptr; } 48 uint64_t getValue() const; 49 uint64_t getSecAddr() const; 50 uint64_t getSectionOffset() const; 51 52 // If a value is relative to a section, it has a non-null Sec. 53 SectionBase *sec; 54 55 // True if this expression is enclosed in ABSOLUTE(). 56 // This flag affects the return value of getValue(). 57 bool forceAbsolute; 58 59 uint64_t val; 60 uint64_t alignment = 1; 61 62 // The original st_type if the expression represents a symbol. Any operation 63 // resets type to STT_NOTYPE. 64 uint8_t type = llvm::ELF::STT_NOTYPE; 65 66 // Original source location. Used for error messages. 67 std::string loc; 68 }; 69 70 // This represents an expression in the linker script. 71 // ScriptParser::readExpr reads an expression and returns an Expr. 72 // Later, we evaluate the expression by calling the function. 73 using Expr = std::function<ExprValue()>; 74 75 // This enum is used to implement linker script SECTIONS command. 76 // https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS 77 enum SectionsCommandKind { 78 AssignmentKind, // . = expr or <sym> = expr 79 OutputSectionKind, 80 InputSectionKind, 81 ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) 82 }; 83 84 struct BaseCommand { 85 BaseCommand(int k) : kind(k) {} 86 int kind; 87 }; 88 89 // This represents ". = <expr>" or "<symbol> = <expr>". 90 struct SymbolAssignment : BaseCommand { 91 SymbolAssignment(StringRef name, Expr e, std::string loc) 92 : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {} 93 94 static bool classof(const BaseCommand *c) { 95 return c->kind == AssignmentKind; 96 } 97 98 // The LHS of an expression. Name is either a symbol name or ".". 99 StringRef name; 100 Defined *sym = nullptr; 101 102 // The RHS of an expression. 103 Expr expression; 104 105 // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. 106 bool provide = false; 107 bool hidden = false; 108 109 // Holds file name and line number for error reporting. 110 std::string location; 111 112 // A string representation of this command. We use this for -Map. 113 std::string commandString; 114 115 // Address of this assignment command. 116 uint64_t addr; 117 118 // Size of this assignment command. This is usually 0, but if 119 // you move '.' this may be greater than 0. 120 uint64_t size; 121 }; 122 123 // Linker scripts allow additional constraints to be put on output sections. 124 // If an output section is marked as ONLY_IF_RO, the section is created 125 // only if its input sections are read-only. Likewise, an output section 126 // with ONLY_IF_RW is created if all input sections are RW. 127 enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; 128 129 // This struct is used to represent the location and size of regions of 130 // target memory. Instances of the struct are created by parsing the 131 // MEMORY command. 132 struct MemoryRegion { 133 MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags, 134 uint32_t negFlags) 135 : name(std::string(name)), origin(origin), length(length), flags(flags), 136 negFlags(negFlags) {} 137 138 std::string name; 139 Expr origin; 140 Expr length; 141 uint32_t flags; 142 uint32_t negFlags; 143 uint64_t curPos = 0; 144 }; 145 146 // This struct represents one section match pattern in SECTIONS() command. 147 // It can optionally have negative match pattern for EXCLUDED_FILE command. 148 // Also it may be surrounded with SORT() command, so contains sorting rules. 149 struct SectionPattern { 150 SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) 151 : excludedFilePat(pat1), sectionPat(pat2), 152 sortOuter(SortSectionPolicy::Default), 153 sortInner(SortSectionPolicy::Default) {} 154 155 StringMatcher excludedFilePat; 156 StringMatcher sectionPat; 157 SortSectionPolicy sortOuter; 158 SortSectionPolicy sortInner; 159 }; 160 161 struct InputSectionDescription : BaseCommand { 162 InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0, 163 uint64_t withoutFlags = 0) 164 : BaseCommand(InputSectionKind), filePat(filePattern), 165 withFlags(withFlags), withoutFlags(withoutFlags) {} 166 167 static bool classof(const BaseCommand *c) { 168 return c->kind == InputSectionKind; 169 } 170 171 SingleStringMatcher filePat; 172 173 // Input sections that matches at least one of SectionPatterns 174 // will be associated with this InputSectionDescription. 175 std::vector<SectionPattern> sectionPatterns; 176 177 // Includes InputSections and MergeInputSections. Used temporarily during 178 // assignment of input sections to output sections. 179 std::vector<InputSectionBase *> sectionBases; 180 181 // Used after the finalizeInputSections() pass. MergeInputSections have been 182 // merged into MergeSyntheticSections. 183 std::vector<InputSection *> sections; 184 185 // Temporary record of synthetic ThunkSection instances and the pass that 186 // they were created in. This is used to insert newly created ThunkSections 187 // into Sections at the end of a createThunks() pass. 188 std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections; 189 190 // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command. 191 uint64_t withFlags; 192 uint64_t withoutFlags; 193 }; 194 195 // Represents BYTE(), SHORT(), LONG(), or QUAD(). 196 struct ByteCommand : BaseCommand { 197 ByteCommand(Expr e, unsigned size, std::string commandString) 198 : BaseCommand(ByteKind), commandString(commandString), expression(e), 199 size(size) {} 200 201 static bool classof(const BaseCommand *c) { return c->kind == ByteKind; } 202 203 // Keeps string representing the command. Used for -Map" is perhaps better. 204 std::string commandString; 205 206 Expr expression; 207 208 // This is just an offset of this assignment command in the output section. 209 unsigned offset; 210 211 // Size of this data command. 212 unsigned size; 213 }; 214 215 struct InsertCommand { 216 OutputSection *os; 217 bool isAfter; 218 StringRef where; 219 }; 220 221 struct PhdrsCommand { 222 StringRef name; 223 unsigned type = llvm::ELF::PT_NULL; 224 bool hasFilehdr = false; 225 bool hasPhdrs = false; 226 llvm::Optional<unsigned> flags; 227 Expr lmaExpr = nullptr; 228 }; 229 230 class LinkerScript final { 231 // Temporary state used in processSectionCommands() and assignAddresses() 232 // that must be reinitialized for each call to the above functions, and must 233 // not be used outside of the scope of a call to the above functions. 234 struct AddressState { 235 AddressState(); 236 uint64_t threadBssOffset = 0; 237 OutputSection *outSec = nullptr; 238 MemoryRegion *memRegion = nullptr; 239 MemoryRegion *lmaRegion = nullptr; 240 uint64_t lmaOffset = 0; 241 }; 242 243 llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection; 244 245 void addSymbol(SymbolAssignment *cmd); 246 void assignSymbol(SymbolAssignment *cmd, bool inSec); 247 void setDot(Expr e, const Twine &loc, bool inSec); 248 void expandOutputSection(uint64_t size); 249 void expandMemoryRegions(uint64_t size); 250 251 std::vector<InputSectionBase *> 252 computeInputSections(const InputSectionDescription *, 253 ArrayRef<InputSectionBase *>); 254 255 std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd); 256 257 void discardSynthetic(OutputSection &); 258 259 std::vector<size_t> getPhdrIndices(OutputSection *sec); 260 261 MemoryRegion *findMemoryRegion(OutputSection *sec); 262 263 void switchTo(OutputSection *sec); 264 uint64_t advance(uint64_t size, unsigned align); 265 void output(InputSection *sec); 266 267 void assignOffsets(OutputSection *sec); 268 269 // Ctx captures the local AddressState and makes it accessible 270 // deliberately. This is needed as there are some cases where we cannot just 271 // thread the current state through to a lambda function created by the 272 // script parser. 273 // This should remain a plain pointer as its lifetime is smaller than 274 // LinkerScript. 275 AddressState *ctx = nullptr; 276 277 OutputSection *aether; 278 279 uint64_t dot; 280 281 public: 282 OutputSection *createOutputSection(StringRef name, StringRef location); 283 OutputSection *getOrCreateOutputSection(StringRef name); 284 285 bool hasPhdrsCommands() { return !phdrsCommands.empty(); } 286 uint64_t getDot() { return dot; } 287 void discard(InputSectionBase *s); 288 289 ExprValue getSymbolValue(StringRef name, const Twine &loc); 290 291 void addOrphanSections(); 292 void diagnoseOrphanHandling() const; 293 void adjustSectionsBeforeSorting(); 294 void adjustSectionsAfterSorting(); 295 296 std::vector<PhdrEntry *> createPhdrs(); 297 bool needsInterpSection(); 298 299 bool shouldKeep(InputSectionBase *s); 300 const Defined *assignAddresses(); 301 void allocateHeaders(std::vector<PhdrEntry *> &phdrs); 302 void processSectionCommands(); 303 void processSymbolAssignments(); 304 void declareSymbols(); 305 306 // Used to handle INSERT AFTER statements. 307 void processInsertCommands(); 308 309 // SECTIONS command list. 310 std::vector<BaseCommand *> sectionCommands; 311 312 // PHDRS command list. 313 std::vector<PhdrsCommand> phdrsCommands; 314 315 bool hasSectionsCommand = false; 316 bool errorOnMissingSection = false; 317 318 // List of section patterns specified with KEEP commands. They will 319 // be kept even if they are unused and --gc-sections is specified. 320 std::vector<InputSectionDescription *> keptSections; 321 322 // A map from memory region name to a memory region descriptor. 323 llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions; 324 325 // A list of symbols referenced by the script. 326 std::vector<llvm::StringRef> referencedSymbols; 327 328 // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need 329 // to be reordered. 330 std::vector<InsertCommand> insertCommands; 331 332 // Sections that will be warned/errored by --orphan-handling. 333 std::vector<const InputSectionBase *> orphanSections; 334 }; 335 336 extern LinkerScript *script; 337 338 } // end namespace elf 339 } // end namespace lld 340 341 #endif // LLD_ELF_LINKER_SCRIPT_H 342