1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_LINKER_SCRIPT_H 10 #define LLD_ELF_LINKER_SCRIPT_H 11 12 #include "Config.h" 13 #include "InputSection.h" 14 #include "Writer.h" 15 #include "lld/Common/LLVM.h" 16 #include "lld/Common/Strings.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/DenseMap.h" 19 #include "llvm/ADT/MapVector.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/Support/Compiler.h" 23 #include <cstddef> 24 #include <cstdint> 25 #include <functional> 26 #include <memory> 27 28 namespace lld::elf { 29 30 class Defined; 31 class InputFile; 32 class InputSection; 33 class InputSectionBase; 34 class OutputSection; 35 class SectionBase; 36 class ThunkSection; 37 struct OutputDesc; 38 39 // This represents an r-value in the linker script. 40 struct ExprValue { 41 ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, 42 const Twine &loc) 43 : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {} 44 45 ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {} 46 47 bool isAbsolute() const { return forceAbsolute || sec == nullptr; } 48 uint64_t getValue() const; 49 uint64_t getSecAddr() const; 50 uint64_t getSectionOffset() const; 51 52 // If a value is relative to a section, it has a non-null Sec. 53 SectionBase *sec; 54 55 uint64_t val; 56 uint64_t alignment = 1; 57 58 // The original st_type if the expression represents a symbol. Any operation 59 // resets type to STT_NOTYPE. 60 uint8_t type = llvm::ELF::STT_NOTYPE; 61 62 // True if this expression is enclosed in ABSOLUTE(). 63 // This flag affects the return value of getValue(). 64 bool forceAbsolute; 65 66 // Original source location. Used for error messages. 67 std::string loc; 68 }; 69 70 // This represents an expression in the linker script. 71 // ScriptParser::readExpr reads an expression and returns an Expr. 72 // Later, we evaluate the expression by calling the function. 73 using Expr = std::function<ExprValue()>; 74 75 // This enum is used to implement linker script SECTIONS command. 76 // https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS 77 enum SectionsCommandKind { 78 AssignmentKind, // . = expr or <sym> = expr 79 OutputSectionKind, 80 InputSectionKind, 81 ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) 82 }; 83 84 struct SectionCommand { 85 SectionCommand(int k) : kind(k) {} 86 int kind; 87 }; 88 89 // This represents ". = <expr>" or "<symbol> = <expr>". 90 struct SymbolAssignment : SectionCommand { 91 SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc) 92 : SectionCommand(AssignmentKind), name(name), expression(e), 93 symOrder(symOrder), location(loc) {} 94 95 static bool classof(const SectionCommand *c) { 96 return c->kind == AssignmentKind; 97 } 98 99 // The LHS of an expression. Name is either a symbol name or ".". 100 StringRef name; 101 Defined *sym = nullptr; 102 103 // The RHS of an expression. 104 Expr expression; 105 106 // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. 107 bool provide = false; 108 bool hidden = false; 109 110 // This assignment references DATA_SEGMENT_RELRO_END. 111 bool dataSegmentRelroEnd = false; 112 113 unsigned symOrder; 114 115 // Holds file name and line number for error reporting. 116 std::string location; 117 118 // A string representation of this command. We use this for -Map. 119 std::string commandString; 120 121 // Address of this assignment command. 122 uint64_t addr; 123 124 // Size of this assignment command. This is usually 0, but if 125 // you move '.' this may be greater than 0. 126 uint64_t size; 127 }; 128 129 // Linker scripts allow additional constraints to be put on output sections. 130 // If an output section is marked as ONLY_IF_RO, the section is created 131 // only if its input sections are read-only. Likewise, an output section 132 // with ONLY_IF_RW is created if all input sections are RW. 133 enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; 134 135 // This struct is used to represent the location and size of regions of 136 // target memory. Instances of the struct are created by parsing the 137 // MEMORY command. 138 struct MemoryRegion { 139 MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags, 140 uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags) 141 : name(std::string(name)), origin(origin), length(length), flags(flags), 142 invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {} 143 144 std::string name; 145 Expr origin; 146 Expr length; 147 // A section can be assigned to the region if any of these ELF section flags 148 // are set... 149 uint32_t flags; 150 // ... or any of these flags are not set. 151 // For example, the memory region attribute "r" maps to SHF_WRITE. 152 uint32_t invFlags; 153 // A section cannot be assigned to the region if any of these ELF section 154 // flags are set... 155 uint32_t negFlags; 156 // ... or any of these flags are not set. 157 // For example, the memory region attribute "!r" maps to SHF_WRITE. 158 uint32_t negInvFlags; 159 uint64_t curPos = 0; 160 161 uint64_t getOrigin() const { return origin().getValue(); } 162 uint64_t getLength() const { return length().getValue(); } 163 164 bool compatibleWith(uint32_t secFlags) const { 165 if ((secFlags & negFlags) || (~secFlags & negInvFlags)) 166 return false; 167 return (secFlags & flags) || (~secFlags & invFlags); 168 } 169 }; 170 171 // This struct represents one section match pattern in SECTIONS() command. 172 // It can optionally have negative match pattern for EXCLUDED_FILE command. 173 // Also it may be surrounded with SORT() command, so contains sorting rules. 174 class SectionPattern { 175 StringMatcher excludedFilePat; 176 177 // Cache of the most recent input argument and result of excludesFile(). 178 mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache; 179 180 public: 181 SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) 182 : excludedFilePat(pat1), sectionPat(pat2), 183 sortOuter(SortSectionPolicy::Default), 184 sortInner(SortSectionPolicy::Default) {} 185 186 bool excludesFile(const InputFile *file) const; 187 188 StringMatcher sectionPat; 189 SortSectionPolicy sortOuter; 190 SortSectionPolicy sortInner; 191 }; 192 193 class InputSectionDescription : public SectionCommand { 194 SingleStringMatcher filePat; 195 196 // Cache of the most recent input argument and result of matchesFile(). 197 mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache; 198 199 public: 200 InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0, 201 uint64_t withoutFlags = 0) 202 : SectionCommand(InputSectionKind), filePat(filePattern), 203 withFlags(withFlags), withoutFlags(withoutFlags) {} 204 205 static bool classof(const SectionCommand *c) { 206 return c->kind == InputSectionKind; 207 } 208 209 bool matchesFile(const InputFile *file) const; 210 211 // Input sections that matches at least one of SectionPatterns 212 // will be associated with this InputSectionDescription. 213 SmallVector<SectionPattern, 0> sectionPatterns; 214 215 // Includes InputSections and MergeInputSections. Used temporarily during 216 // assignment of input sections to output sections. 217 SmallVector<InputSectionBase *, 0> sectionBases; 218 219 // Used after the finalizeInputSections() pass. MergeInputSections have been 220 // merged into MergeSyntheticSections. 221 SmallVector<InputSection *, 0> sections; 222 223 // Temporary record of synthetic ThunkSection instances and the pass that 224 // they were created in. This is used to insert newly created ThunkSections 225 // into Sections at the end of a createThunks() pass. 226 SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections; 227 228 // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command. 229 uint64_t withFlags; 230 uint64_t withoutFlags; 231 }; 232 233 // Represents BYTE(), SHORT(), LONG(), or QUAD(). 234 struct ByteCommand : SectionCommand { 235 ByteCommand(Expr e, unsigned size, std::string commandString) 236 : SectionCommand(ByteKind), commandString(commandString), expression(e), 237 size(size) {} 238 239 static bool classof(const SectionCommand *c) { return c->kind == ByteKind; } 240 241 // Keeps string representing the command. Used for -Map" is perhaps better. 242 std::string commandString; 243 244 Expr expression; 245 246 // This is just an offset of this assignment command in the output section. 247 unsigned offset; 248 249 // Size of this data command. 250 unsigned size; 251 }; 252 253 struct InsertCommand { 254 SmallVector<StringRef, 0> names; 255 bool isAfter; 256 StringRef where; 257 }; 258 259 // A NOCROSSREFS/NOCROSSREFS_TO command that prohibits references between 260 // certain output sections. 261 struct NoCrossRefCommand { 262 SmallVector<StringRef, 0> outputSections; 263 264 // When true, this describes a NOCROSSREFS_TO command that probits references 265 // to the first output section from any of the other sections. 266 bool toFirst = false; 267 }; 268 269 struct PhdrsCommand { 270 StringRef name; 271 unsigned type = llvm::ELF::PT_NULL; 272 bool hasFilehdr = false; 273 bool hasPhdrs = false; 274 std::optional<unsigned> flags; 275 Expr lmaExpr = nullptr; 276 }; 277 278 class LinkerScript final { 279 // Temporary state used in processSectionCommands() and assignAddresses() 280 // that must be reinitialized for each call to the above functions, and must 281 // not be used outside of the scope of a call to the above functions. 282 struct AddressState { 283 AddressState(); 284 OutputSection *outSec = nullptr; 285 MemoryRegion *memRegion = nullptr; 286 MemoryRegion *lmaRegion = nullptr; 287 uint64_t lmaOffset = 0; 288 uint64_t tbssAddr = 0; 289 }; 290 291 llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection; 292 293 void addSymbol(SymbolAssignment *cmd); 294 void assignSymbol(SymbolAssignment *cmd, bool inSec); 295 void setDot(Expr e, const Twine &loc, bool inSec); 296 void expandOutputSection(uint64_t size); 297 void expandMemoryRegions(uint64_t size); 298 299 SmallVector<InputSectionBase *, 0> 300 computeInputSections(const InputSectionDescription *, 301 ArrayRef<InputSectionBase *>, 302 const OutputSection &outCmd); 303 304 SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd); 305 306 void discardSynthetic(OutputSection &); 307 308 SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec); 309 310 std::pair<MemoryRegion *, MemoryRegion *> 311 findMemoryRegion(OutputSection *sec, MemoryRegion *hint); 312 313 bool assignOffsets(OutputSection *sec); 314 315 // This captures the local AddressState and makes it accessible 316 // deliberately. This is needed as there are some cases where we cannot just 317 // thread the current state through to a lambda function created by the 318 // script parser. 319 // This should remain a plain pointer as its lifetime is smaller than 320 // LinkerScript. 321 AddressState *state = nullptr; 322 323 OutputSection *aether; 324 325 uint64_t dot; 326 327 public: 328 OutputDesc *createOutputSection(StringRef name, StringRef location); 329 OutputDesc *getOrCreateOutputSection(StringRef name); 330 331 bool hasPhdrsCommands() { return !phdrsCommands.empty(); } 332 uint64_t getDot() { return dot; } 333 void discard(InputSectionBase &s); 334 335 ExprValue getSymbolValue(StringRef name, const Twine &loc); 336 337 void addOrphanSections(); 338 void diagnoseOrphanHandling() const; 339 void diagnoseMissingSGSectionAddress() const; 340 void adjustOutputSections(); 341 void adjustSectionsAfterSorting(); 342 343 SmallVector<PhdrEntry *, 0> createPhdrs(); 344 bool needsInterpSection(); 345 346 bool shouldKeep(InputSectionBase *s); 347 std::pair<const OutputSection *, const Defined *> assignAddresses(); 348 bool spillSections(); 349 void erasePotentialSpillSections(); 350 void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs); 351 void processSectionCommands(); 352 void processSymbolAssignments(); 353 void declareSymbols(); 354 355 // Used to handle INSERT AFTER statements. 356 void processInsertCommands(); 357 358 // Describe memory region usage. 359 void printMemoryUsage(raw_ostream &os); 360 361 // Record a pending error during an assignAddresses invocation. 362 // assignAddresses is executed more than once. Therefore, lld::error should be 363 // avoided to not report duplicate errors. 364 void recordError(const Twine &msg); 365 366 // Check backward location counter assignment and memory region/LMA overflows. 367 void checkFinalScriptConditions() const; 368 369 // Add symbols that are referenced in the linker script to the symbol table. 370 // Symbols referenced in a PROVIDE command are only added to the symbol table 371 // if the PROVIDE command actually provides the symbol. 372 // It also adds the symbols referenced by the used PROVIDE symbols to the 373 // linker script referenced symbols list. 374 void addScriptReferencedSymbolsToSymTable(); 375 376 // Returns true if the PROVIDE symbol should be added to the link. 377 // A PROVIDE symbol is added to the link only if it satisfies an 378 // undefined reference. 379 static bool shouldAddProvideSym(StringRef symName); 380 381 // SECTIONS command list. 382 SmallVector<SectionCommand *, 0> sectionCommands; 383 384 // PHDRS command list. 385 SmallVector<PhdrsCommand, 0> phdrsCommands; 386 387 bool hasSectionsCommand = false; 388 bool seenDataAlign = false; 389 bool seenRelroEnd = false; 390 bool errorOnMissingSection = false; 391 SmallVector<SmallString<0>, 0> recordedErrors; 392 393 // List of section patterns specified with KEEP commands. They will 394 // be kept even if they are unused and --gc-sections is specified. 395 SmallVector<InputSectionDescription *, 0> keptSections; 396 397 // A map from memory region name to a memory region descriptor. 398 llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions; 399 400 // A list of symbols referenced by the script. 401 SmallVector<llvm::StringRef, 0> referencedSymbols; 402 403 // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need 404 // to be reordered. 405 SmallVector<InsertCommand, 0> insertCommands; 406 407 // OutputSections specified by OVERWRITE_SECTIONS. 408 SmallVector<OutputDesc *, 0> overwriteSections; 409 410 // NOCROSSREFS(_TO) commands. 411 SmallVector<NoCrossRefCommand, 0> noCrossRefs; 412 413 // Sections that will be warned/errored by --orphan-handling. 414 SmallVector<const InputSectionBase *, 0> orphanSections; 415 416 // Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE 417 // expression. For example, if the PROVIDE command is: 418 // 419 // PROVIDE(v = a + b + c); 420 // 421 // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c'] 422 llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap; 423 424 // List of potential spill locations (PotentialSpillSection) for an input 425 // section. 426 struct PotentialSpillList { 427 // Never nullptr. 428 PotentialSpillSection *head; 429 PotentialSpillSection *tail; 430 }; 431 llvm::DenseMap<InputSectionBase *, PotentialSpillList> potentialSpillLists; 432 }; 433 434 struct ScriptWrapper { 435 LinkerScript s; 436 LinkerScript *operator->() { return &s; } 437 }; 438 439 LLVM_LIBRARY_VISIBILITY extern ScriptWrapper script; 440 441 } // end namespace lld::elf 442 443 #endif // LLD_ELF_LINKER_SCRIPT_H 444