1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_LINKER_SCRIPT_H 10 #define LLD_ELF_LINKER_SCRIPT_H 11 12 #include "Config.h" 13 #include "InputSection.h" 14 #include "Writer.h" 15 #include "lld/Common/LLVM.h" 16 #include "lld/Common/Strings.h" 17 #include "llvm/ADT/ArrayRef.h" 18 #include "llvm/ADT/DenseMap.h" 19 #include "llvm/ADT/MapVector.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/StringRef.h" 22 #include "llvm/Support/Compiler.h" 23 #include <cstddef> 24 #include <cstdint> 25 #include <functional> 26 #include <memory> 27 28 namespace lld::elf { 29 30 class Defined; 31 class InputFile; 32 class InputSection; 33 class InputSectionBase; 34 class OutputSection; 35 class SectionBase; 36 class ThunkSection; 37 struct OutputDesc; 38 struct SectionClass; 39 struct SectionClassDesc; 40 41 // This represents an r-value in the linker script. 42 struct ExprValue { ExprValueExprValue43 ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, 44 const Twine &loc) 45 : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {} 46 ExprValueExprValue47 ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {} 48 isAbsoluteExprValue49 bool isAbsolute() const { return forceAbsolute || sec == nullptr; } 50 uint64_t getValue() const; 51 uint64_t getSecAddr() const; 52 uint64_t getSectionOffset() const; 53 54 // If a value is relative to a section, it has a non-null Sec. 55 SectionBase *sec; 56 57 uint64_t val; 58 uint64_t alignment = 1; 59 60 // The original st_type if the expression represents a symbol. Any operation 61 // resets type to STT_NOTYPE. 62 uint8_t type = llvm::ELF::STT_NOTYPE; 63 64 // True if this expression is enclosed in ABSOLUTE(). 65 // This flag affects the return value of getValue(). 66 bool forceAbsolute; 67 68 // Original source location. Used for error messages. 69 std::string loc; 70 }; 71 72 // This represents an expression in the linker script. 73 // ScriptParser::readExpr reads an expression and returns an Expr. 74 // Later, we evaluate the expression by calling the function. 75 using Expr = std::function<ExprValue()>; 76 77 // This enum is used to implement linker script SECTIONS command. 78 // https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS 79 enum SectionsCommandKind { 80 AssignmentKind, // . = expr or <sym> = expr 81 OutputSectionKind, 82 InputSectionKind, 83 ByteKind, // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) 84 ClassKind, // CLASS(class_name) 85 }; 86 87 struct SectionCommand { SectionCommandSectionCommand88 SectionCommand(int k) : kind(k) {} 89 int kind; 90 }; 91 92 // This represents ". = <expr>" or "<symbol> = <expr>". 93 struct SymbolAssignment : SectionCommand { SymbolAssignmentSymbolAssignment94 SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc) 95 : SectionCommand(AssignmentKind), name(name), expression(e), 96 symOrder(symOrder), location(loc) {} 97 classofSymbolAssignment98 static bool classof(const SectionCommand *c) { 99 return c->kind == AssignmentKind; 100 } 101 102 // The LHS of an expression. Name is either a symbol name or ".". 103 StringRef name; 104 Defined *sym = nullptr; 105 106 // The RHS of an expression. 107 Expr expression; 108 109 // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. 110 bool provide = false; 111 bool hidden = false; 112 113 // This assignment references DATA_SEGMENT_RELRO_END. 114 bool dataSegmentRelroEnd = false; 115 116 unsigned symOrder; 117 118 // Holds file name and line number for error reporting. 119 std::string location; 120 121 // A string representation of this command. We use this for -Map. 122 std::string commandString; 123 124 // Address of this assignment command. 125 uint64_t addr; 126 127 // Size of this assignment command. This is usually 0, but if 128 // you move '.' this may be greater than 0. 129 uint64_t size; 130 }; 131 132 // Linker scripts allow additional constraints to be put on output sections. 133 // If an output section is marked as ONLY_IF_RO, the section is created 134 // only if its input sections are read-only. Likewise, an output section 135 // with ONLY_IF_RW is created if all input sections are RW. 136 enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; 137 138 // This struct is used to represent the location and size of regions of 139 // target memory. Instances of the struct are created by parsing the 140 // MEMORY command. 141 struct MemoryRegion { MemoryRegionMemoryRegion142 MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags, 143 uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags) 144 : name(std::string(name)), origin(origin), length(length), flags(flags), 145 invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {} 146 147 std::string name; 148 Expr origin; 149 Expr length; 150 // A section can be assigned to the region if any of these ELF section flags 151 // are set... 152 uint32_t flags; 153 // ... or any of these flags are not set. 154 // For example, the memory region attribute "r" maps to SHF_WRITE. 155 uint32_t invFlags; 156 // A section cannot be assigned to the region if any of these ELF section 157 // flags are set... 158 uint32_t negFlags; 159 // ... or any of these flags are not set. 160 // For example, the memory region attribute "!r" maps to SHF_WRITE. 161 uint32_t negInvFlags; 162 uint64_t curPos = 0; 163 getOriginMemoryRegion164 uint64_t getOrigin() const { return origin().getValue(); } getLengthMemoryRegion165 uint64_t getLength() const { return length().getValue(); } 166 compatibleWithMemoryRegion167 bool compatibleWith(uint32_t secFlags) const { 168 if ((secFlags & negFlags) || (~secFlags & negInvFlags)) 169 return false; 170 return (secFlags & flags) || (~secFlags & invFlags); 171 } 172 }; 173 174 // This struct represents one section match pattern in SECTIONS() command. 175 // It can optionally have negative match pattern for EXCLUDED_FILE command. 176 // Also it may be surrounded with SORT() command, so contains sorting rules. 177 class SectionPattern { 178 StringMatcher excludedFilePat; 179 180 // Cache of the most recent input argument and result of excludesFile(). 181 mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache; 182 183 public: SectionPattern(StringMatcher && pat1,StringMatcher && pat2)184 SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) 185 : excludedFilePat(pat1), sectionPat(pat2), 186 sortOuter(SortSectionPolicy::Default), 187 sortInner(SortSectionPolicy::Default) {} 188 189 bool excludesFile(const InputFile &file) const; 190 191 StringMatcher sectionPat; 192 SortSectionPolicy sortOuter; 193 SortSectionPolicy sortInner; 194 }; 195 196 class InputSectionDescription : public SectionCommand { 197 enum class MatchType { Trivial, WholeArchive, ArchivesExcluded } matchType; 198 SingleStringMatcher filePat; 199 200 // Cache of the most recent input argument and result of matchesFile(). 201 mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache; 202 203 public: 204 InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0, 205 uint64_t withoutFlags = 0, StringRef classRef = {}) SectionCommand(InputSectionKind)206 : SectionCommand(InputSectionKind), matchType(MatchType::Trivial), 207 filePat(filePattern), classRef(classRef), withFlags(withFlags), 208 withoutFlags(withoutFlags) { 209 assert((filePattern.empty() || classRef.empty()) && 210 "file pattern and class reference are mutually exclusive"); 211 212 // The matching syntax for whole archives and files outside of an archive 213 // can't be handled by SingleStringMatcher, and instead are handled 214 // manually within matchesFile() 215 if (!filePattern.empty()) { 216 if (filePattern.back() == ':') { 217 matchType = MatchType::WholeArchive; 218 filePat = filePattern.drop_back(); 219 } else if (filePattern.front() == ':') { 220 matchType = MatchType::ArchivesExcluded; 221 filePat = filePattern.drop_front(); 222 } 223 } 224 } 225 classof(const SectionCommand * c)226 static bool classof(const SectionCommand *c) { 227 return c->kind == InputSectionKind; 228 } 229 230 bool matchesFile(const InputFile &file) const; 231 232 // Input sections that matches at least one of SectionPatterns 233 // will be associated with this InputSectionDescription. 234 SmallVector<SectionPattern, 0> sectionPatterns; 235 236 // If present, input section matching uses class membership instead of file 237 // and section patterns (mutually exclusive). 238 StringRef classRef; 239 240 // Includes InputSections and MergeInputSections. Used temporarily during 241 // assignment of input sections to output sections. 242 SmallVector<InputSectionBase *, 0> sectionBases; 243 244 // Used after the finalizeInputSections() pass. MergeInputSections have been 245 // merged into MergeSyntheticSections. 246 SmallVector<InputSection *, 0> sections; 247 248 // Temporary record of synthetic ThunkSection instances and the pass that 249 // they were created in. This is used to insert newly created ThunkSections 250 // into Sections at the end of a createThunks() pass. 251 SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections; 252 253 // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command. 254 uint64_t withFlags; 255 uint64_t withoutFlags; 256 }; 257 258 // Represents BYTE(), SHORT(), LONG(), or QUAD(). 259 struct ByteCommand : SectionCommand { ByteCommandByteCommand260 ByteCommand(Expr e, unsigned size, std::string commandString) 261 : SectionCommand(ByteKind), commandString(commandString), expression(e), 262 size(size) {} 263 classofByteCommand264 static bool classof(const SectionCommand *c) { return c->kind == ByteKind; } 265 266 // Keeps string representing the command. Used for -Map" is perhaps better. 267 std::string commandString; 268 269 Expr expression; 270 271 // This is just an offset of this assignment command in the output section. 272 unsigned offset; 273 274 // Size of this data command. 275 unsigned size; 276 }; 277 278 struct InsertCommand { 279 SmallVector<StringRef, 0> names; 280 bool isAfter; 281 StringRef where; 282 }; 283 284 // A NOCROSSREFS/NOCROSSREFS_TO command that prohibits references between 285 // certain output sections. 286 struct NoCrossRefCommand { 287 SmallVector<StringRef, 0> outputSections; 288 289 // When true, this describes a NOCROSSREFS_TO command that probits references 290 // to the first output section from any of the other sections. 291 bool toFirst = false; 292 }; 293 294 struct PhdrsCommand { 295 StringRef name; 296 unsigned type = llvm::ELF::PT_NULL; 297 bool hasFilehdr = false; 298 bool hasPhdrs = false; 299 std::optional<unsigned> flags; 300 Expr lmaExpr = nullptr; 301 }; 302 303 class LinkerScript final { 304 // Temporary state used in processSectionCommands() and assignAddresses() 305 // that must be reinitialized for each call to the above functions, and must 306 // not be used outside of the scope of a call to the above functions. 307 struct AddressState { 308 AddressState(const LinkerScript &); 309 OutputSection *outSec = nullptr; 310 MemoryRegion *memRegion = nullptr; 311 MemoryRegion *lmaRegion = nullptr; 312 uint64_t lmaOffset = 0; 313 uint64_t tbssAddr = 0; 314 uint64_t overlaySize; 315 }; 316 317 Ctx &ctx; 318 SmallVector<std::unique_ptr<OutputDesc>, 0> descPool; 319 llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection; 320 321 StringRef getOutputSectionName(const InputSectionBase *s) const; 322 void addSymbol(SymbolAssignment *cmd); 323 void declareSymbol(SymbolAssignment *cmd); 324 void assignSymbol(SymbolAssignment *cmd, bool inSec); 325 void setDot(Expr e, const Twine &loc, bool inSec); 326 void expandOutputSection(uint64_t size); 327 void expandMemoryRegions(uint64_t size); 328 329 SmallVector<InputSectionBase *, 0> 330 computeInputSections(const InputSectionDescription *, 331 ArrayRef<InputSectionBase *>, const SectionBase &outCmd); 332 333 SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd); 334 335 void discardSynthetic(OutputSection &); 336 337 SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec); 338 339 std::pair<MemoryRegion *, MemoryRegion *> 340 findMemoryRegion(OutputSection *sec, MemoryRegion *hint); 341 342 bool assignOffsets(OutputSection *sec); 343 344 // This captures the local AddressState and makes it accessible 345 // deliberately. This is needed as there are some cases where we cannot just 346 // thread the current state through to a lambda function created by the 347 // script parser. 348 // This should remain a plain pointer as its lifetime is smaller than 349 // LinkerScript. 350 AddressState *state = nullptr; 351 352 std::unique_ptr<OutputSection> aether; 353 354 uint64_t dot = 0; 355 356 public: 357 // OutputSection may be incomplete. Avoid inline ctor/dtor. 358 LinkerScript(Ctx &ctx); 359 ~LinkerScript(); 360 361 OutputDesc *createOutputSection(StringRef name, StringRef location); 362 OutputDesc *getOrCreateOutputSection(StringRef name); 363 hasPhdrsCommands()364 bool hasPhdrsCommands() { return !phdrsCommands.empty(); } getDot()365 uint64_t getDot() { return dot; } 366 void discard(InputSectionBase &s); 367 368 ExprValue getSymbolValue(StringRef name, const Twine &loc); 369 370 void addOrphanSections(); 371 void diagnoseOrphanHandling() const; 372 void diagnoseMissingSGSectionAddress() const; 373 void adjustOutputSections(); 374 void adjustSectionsAfterSorting(); 375 376 SmallVector<std::unique_ptr<PhdrEntry>, 0> createPhdrs(); 377 bool needsInterpSection(); 378 379 bool shouldKeep(InputSectionBase *s); 380 std::pair<const OutputSection *, const Defined *> assignAddresses(); 381 bool spillSections(); 382 void erasePotentialSpillSections(); 383 void allocateHeaders(SmallVector<std::unique_ptr<PhdrEntry>, 0> &phdrs); 384 void processSectionCommands(); 385 void processSymbolAssignments(); 386 void declareSymbols(); 387 388 // Used to handle INSERT AFTER statements. 389 void processInsertCommands(); 390 391 // Describe memory region usage. 392 void printMemoryUsage(raw_ostream &os); 393 394 // Record a pending error during an assignAddresses invocation. 395 // assignAddresses is executed more than once. Therefore, lld::error should be 396 // avoided to not report duplicate errors. 397 void recordError(const Twine &msg); 398 399 // Check backward location counter assignment and memory region/LMA overflows. 400 void checkFinalScriptConditions() const; 401 402 // Add symbols that are referenced in the linker script to the symbol table. 403 // Symbols referenced in a PROVIDE command are only added to the symbol table 404 // if the PROVIDE command actually provides the symbol. 405 // It also adds the symbols referenced by the used PROVIDE symbols to the 406 // linker script referenced symbols list. 407 void addScriptReferencedSymbolsToSymTable(); 408 409 // Returns true if the PROVIDE symbol should be added to the link. 410 // A PROVIDE symbol is added to the link only if it satisfies an 411 // undefined reference. 412 bool shouldAddProvideSym(StringRef symName); 413 414 // SECTIONS command list. 415 SmallVector<SectionCommand *, 0> sectionCommands; 416 417 // PHDRS command list. 418 SmallVector<PhdrsCommand, 0> phdrsCommands; 419 420 bool hasSectionsCommand = false; 421 bool seenDataAlign = false; 422 bool seenRelroEnd = false; 423 bool errorOnMissingSection = false; 424 SmallVector<SmallString<0>, 0> recordedErrors; 425 426 // List of section patterns specified with KEEP commands. They will 427 // be kept even if they are unused and --gc-sections is specified. 428 SmallVector<InputSectionDescription *, 0> keptSections; 429 430 // A map from memory region name to a memory region descriptor. 431 llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions; 432 433 // A list of symbols referenced by the script. 434 SmallVector<llvm::StringRef, 0> referencedSymbols; 435 436 // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need 437 // to be reordered. 438 SmallVector<InsertCommand, 0> insertCommands; 439 440 // OutputSections specified by OVERWRITE_SECTIONS. 441 SmallVector<OutputDesc *, 0> overwriteSections; 442 443 // NOCROSSREFS(_TO) commands. 444 SmallVector<NoCrossRefCommand, 0> noCrossRefs; 445 446 // Sections that will be warned/errored by --orphan-handling. 447 SmallVector<const InputSectionBase *, 0> orphanSections; 448 449 // Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE 450 // expression. For example, if the PROVIDE command is: 451 // 452 // PROVIDE(v = a + b + c); 453 // 454 // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c'] 455 llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap; 456 // Store defined symbols that should ignore PROVIDE commands. 457 llvm::DenseSet<Symbol *> unusedProvideSyms; 458 459 // List of potential spill locations (PotentialSpillSection) for an input 460 // section. 461 struct PotentialSpillList { 462 // Never nullptr. 463 PotentialSpillSection *head; 464 PotentialSpillSection *tail; 465 }; 466 llvm::DenseMap<InputSectionBase *, PotentialSpillList> potentialSpillLists; 467 468 // Named lists of input sections that can be collectively referenced in output 469 // section descriptions. Multiple references allow for sections to spill from 470 // one output section to another. 471 llvm::DenseMap<llvm::CachedHashStringRef, SectionClassDesc *> sectionClasses; 472 }; 473 474 } // end namespace lld::elf 475 476 #endif // LLD_ELF_LINKER_SCRIPT_H 477