1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_LINKER_SCRIPT_H 10 #define LLD_ELF_LINKER_SCRIPT_H 11 12 #include "Config.h" 13 #include "Writer.h" 14 #include "lld/Common/LLVM.h" 15 #include "lld/Common/Strings.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/MapVector.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Support/Compiler.h" 21 #include <cstddef> 22 #include <cstdint> 23 #include <functional> 24 #include <memory> 25 26 namespace lld::elf { 27 28 class Defined; 29 class InputFile; 30 class InputSection; 31 class InputSectionBase; 32 class OutputSection; 33 class SectionBase; 34 class ThunkSection; 35 struct OutputDesc; 36 37 // This represents an r-value in the linker script. 38 struct ExprValue { 39 ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, 40 const Twine &loc) 41 : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {} 42 43 ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {} 44 45 bool isAbsolute() const { return forceAbsolute || sec == nullptr; } 46 uint64_t getValue() const; 47 uint64_t getSecAddr() const; 48 uint64_t getSectionOffset() const; 49 50 // If a value is relative to a section, it has a non-null Sec. 51 SectionBase *sec; 52 53 uint64_t val; 54 uint64_t alignment = 1; 55 56 // The original st_type if the expression represents a symbol. Any operation 57 // resets type to STT_NOTYPE. 58 uint8_t type = llvm::ELF::STT_NOTYPE; 59 60 // True if this expression is enclosed in ABSOLUTE(). 61 // This flag affects the return value of getValue(). 62 bool forceAbsolute; 63 64 // Original source location. Used for error messages. 65 std::string loc; 66 }; 67 68 // This represents an expression in the linker script. 69 // ScriptParser::readExpr reads an expression and returns an Expr. 70 // Later, we evaluate the expression by calling the function. 71 using Expr = std::function<ExprValue()>; 72 73 // This enum is used to implement linker script SECTIONS command. 74 // https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS 75 enum SectionsCommandKind { 76 AssignmentKind, // . = expr or <sym> = expr 77 OutputSectionKind, 78 InputSectionKind, 79 ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) 80 }; 81 82 struct SectionCommand { 83 SectionCommand(int k) : kind(k) {} 84 int kind; 85 }; 86 87 // This represents ". = <expr>" or "<symbol> = <expr>". 88 struct SymbolAssignment : SectionCommand { 89 SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc) 90 : SectionCommand(AssignmentKind), name(name), expression(e), 91 symOrder(symOrder), location(loc) {} 92 93 static bool classof(const SectionCommand *c) { 94 return c->kind == AssignmentKind; 95 } 96 97 // The LHS of an expression. Name is either a symbol name or ".". 98 StringRef name; 99 Defined *sym = nullptr; 100 101 // The RHS of an expression. 102 Expr expression; 103 104 // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. 105 bool provide = false; 106 bool hidden = false; 107 108 // This assignment references DATA_SEGMENT_RELRO_END. 109 bool dataSegmentRelroEnd = false; 110 111 unsigned symOrder; 112 113 // Holds file name and line number for error reporting. 114 std::string location; 115 116 // A string representation of this command. We use this for -Map. 117 std::string commandString; 118 119 // Address of this assignment command. 120 uint64_t addr; 121 122 // Size of this assignment command. This is usually 0, but if 123 // you move '.' this may be greater than 0. 124 uint64_t size; 125 }; 126 127 // Linker scripts allow additional constraints to be put on output sections. 128 // If an output section is marked as ONLY_IF_RO, the section is created 129 // only if its input sections are read-only. Likewise, an output section 130 // with ONLY_IF_RW is created if all input sections are RW. 131 enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; 132 133 // This struct is used to represent the location and size of regions of 134 // target memory. Instances of the struct are created by parsing the 135 // MEMORY command. 136 struct MemoryRegion { 137 MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags, 138 uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags) 139 : name(std::string(name)), origin(origin), length(length), flags(flags), 140 invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {} 141 142 std::string name; 143 Expr origin; 144 Expr length; 145 // A section can be assigned to the region if any of these ELF section flags 146 // are set... 147 uint32_t flags; 148 // ... or any of these flags are not set. 149 // For example, the memory region attribute "r" maps to SHF_WRITE. 150 uint32_t invFlags; 151 // A section cannot be assigned to the region if any of these ELF section 152 // flags are set... 153 uint32_t negFlags; 154 // ... or any of these flags are not set. 155 // For example, the memory region attribute "!r" maps to SHF_WRITE. 156 uint32_t negInvFlags; 157 uint64_t curPos = 0; 158 159 uint64_t getOrigin() const { return origin().getValue(); } 160 uint64_t getLength() const { return length().getValue(); } 161 162 bool compatibleWith(uint32_t secFlags) const { 163 if ((secFlags & negFlags) || (~secFlags & negInvFlags)) 164 return false; 165 return (secFlags & flags) || (~secFlags & invFlags); 166 } 167 }; 168 169 // This struct represents one section match pattern in SECTIONS() command. 170 // It can optionally have negative match pattern for EXCLUDED_FILE command. 171 // Also it may be surrounded with SORT() command, so contains sorting rules. 172 class SectionPattern { 173 StringMatcher excludedFilePat; 174 175 // Cache of the most recent input argument and result of excludesFile(). 176 mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache; 177 178 public: 179 SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) 180 : excludedFilePat(pat1), sectionPat(pat2), 181 sortOuter(SortSectionPolicy::Default), 182 sortInner(SortSectionPolicy::Default) {} 183 184 bool excludesFile(const InputFile *file) const; 185 186 StringMatcher sectionPat; 187 SortSectionPolicy sortOuter; 188 SortSectionPolicy sortInner; 189 }; 190 191 class InputSectionDescription : public SectionCommand { 192 SingleStringMatcher filePat; 193 194 // Cache of the most recent input argument and result of matchesFile(). 195 mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache; 196 197 public: 198 InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0, 199 uint64_t withoutFlags = 0) 200 : SectionCommand(InputSectionKind), filePat(filePattern), 201 withFlags(withFlags), withoutFlags(withoutFlags) {} 202 203 static bool classof(const SectionCommand *c) { 204 return c->kind == InputSectionKind; 205 } 206 207 bool matchesFile(const InputFile *file) const; 208 209 // Input sections that matches at least one of SectionPatterns 210 // will be associated with this InputSectionDescription. 211 SmallVector<SectionPattern, 0> sectionPatterns; 212 213 // Includes InputSections and MergeInputSections. Used temporarily during 214 // assignment of input sections to output sections. 215 SmallVector<InputSectionBase *, 0> sectionBases; 216 217 // Used after the finalizeInputSections() pass. MergeInputSections have been 218 // merged into MergeSyntheticSections. 219 SmallVector<InputSection *, 0> sections; 220 221 // Temporary record of synthetic ThunkSection instances and the pass that 222 // they were created in. This is used to insert newly created ThunkSections 223 // into Sections at the end of a createThunks() pass. 224 SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections; 225 226 // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command. 227 uint64_t withFlags; 228 uint64_t withoutFlags; 229 }; 230 231 // Represents BYTE(), SHORT(), LONG(), or QUAD(). 232 struct ByteCommand : SectionCommand { 233 ByteCommand(Expr e, unsigned size, std::string commandString) 234 : SectionCommand(ByteKind), commandString(commandString), expression(e), 235 size(size) {} 236 237 static bool classof(const SectionCommand *c) { return c->kind == ByteKind; } 238 239 // Keeps string representing the command. Used for -Map" is perhaps better. 240 std::string commandString; 241 242 Expr expression; 243 244 // This is just an offset of this assignment command in the output section. 245 unsigned offset; 246 247 // Size of this data command. 248 unsigned size; 249 }; 250 251 struct InsertCommand { 252 SmallVector<StringRef, 0> names; 253 bool isAfter; 254 StringRef where; 255 }; 256 257 struct PhdrsCommand { 258 StringRef name; 259 unsigned type = llvm::ELF::PT_NULL; 260 bool hasFilehdr = false; 261 bool hasPhdrs = false; 262 std::optional<unsigned> flags; 263 Expr lmaExpr = nullptr; 264 }; 265 266 class LinkerScript final { 267 // Temporary state used in processSectionCommands() and assignAddresses() 268 // that must be reinitialized for each call to the above functions, and must 269 // not be used outside of the scope of a call to the above functions. 270 struct AddressState { 271 AddressState(); 272 OutputSection *outSec = nullptr; 273 MemoryRegion *memRegion = nullptr; 274 MemoryRegion *lmaRegion = nullptr; 275 uint64_t lmaOffset = 0; 276 uint64_t tbssAddr = 0; 277 }; 278 279 llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection; 280 281 void addSymbol(SymbolAssignment *cmd); 282 void assignSymbol(SymbolAssignment *cmd, bool inSec); 283 void setDot(Expr e, const Twine &loc, bool inSec); 284 void expandOutputSection(uint64_t size); 285 void expandMemoryRegions(uint64_t size); 286 287 SmallVector<InputSectionBase *, 0> 288 computeInputSections(const InputSectionDescription *, 289 ArrayRef<InputSectionBase *>); 290 291 SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd); 292 293 void discardSynthetic(OutputSection &); 294 295 SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec); 296 297 std::pair<MemoryRegion *, MemoryRegion *> 298 findMemoryRegion(OutputSection *sec, MemoryRegion *hint); 299 300 void assignOffsets(OutputSection *sec); 301 302 // This captures the local AddressState and makes it accessible 303 // deliberately. This is needed as there are some cases where we cannot just 304 // thread the current state through to a lambda function created by the 305 // script parser. 306 // This should remain a plain pointer as its lifetime is smaller than 307 // LinkerScript. 308 AddressState *state = nullptr; 309 310 OutputSection *aether; 311 312 uint64_t dot; 313 314 public: 315 OutputDesc *createOutputSection(StringRef name, StringRef location); 316 OutputDesc *getOrCreateOutputSection(StringRef name); 317 318 bool hasPhdrsCommands() { return !phdrsCommands.empty(); } 319 uint64_t getDot() { return dot; } 320 void discard(InputSectionBase &s); 321 322 ExprValue getSymbolValue(StringRef name, const Twine &loc); 323 324 void addOrphanSections(); 325 void diagnoseOrphanHandling() const; 326 void diagnoseMissingSGSectionAddress() const; 327 void adjustOutputSections(); 328 void adjustSectionsAfterSorting(); 329 330 SmallVector<PhdrEntry *, 0> createPhdrs(); 331 bool needsInterpSection(); 332 333 bool shouldKeep(InputSectionBase *s); 334 const Defined *assignAddresses(); 335 void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs); 336 void processSectionCommands(); 337 void processSymbolAssignments(); 338 void declareSymbols(); 339 340 bool isDiscarded(const OutputSection *sec) const; 341 342 // Used to handle INSERT AFTER statements. 343 void processInsertCommands(); 344 345 // Describe memory region usage. 346 void printMemoryUsage(raw_ostream &os); 347 348 // Check backward location counter assignment and memory region/LMA overflows. 349 void checkFinalScriptConditions() const; 350 351 // SECTIONS command list. 352 SmallVector<SectionCommand *, 0> sectionCommands; 353 354 // PHDRS command list. 355 SmallVector<PhdrsCommand, 0> phdrsCommands; 356 357 bool hasSectionsCommand = false; 358 bool seenDataAlign = false; 359 bool seenRelroEnd = false; 360 bool errorOnMissingSection = false; 361 std::string backwardDotErr; 362 363 // List of section patterns specified with KEEP commands. They will 364 // be kept even if they are unused and --gc-sections is specified. 365 SmallVector<InputSectionDescription *, 0> keptSections; 366 367 // A map from memory region name to a memory region descriptor. 368 llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions; 369 370 // A list of symbols referenced by the script. 371 SmallVector<llvm::StringRef, 0> referencedSymbols; 372 373 // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need 374 // to be reordered. 375 SmallVector<InsertCommand, 0> insertCommands; 376 377 // OutputSections specified by OVERWRITE_SECTIONS. 378 SmallVector<OutputDesc *, 0> overwriteSections; 379 380 // Sections that will be warned/errored by --orphan-handling. 381 SmallVector<const InputSectionBase *, 0> orphanSections; 382 }; 383 384 LLVM_LIBRARY_VISIBILITY extern std::unique_ptr<LinkerScript> script; 385 386 } // end namespace lld::elf 387 388 #endif // LLD_ELF_LINKER_SCRIPT_H 389