1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_LINKER_SCRIPT_H 10 #define LLD_ELF_LINKER_SCRIPT_H 11 12 #include "Config.h" 13 #include "Writer.h" 14 #include "lld/Common/LLVM.h" 15 #include "lld/Common/Strings.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/MapVector.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/MemoryBuffer.h" 22 #include <cstddef> 23 #include <cstdint> 24 #include <functional> 25 #include <memory> 26 #include <vector> 27 28 namespace lld { 29 namespace elf { 30 31 class Defined; 32 class InputFile; 33 class InputSection; 34 class InputSectionBase; 35 class OutputSection; 36 class SectionBase; 37 class Symbol; 38 class ThunkSection; 39 40 // This represents an r-value in the linker script. 41 struct ExprValue { 42 ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, 43 const Twine &loc) 44 : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {} 45 46 ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {} 47 48 bool isAbsolute() const { return forceAbsolute || sec == nullptr; } 49 uint64_t getValue() const; 50 uint64_t getSecAddr() const; 51 uint64_t getSectionOffset() const; 52 53 // If a value is relative to a section, it has a non-null Sec. 54 SectionBase *sec; 55 56 uint64_t val; 57 uint64_t alignment = 1; 58 59 // The original st_type if the expression represents a symbol. Any operation 60 // resets type to STT_NOTYPE. 61 uint8_t type = llvm::ELF::STT_NOTYPE; 62 63 // True if this expression is enclosed in ABSOLUTE(). 64 // This flag affects the return value of getValue(). 65 bool forceAbsolute; 66 67 // Original source location. Used for error messages. 68 std::string loc; 69 }; 70 71 // This represents an expression in the linker script. 72 // ScriptParser::readExpr reads an expression and returns an Expr. 73 // Later, we evaluate the expression by calling the function. 74 using Expr = std::function<ExprValue()>; 75 76 // This enum is used to implement linker script SECTIONS command. 77 // https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS 78 enum SectionsCommandKind { 79 AssignmentKind, // . = expr or <sym> = expr 80 OutputSectionKind, 81 InputSectionKind, 82 ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) 83 }; 84 85 struct SectionCommand { 86 SectionCommand(int k) : kind(k) {} 87 int kind; 88 }; 89 90 // This represents ". = <expr>" or "<symbol> = <expr>". 91 struct SymbolAssignment : SectionCommand { 92 SymbolAssignment(StringRef name, Expr e, std::string loc) 93 : SectionCommand(AssignmentKind), name(name), expression(e), 94 location(loc) {} 95 96 static bool classof(const SectionCommand *c) { 97 return c->kind == AssignmentKind; 98 } 99 100 // The LHS of an expression. Name is either a symbol name or ".". 101 StringRef name; 102 Defined *sym = nullptr; 103 104 // The RHS of an expression. 105 Expr expression; 106 107 // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. 108 bool provide = false; 109 bool hidden = false; 110 111 // Holds file name and line number for error reporting. 112 std::string location; 113 114 // A string representation of this command. We use this for -Map. 115 std::string commandString; 116 117 // Address of this assignment command. 118 uint64_t addr; 119 120 // Size of this assignment command. This is usually 0, but if 121 // you move '.' this may be greater than 0. 122 uint64_t size; 123 }; 124 125 // Linker scripts allow additional constraints to be put on output sections. 126 // If an output section is marked as ONLY_IF_RO, the section is created 127 // only if its input sections are read-only. Likewise, an output section 128 // with ONLY_IF_RW is created if all input sections are RW. 129 enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; 130 131 // This struct is used to represent the location and size of regions of 132 // target memory. Instances of the struct are created by parsing the 133 // MEMORY command. 134 struct MemoryRegion { 135 MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags, 136 uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags) 137 : name(std::string(name)), origin(origin), length(length), flags(flags), 138 invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {} 139 140 std::string name; 141 Expr origin; 142 Expr length; 143 // A section can be assigned to the region if any of these ELF section flags 144 // are set... 145 uint32_t flags; 146 // ... or any of these flags are not set. 147 // For example, the memory region attribute "r" maps to SHF_WRITE. 148 uint32_t invFlags; 149 // A section cannot be assigned to the region if any of these ELF section 150 // flags are set... 151 uint32_t negFlags; 152 // ... or any of these flags are not set. 153 // For example, the memory region attribute "!r" maps to SHF_WRITE. 154 uint32_t negInvFlags; 155 uint64_t curPos = 0; 156 157 bool compatibleWith(uint32_t secFlags) const { 158 if ((secFlags & negFlags) || (~secFlags & negInvFlags)) 159 return false; 160 return (secFlags & flags) || (~secFlags & invFlags); 161 } 162 }; 163 164 // This struct represents one section match pattern in SECTIONS() command. 165 // It can optionally have negative match pattern for EXCLUDED_FILE command. 166 // Also it may be surrounded with SORT() command, so contains sorting rules. 167 class SectionPattern { 168 StringMatcher excludedFilePat; 169 170 // Cache of the most recent input argument and result of excludesFile(). 171 mutable llvm::Optional<std::pair<const InputFile *, bool>> excludesFileCache; 172 173 public: 174 SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) 175 : excludedFilePat(pat1), sectionPat(pat2), 176 sortOuter(SortSectionPolicy::Default), 177 sortInner(SortSectionPolicy::Default) {} 178 179 bool excludesFile(const InputFile *file) const; 180 181 StringMatcher sectionPat; 182 SortSectionPolicy sortOuter; 183 SortSectionPolicy sortInner; 184 }; 185 186 class InputSectionDescription : public SectionCommand { 187 SingleStringMatcher filePat; 188 189 // Cache of the most recent input argument and result of matchesFile(). 190 mutable llvm::Optional<std::pair<const InputFile *, bool>> matchesFileCache; 191 192 public: 193 InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0, 194 uint64_t withoutFlags = 0) 195 : SectionCommand(InputSectionKind), filePat(filePattern), 196 withFlags(withFlags), withoutFlags(withoutFlags) {} 197 198 static bool classof(const SectionCommand *c) { 199 return c->kind == InputSectionKind; 200 } 201 202 bool matchesFile(const InputFile *file) const; 203 204 // Input sections that matches at least one of SectionPatterns 205 // will be associated with this InputSectionDescription. 206 SmallVector<SectionPattern, 0> sectionPatterns; 207 208 // Includes InputSections and MergeInputSections. Used temporarily during 209 // assignment of input sections to output sections. 210 SmallVector<InputSectionBase *, 0> sectionBases; 211 212 // Used after the finalizeInputSections() pass. MergeInputSections have been 213 // merged into MergeSyntheticSections. 214 SmallVector<InputSection *, 0> sections; 215 216 // Temporary record of synthetic ThunkSection instances and the pass that 217 // they were created in. This is used to insert newly created ThunkSections 218 // into Sections at the end of a createThunks() pass. 219 SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections; 220 221 // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command. 222 uint64_t withFlags; 223 uint64_t withoutFlags; 224 }; 225 226 // Represents BYTE(), SHORT(), LONG(), or QUAD(). 227 struct ByteCommand : SectionCommand { 228 ByteCommand(Expr e, unsigned size, std::string commandString) 229 : SectionCommand(ByteKind), commandString(commandString), expression(e), 230 size(size) {} 231 232 static bool classof(const SectionCommand *c) { return c->kind == ByteKind; } 233 234 // Keeps string representing the command. Used for -Map" is perhaps better. 235 std::string commandString; 236 237 Expr expression; 238 239 // This is just an offset of this assignment command in the output section. 240 unsigned offset; 241 242 // Size of this data command. 243 unsigned size; 244 }; 245 246 struct InsertCommand { 247 SmallVector<StringRef, 0> names; 248 bool isAfter; 249 StringRef where; 250 }; 251 252 struct PhdrsCommand { 253 StringRef name; 254 unsigned type = llvm::ELF::PT_NULL; 255 bool hasFilehdr = false; 256 bool hasPhdrs = false; 257 llvm::Optional<unsigned> flags; 258 Expr lmaExpr = nullptr; 259 }; 260 261 class LinkerScript final { 262 // Temporary state used in processSectionCommands() and assignAddresses() 263 // that must be reinitialized for each call to the above functions, and must 264 // not be used outside of the scope of a call to the above functions. 265 struct AddressState { 266 AddressState(); 267 OutputSection *outSec = nullptr; 268 MemoryRegion *memRegion = nullptr; 269 MemoryRegion *lmaRegion = nullptr; 270 uint64_t lmaOffset = 0; 271 uint64_t tbssAddr = 0; 272 }; 273 274 llvm::DenseMap<llvm::CachedHashStringRef, OutputSection *> 275 nameToOutputSection; 276 277 void addSymbol(SymbolAssignment *cmd); 278 void assignSymbol(SymbolAssignment *cmd, bool inSec); 279 void setDot(Expr e, const Twine &loc, bool inSec); 280 void expandOutputSection(uint64_t size); 281 void expandMemoryRegions(uint64_t size); 282 283 SmallVector<InputSectionBase *, 0> 284 computeInputSections(const InputSectionDescription *, 285 ArrayRef<InputSectionBase *>); 286 287 SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd); 288 289 void discardSynthetic(OutputSection &); 290 291 SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec); 292 293 std::pair<MemoryRegion *, MemoryRegion *> 294 findMemoryRegion(OutputSection *sec, MemoryRegion *hint); 295 296 void assignOffsets(OutputSection *sec); 297 298 // Ctx captures the local AddressState and makes it accessible 299 // deliberately. This is needed as there are some cases where we cannot just 300 // thread the current state through to a lambda function created by the 301 // script parser. 302 // This should remain a plain pointer as its lifetime is smaller than 303 // LinkerScript. 304 AddressState *ctx = nullptr; 305 306 OutputSection *aether; 307 308 uint64_t dot; 309 310 public: 311 OutputSection *createOutputSection(StringRef name, StringRef location); 312 OutputSection *getOrCreateOutputSection(StringRef name); 313 314 bool hasPhdrsCommands() { return !phdrsCommands.empty(); } 315 uint64_t getDot() { return dot; } 316 void discard(InputSectionBase &s); 317 318 ExprValue getSymbolValue(StringRef name, const Twine &loc); 319 320 void addOrphanSections(); 321 void diagnoseOrphanHandling() const; 322 void adjustOutputSections(); 323 void adjustSectionsAfterSorting(); 324 325 SmallVector<PhdrEntry *, 0> createPhdrs(); 326 bool needsInterpSection(); 327 328 bool shouldKeep(InputSectionBase *s); 329 const Defined *assignAddresses(); 330 void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs); 331 void processSectionCommands(); 332 void processSymbolAssignments(); 333 void declareSymbols(); 334 335 bool isDiscarded(const OutputSection *sec) const; 336 337 // Used to handle INSERT AFTER statements. 338 void processInsertCommands(); 339 340 // SECTIONS command list. 341 SmallVector<SectionCommand *, 0> sectionCommands; 342 343 // PHDRS command list. 344 SmallVector<PhdrsCommand, 0> phdrsCommands; 345 346 bool hasSectionsCommand = false; 347 bool errorOnMissingSection = false; 348 349 // List of section patterns specified with KEEP commands. They will 350 // be kept even if they are unused and --gc-sections is specified. 351 SmallVector<InputSectionDescription *, 0> keptSections; 352 353 // A map from memory region name to a memory region descriptor. 354 llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions; 355 356 // A list of symbols referenced by the script. 357 SmallVector<llvm::StringRef, 0> referencedSymbols; 358 359 // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need 360 // to be reordered. 361 SmallVector<InsertCommand, 0> insertCommands; 362 363 // OutputSections specified by OVERWRITE_SECTIONS. 364 SmallVector<OutputSection *, 0> overwriteSections; 365 366 // Sections that will be warned/errored by --orphan-handling. 367 SmallVector<const InputSectionBase *, 0> orphanSections; 368 }; 369 370 extern std::unique_ptr<LinkerScript> script; 371 372 } // end namespace elf 373 } // end namespace lld 374 375 #endif // LLD_ELF_LINKER_SCRIPT_H 376