1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_LINKER_SCRIPT_H 10 #define LLD_ELF_LINKER_SCRIPT_H 11 12 #include "Config.h" 13 #include "Writer.h" 14 #include "lld/Common/LLVM.h" 15 #include "lld/Common/Strings.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/MapVector.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Support/MemoryBuffer.h" 22 #include <cstddef> 23 #include <cstdint> 24 #include <functional> 25 #include <memory> 26 #include <vector> 27 28 namespace lld { 29 namespace elf { 30 31 class Defined; 32 class InputFile; 33 class InputSection; 34 class InputSectionBase; 35 class OutputSection; 36 class SectionBase; 37 class Symbol; 38 class ThunkSection; 39 40 // This represents an r-value in the linker script. 41 struct ExprValue { 42 ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val, 43 const Twine &loc) 44 : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {} 45 46 ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {} 47 48 bool isAbsolute() const { return forceAbsolute || sec == nullptr; } 49 uint64_t getValue() const; 50 uint64_t getSecAddr() const; 51 uint64_t getSectionOffset() const; 52 53 // If a value is relative to a section, it has a non-null Sec. 54 SectionBase *sec; 55 56 // True if this expression is enclosed in ABSOLUTE(). 57 // This flag affects the return value of getValue(). 58 bool forceAbsolute; 59 60 uint64_t val; 61 uint64_t alignment = 1; 62 63 // The original st_type if the expression represents a symbol. Any operation 64 // resets type to STT_NOTYPE. 65 uint8_t type = llvm::ELF::STT_NOTYPE; 66 67 // Original source location. Used for error messages. 68 std::string loc; 69 }; 70 71 // This represents an expression in the linker script. 72 // ScriptParser::readExpr reads an expression and returns an Expr. 73 // Later, we evaluate the expression by calling the function. 74 using Expr = std::function<ExprValue()>; 75 76 // This enum is used to implement linker script SECTIONS command. 77 // https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS 78 enum SectionsCommandKind { 79 AssignmentKind, // . = expr or <sym> = expr 80 OutputSectionKind, 81 InputSectionKind, 82 ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr) 83 }; 84 85 struct BaseCommand { 86 BaseCommand(int k) : kind(k) {} 87 int kind; 88 }; 89 90 // This represents ". = <expr>" or "<symbol> = <expr>". 91 struct SymbolAssignment : BaseCommand { 92 SymbolAssignment(StringRef name, Expr e, std::string loc) 93 : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {} 94 95 static bool classof(const BaseCommand *c) { 96 return c->kind == AssignmentKind; 97 } 98 99 // The LHS of an expression. Name is either a symbol name or ".". 100 StringRef name; 101 Defined *sym = nullptr; 102 103 // The RHS of an expression. 104 Expr expression; 105 106 // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN. 107 bool provide = false; 108 bool hidden = false; 109 110 // Holds file name and line number for error reporting. 111 std::string location; 112 113 // A string representation of this command. We use this for -Map. 114 std::string commandString; 115 116 // Address of this assignment command. 117 uint64_t addr; 118 119 // Size of this assignment command. This is usually 0, but if 120 // you move '.' this may be greater than 0. 121 uint64_t size; 122 }; 123 124 // Linker scripts allow additional constraints to be put on output sections. 125 // If an output section is marked as ONLY_IF_RO, the section is created 126 // only if its input sections are read-only. Likewise, an output section 127 // with ONLY_IF_RW is created if all input sections are RW. 128 enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite }; 129 130 // This struct is used to represent the location and size of regions of 131 // target memory. Instances of the struct are created by parsing the 132 // MEMORY command. 133 struct MemoryRegion { 134 MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags, 135 uint32_t negFlags) 136 : name(std::string(name)), origin(origin), length(length), flags(flags), 137 negFlags(negFlags) {} 138 139 std::string name; 140 Expr origin; 141 Expr length; 142 uint32_t flags; 143 uint32_t negFlags; 144 uint64_t curPos = 0; 145 }; 146 147 // This struct represents one section match pattern in SECTIONS() command. 148 // It can optionally have negative match pattern for EXCLUDED_FILE command. 149 // Also it may be surrounded with SORT() command, so contains sorting rules. 150 class SectionPattern { 151 StringMatcher excludedFilePat; 152 153 // Cache of the most recent input argument and result of excludesFile(). 154 mutable llvm::Optional<std::pair<const InputFile *, bool>> excludesFileCache; 155 156 public: 157 SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2) 158 : excludedFilePat(pat1), sectionPat(pat2), 159 sortOuter(SortSectionPolicy::Default), 160 sortInner(SortSectionPolicy::Default) {} 161 162 bool excludesFile(const InputFile *file) const; 163 164 StringMatcher sectionPat; 165 SortSectionPolicy sortOuter; 166 SortSectionPolicy sortInner; 167 }; 168 169 class InputSectionDescription : public BaseCommand { 170 SingleStringMatcher filePat; 171 172 // Cache of the most recent input argument and result of matchesFile(). 173 mutable llvm::Optional<std::pair<const InputFile *, bool>> matchesFileCache; 174 175 public: 176 InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0, 177 uint64_t withoutFlags = 0) 178 : BaseCommand(InputSectionKind), filePat(filePattern), 179 withFlags(withFlags), withoutFlags(withoutFlags) {} 180 181 static bool classof(const BaseCommand *c) { 182 return c->kind == InputSectionKind; 183 } 184 185 bool matchesFile(const InputFile *file) const; 186 187 // Input sections that matches at least one of SectionPatterns 188 // will be associated with this InputSectionDescription. 189 std::vector<SectionPattern> sectionPatterns; 190 191 // Includes InputSections and MergeInputSections. Used temporarily during 192 // assignment of input sections to output sections. 193 std::vector<InputSectionBase *> sectionBases; 194 195 // Used after the finalizeInputSections() pass. MergeInputSections have been 196 // merged into MergeSyntheticSections. 197 std::vector<InputSection *> sections; 198 199 // Temporary record of synthetic ThunkSection instances and the pass that 200 // they were created in. This is used to insert newly created ThunkSections 201 // into Sections at the end of a createThunks() pass. 202 std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections; 203 204 // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command. 205 uint64_t withFlags; 206 uint64_t withoutFlags; 207 }; 208 209 // Represents BYTE(), SHORT(), LONG(), or QUAD(). 210 struct ByteCommand : BaseCommand { 211 ByteCommand(Expr e, unsigned size, std::string commandString) 212 : BaseCommand(ByteKind), commandString(commandString), expression(e), 213 size(size) {} 214 215 static bool classof(const BaseCommand *c) { return c->kind == ByteKind; } 216 217 // Keeps string representing the command. Used for -Map" is perhaps better. 218 std::string commandString; 219 220 Expr expression; 221 222 // This is just an offset of this assignment command in the output section. 223 unsigned offset; 224 225 // Size of this data command. 226 unsigned size; 227 }; 228 229 struct InsertCommand { 230 std::vector<StringRef> names; 231 bool isAfter; 232 StringRef where; 233 }; 234 235 struct PhdrsCommand { 236 StringRef name; 237 unsigned type = llvm::ELF::PT_NULL; 238 bool hasFilehdr = false; 239 bool hasPhdrs = false; 240 llvm::Optional<unsigned> flags; 241 Expr lmaExpr = nullptr; 242 }; 243 244 class LinkerScript final { 245 // Temporary state used in processSectionCommands() and assignAddresses() 246 // that must be reinitialized for each call to the above functions, and must 247 // not be used outside of the scope of a call to the above functions. 248 struct AddressState { 249 AddressState(); 250 OutputSection *outSec = nullptr; 251 MemoryRegion *memRegion = nullptr; 252 MemoryRegion *lmaRegion = nullptr; 253 uint64_t lmaOffset = 0; 254 uint64_t tbssAddr = 0; 255 }; 256 257 llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection; 258 259 void addSymbol(SymbolAssignment *cmd); 260 void assignSymbol(SymbolAssignment *cmd, bool inSec); 261 void setDot(Expr e, const Twine &loc, bool inSec); 262 void expandOutputSection(uint64_t size); 263 void expandMemoryRegions(uint64_t size); 264 265 std::vector<InputSectionBase *> 266 computeInputSections(const InputSectionDescription *, 267 ArrayRef<InputSectionBase *>); 268 269 std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd); 270 271 void discardSynthetic(OutputSection &); 272 273 std::vector<size_t> getPhdrIndices(OutputSection *sec); 274 275 std::pair<MemoryRegion *, MemoryRegion *> 276 findMemoryRegion(OutputSection *sec, MemoryRegion *hint); 277 278 void switchTo(OutputSection *sec); 279 uint64_t advance(uint64_t size, unsigned align); 280 void output(InputSection *sec); 281 282 void assignOffsets(OutputSection *sec); 283 284 // Ctx captures the local AddressState and makes it accessible 285 // deliberately. This is needed as there are some cases where we cannot just 286 // thread the current state through to a lambda function created by the 287 // script parser. 288 // This should remain a plain pointer as its lifetime is smaller than 289 // LinkerScript. 290 AddressState *ctx = nullptr; 291 292 OutputSection *aether; 293 294 uint64_t dot; 295 296 public: 297 OutputSection *createOutputSection(StringRef name, StringRef location); 298 OutputSection *getOrCreateOutputSection(StringRef name); 299 300 bool hasPhdrsCommands() { return !phdrsCommands.empty(); } 301 uint64_t getDot() { return dot; } 302 void discard(InputSectionBase *s); 303 304 ExprValue getSymbolValue(StringRef name, const Twine &loc); 305 306 void addOrphanSections(); 307 void diagnoseOrphanHandling() const; 308 void adjustSectionsBeforeSorting(); 309 void adjustSectionsAfterSorting(); 310 311 std::vector<PhdrEntry *> createPhdrs(); 312 bool needsInterpSection(); 313 314 bool shouldKeep(InputSectionBase *s); 315 const Defined *assignAddresses(); 316 void allocateHeaders(std::vector<PhdrEntry *> &phdrs); 317 void processSectionCommands(); 318 void processSymbolAssignments(); 319 void declareSymbols(); 320 321 bool isDiscarded(const OutputSection *sec) const; 322 323 // Used to handle INSERT AFTER statements. 324 void processInsertCommands(); 325 326 // SECTIONS command list. 327 std::vector<BaseCommand *> sectionCommands; 328 329 // PHDRS command list. 330 std::vector<PhdrsCommand> phdrsCommands; 331 332 bool hasSectionsCommand = false; 333 bool errorOnMissingSection = false; 334 335 // List of section patterns specified with KEEP commands. They will 336 // be kept even if they are unused and --gc-sections is specified. 337 std::vector<InputSectionDescription *> keptSections; 338 339 // A map from memory region name to a memory region descriptor. 340 llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions; 341 342 // A list of symbols referenced by the script. 343 std::vector<llvm::StringRef> referencedSymbols; 344 345 // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need 346 // to be reordered. 347 std::vector<InsertCommand> insertCommands; 348 349 // OutputSections specified by OVERWRITE_SECTIONS. 350 std::vector<OutputSection *> overwriteSections; 351 352 // Sections that will be warned/errored by --orphan-handling. 353 std::vector<const InputSectionBase *> orphanSections; 354 }; 355 356 extern LinkerScript *script; 357 358 } // end namespace elf 359 } // end namespace lld 360 361 #endif // LLD_ELF_LINKER_SCRIPT_H 362