xref: /freebsd/contrib/llvm-project/lld/ELF/LinkerScript.h (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_LINKER_SCRIPT_H
10 #define LLD_ELF_LINKER_SCRIPT_H
11 
#include "Config.h"
#include "InputSection.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <utility>
27 
28 namespace lld::elf {
29 
30 class Defined;
31 class InputFile;
32 class InputSection;
33 class InputSectionBase;
34 class OutputSection;
35 class SectionBase;
36 class ThunkSection;
37 struct OutputDesc;
38 
39 // This represents an r-value in the linker script.
40 struct ExprValue {
41   ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
42             const Twine &loc)
43       : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
44 
45   ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
46 
47   bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
48   uint64_t getValue() const;
49   uint64_t getSecAddr() const;
50   uint64_t getSectionOffset() const;
51 
52   // If a value is relative to a section, it has a non-null Sec.
53   SectionBase *sec;
54 
55   uint64_t val;
56   uint64_t alignment = 1;
57 
58   // The original st_type if the expression represents a symbol. Any operation
59   // resets type to STT_NOTYPE.
60   uint8_t type = llvm::ELF::STT_NOTYPE;
61 
62   // True if this expression is enclosed in ABSOLUTE().
63   // This flag affects the return value of getValue().
64   bool forceAbsolute;
65 
66   // Original source location. Used for error messages.
67   std::string loc;
68 };
69 
70 // This represents an expression in the linker script.
71 // ScriptParser::readExpr reads an expression and returns an Expr.
72 // Later, we evaluate the expression by calling the function.
73 using Expr = std::function<ExprValue()>;
74 
// Discriminator for the commands that make up a linker script SECTIONS
// command. See https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr".
  AssignmentKind,
  OutputSectionKind,
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr).
  ByteKind
};
83 
// Common base of the SECTIONS sub-commands. `kind` holds the discriminator
// that the derived types' classof() functions compare against.
struct SectionCommand {
  SectionCommand(int commandKind) : kind(commandKind) {}

  int kind;
};
88 
89 // This represents ". = <expr>" or "<symbol> = <expr>".
90 struct SymbolAssignment : SectionCommand {
91   SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc)
92       : SectionCommand(AssignmentKind), name(name), expression(e),
93         symOrder(symOrder), location(loc) {}
94 
95   static bool classof(const SectionCommand *c) {
96     return c->kind == AssignmentKind;
97   }
98 
99   // The LHS of an expression. Name is either a symbol name or ".".
100   StringRef name;
101   Defined *sym = nullptr;
102 
103   // The RHS of an expression.
104   Expr expression;
105 
106   // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
107   bool provide = false;
108   bool hidden = false;
109 
110   // This assignment references DATA_SEGMENT_RELRO_END.
111   bool dataSegmentRelroEnd = false;
112 
113   unsigned symOrder;
114 
115   // Holds file name and line number for error reporting.
116   std::string location;
117 
118   // A string representation of this command. We use this for -Map.
119   std::string commandString;
120 
121   // Address of this assignment command.
122   uint64_t addr;
123 
124   // Size of this assignment command. This is usually 0, but if
125   // you move '.' this may be greater than 0.
126   uint64_t size;
127 };
128 
// Extra constraints a linker script may place on an output section.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; one marked ONLY_IF_RW is created if all of its input
// sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
134 
135 // This struct is used to represent the location and size of regions of
136 // target memory. Instances of the struct are created by parsing the
137 // MEMORY command.
138 struct MemoryRegion {
139   MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
140                uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
141       : name(std::string(name)), origin(origin), length(length), flags(flags),
142         invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}
143 
144   std::string name;
145   Expr origin;
146   Expr length;
147   // A section can be assigned to the region if any of these ELF section flags
148   // are set...
149   uint32_t flags;
150   // ... or any of these flags are not set.
151   // For example, the memory region attribute "r" maps to SHF_WRITE.
152   uint32_t invFlags;
153   // A section cannot be assigned to the region if any of these ELF section
154   // flags are set...
155   uint32_t negFlags;
156   // ... or any of these flags are not set.
157   // For example, the memory region attribute "!r" maps to SHF_WRITE.
158   uint32_t negInvFlags;
159   uint64_t curPos = 0;
160 
161   uint64_t getOrigin() const { return origin().getValue(); }
162   uint64_t getLength() const { return length().getValue(); }
163 
164   bool compatibleWith(uint32_t secFlags) const {
165     if ((secFlags & negFlags) || (~secFlags & negInvFlags))
166       return false;
167     return (secFlags & flags) || (~secFlags & invFlags);
168   }
169 };
170 
171 // This struct represents one section match pattern in SECTIONS() command.
172 // It can optionally have negative match pattern for EXCLUDED_FILE command.
173 // Also it may be surrounded with SORT() command, so contains sorting rules.
174 class SectionPattern {
175   StringMatcher excludedFilePat;
176 
177   // Cache of the most recent input argument and result of excludesFile().
178   mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache;
179 
180 public:
181   SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
182       : excludedFilePat(pat1), sectionPat(pat2),
183         sortOuter(SortSectionPolicy::Default),
184         sortInner(SortSectionPolicy::Default) {}
185 
186   bool excludesFile(const InputFile *file) const;
187 
188   StringMatcher sectionPat;
189   SortSectionPolicy sortOuter;
190   SortSectionPolicy sortInner;
191 };
192 
193 class InputSectionDescription : public SectionCommand {
194   SingleStringMatcher filePat;
195 
196   // Cache of the most recent input argument and result of matchesFile().
197   mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache;
198 
199 public:
200   InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
201                           uint64_t withoutFlags = 0)
202       : SectionCommand(InputSectionKind), filePat(filePattern),
203         withFlags(withFlags), withoutFlags(withoutFlags) {}
204 
205   static bool classof(const SectionCommand *c) {
206     return c->kind == InputSectionKind;
207   }
208 
209   bool matchesFile(const InputFile *file) const;
210 
211   // Input sections that matches at least one of SectionPatterns
212   // will be associated with this InputSectionDescription.
213   SmallVector<SectionPattern, 0> sectionPatterns;
214 
215   // Includes InputSections and MergeInputSections. Used temporarily during
216   // assignment of input sections to output sections.
217   SmallVector<InputSectionBase *, 0> sectionBases;
218 
219   // Used after the finalizeInputSections() pass. MergeInputSections have been
220   // merged into MergeSyntheticSections.
221   SmallVector<InputSection *, 0> sections;
222 
223   // Temporary record of synthetic ThunkSection instances and the pass that
224   // they were created in. This is used to insert newly created ThunkSections
225   // into Sections at the end of a createThunks() pass.
226   SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;
227 
228   // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
229   uint64_t withFlags;
230   uint64_t withoutFlags;
231 };
232 
233 // Represents BYTE(), SHORT(), LONG(), or QUAD().
234 struct ByteCommand : SectionCommand {
235   ByteCommand(Expr e, unsigned size, std::string commandString)
236       : SectionCommand(ByteKind), commandString(commandString), expression(e),
237         size(size) {}
238 
239   static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
240 
241   // Keeps string representing the command. Used for -Map" is perhaps better.
242   std::string commandString;
243 
244   Expr expression;
245 
246   // This is just an offset of this assignment command in the output section.
247   unsigned offset;
248 
249   // Size of this data command.
250   unsigned size;
251 };
252 
253 struct InsertCommand {
254   SmallVector<StringRef, 0> names;
255   bool isAfter;
256   StringRef where;
257 };
258 
259 // A NOCROSSREFS/NOCROSSREFS_TO command that prohibits references between
260 // certain output sections.
261 struct NoCrossRefCommand {
262   SmallVector<StringRef, 0> outputSections;
263 
264   // When true, this describes a NOCROSSREFS_TO command that probits references
265   // to the first output section from any of the other sections.
266   bool toFirst = false;
267 };
268 
269 struct PhdrsCommand {
270   StringRef name;
271   unsigned type = llvm::ELF::PT_NULL;
272   bool hasFilehdr = false;
273   bool hasPhdrs = false;
274   std::optional<unsigned> flags;
275   Expr lmaExpr = nullptr;
276 };
277 
278 class LinkerScript final {
279   // Temporary state used in processSectionCommands() and assignAddresses()
280   // that must be reinitialized for each call to the above functions, and must
281   // not be used outside of the scope of a call to the above functions.
282   struct AddressState {
283     AddressState();
284     OutputSection *outSec = nullptr;
285     MemoryRegion *memRegion = nullptr;
286     MemoryRegion *lmaRegion = nullptr;
287     uint64_t lmaOffset = 0;
288     uint64_t tbssAddr = 0;
289   };
290 
291   llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection;
292 
293   void addSymbol(SymbolAssignment *cmd);
294   void assignSymbol(SymbolAssignment *cmd, bool inSec);
295   void setDot(Expr e, const Twine &loc, bool inSec);
296   void expandOutputSection(uint64_t size);
297   void expandMemoryRegions(uint64_t size);
298 
299   SmallVector<InputSectionBase *, 0>
300   computeInputSections(const InputSectionDescription *,
301                        ArrayRef<InputSectionBase *>,
302                        const OutputSection &outCmd);
303 
304   SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
305 
306   void discardSynthetic(OutputSection &);
307 
308   SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec);
309 
310   std::pair<MemoryRegion *, MemoryRegion *>
311   findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
312 
313   bool assignOffsets(OutputSection *sec);
314 
315   // This captures the local AddressState and makes it accessible
316   // deliberately. This is needed as there are some cases where we cannot just
317   // thread the current state through to a lambda function created by the
318   // script parser.
319   // This should remain a plain pointer as its lifetime is smaller than
320   // LinkerScript.
321   AddressState *state = nullptr;
322 
323   OutputSection *aether;
324 
325   uint64_t dot;
326 
327 public:
328   OutputDesc *createOutputSection(StringRef name, StringRef location);
329   OutputDesc *getOrCreateOutputSection(StringRef name);
330 
331   bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
332   uint64_t getDot() { return dot; }
333   void discard(InputSectionBase &s);
334 
335   ExprValue getSymbolValue(StringRef name, const Twine &loc);
336 
337   void addOrphanSections();
338   void diagnoseOrphanHandling() const;
339   void diagnoseMissingSGSectionAddress() const;
340   void adjustOutputSections();
341   void adjustSectionsAfterSorting();
342 
343   SmallVector<PhdrEntry *, 0> createPhdrs();
344   bool needsInterpSection();
345 
346   bool shouldKeep(InputSectionBase *s);
347   std::pair<const OutputSection *, const Defined *> assignAddresses();
348   bool spillSections();
349   void erasePotentialSpillSections();
350   void allocateHeaders(SmallVector<PhdrEntry *, 0> &phdrs);
351   void processSectionCommands();
352   void processSymbolAssignments();
353   void declareSymbols();
354 
355   // Used to handle INSERT AFTER statements.
356   void processInsertCommands();
357 
358   // Describe memory region usage.
359   void printMemoryUsage(raw_ostream &os);
360 
361   // Record a pending error during an assignAddresses invocation.
362   // assignAddresses is executed more than once. Therefore, lld::error should be
363   // avoided to not report duplicate errors.
364   void recordError(const Twine &msg);
365 
366   // Check backward location counter assignment and memory region/LMA overflows.
367   void checkFinalScriptConditions() const;
368 
369   // Add symbols that are referenced in the linker script to the symbol table.
370   // Symbols referenced in a PROVIDE command are only added to the symbol table
371   // if the PROVIDE command actually provides the symbol.
372   // It also adds the symbols referenced by the used PROVIDE symbols to the
373   // linker script referenced symbols list.
374   void addScriptReferencedSymbolsToSymTable();
375 
376   // Returns true if the PROVIDE symbol should be added to the link.
377   // A PROVIDE symbol is added to the link only if it satisfies an
378   // undefined reference.
379   static bool shouldAddProvideSym(StringRef symName);
380 
381   // SECTIONS command list.
382   SmallVector<SectionCommand *, 0> sectionCommands;
383 
384   // PHDRS command list.
385   SmallVector<PhdrsCommand, 0> phdrsCommands;
386 
387   bool hasSectionsCommand = false;
388   bool seenDataAlign = false;
389   bool seenRelroEnd = false;
390   bool errorOnMissingSection = false;
391   SmallVector<SmallString<0>, 0> recordedErrors;
392 
393   // List of section patterns specified with KEEP commands. They will
394   // be kept even if they are unused and --gc-sections is specified.
395   SmallVector<InputSectionDescription *, 0> keptSections;
396 
397   // A map from memory region name to a memory region descriptor.
398   llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
399 
400   // A list of symbols referenced by the script.
401   SmallVector<llvm::StringRef, 0> referencedSymbols;
402 
403   // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
404   // to be reordered.
405   SmallVector<InsertCommand, 0> insertCommands;
406 
407   // OutputSections specified by OVERWRITE_SECTIONS.
408   SmallVector<OutputDesc *, 0> overwriteSections;
409 
410   // NOCROSSREFS(_TO) commands.
411   SmallVector<NoCrossRefCommand, 0> noCrossRefs;
412 
413   // Sections that will be warned/errored by --orphan-handling.
414   SmallVector<const InputSectionBase *, 0> orphanSections;
415 
416   // Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE
417   // expression. For example, if the PROVIDE command is:
418   //
419   // PROVIDE(v = a + b + c);
420   //
421   // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
422   llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap;
423 
424   // List of potential spill locations (PotentialSpillSection) for an input
425   // section.
426   struct PotentialSpillList {
427     // Never nullptr.
428     PotentialSpillSection *head;
429     PotentialSpillSection *tail;
430   };
431   llvm::DenseMap<InputSectionBase *, PotentialSpillList> potentialSpillLists;
432 };
433 
434 struct ScriptWrapper {
435   LinkerScript s;
436   LinkerScript *operator->() { return &s; }
437 };
438 
439 LLVM_LIBRARY_VISIBILITY extern ScriptWrapper script;
440 
441 } // end namespace lld::elf
442 
443 #endif // LLD_ELF_LINKER_SCRIPT_H
444