xref: /freebsd/contrib/llvm-project/lld/ELF/LinkerScript.h (revision 6132212808e8dccedc9e5d85fea4390c2f38059a)
1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_LINKER_SCRIPT_H
10 #define LLD_ELF_LINKER_SCRIPT_H
11 
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>
27 
28 namespace lld {
29 namespace elf {
30 
31 class Defined;
32 class InputSection;
33 class InputSectionBase;
34 class OutputSection;
35 class SectionBase;
36 class Symbol;
37 class ThunkSection;
38 
39 // This represents an r-value in the linker script.
40 struct ExprValue {
41   ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
42             const Twine &loc)
43       : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {}
44 
45   ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
46 
47   bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
48   uint64_t getValue() const;
49   uint64_t getSecAddr() const;
50   uint64_t getSectionOffset() const;
51 
52   // If a value is relative to a section, it has a non-null Sec.
53   SectionBase *sec;
54 
55   // True if this expression is enclosed in ABSOLUTE().
56   // This flag affects the return value of getValue().
57   bool forceAbsolute;
58 
59   uint64_t val;
60   uint64_t alignment = 1;
61 
62   // Original source location. Used for error messages.
63   std::string loc;
64 };
65 
66 // This represents an expression in the linker script.
67 // ScriptParser::readExpr reads an expression and returns an Expr.
68 // Later, we evaluate the expression by calling the function.
69 using Expr = std::function<ExprValue()>;
70 
// Tags for the kinds of command that can appear in a linker script SECTIONS
// command. The command structs in this file pass one of these to BaseCommand.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr"
  AssignmentKind,
  OutputSectionKind,
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
  ByteKind
};
79 
// Common base of the SECTIONS commands. It only records which concrete command
// type an object is, so that the classof() helpers of derived types can test
// it.
struct BaseCommand {
  // explicit: a bare integer must never convert silently into a command.
  explicit BaseCommand(int k) : kind(k) {}

  // Kind tag supplied at construction; the derived commands in this file pass
  // a SectionsCommandKind enumerator.
  int kind;
};
84 
85 // This represents ". = <expr>" or "<symbol> = <expr>".
86 struct SymbolAssignment : BaseCommand {
87   SymbolAssignment(StringRef name, Expr e, std::string loc)
88       : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {}
89 
90   static bool classof(const BaseCommand *c) {
91     return c->kind == AssignmentKind;
92   }
93 
94   // The LHS of an expression. Name is either a symbol name or ".".
95   StringRef name;
96   Defined *sym = nullptr;
97 
98   // The RHS of an expression.
99   Expr expression;
100 
101   // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
102   bool provide = false;
103   bool hidden = false;
104 
105   // Holds file name and line number for error reporting.
106   std::string location;
107 
108   // A string representation of this command. We use this for -Map.
109   std::string commandString;
110 
111   // Address of this assignment command.
112   uint64_t addr;
113 
114   // Size of this assignment command. This is usually 0, but if
115   // you move '.' this may be greater than 0.
116   uint64_t size;
117 };
118 
// An output section description in a linker script may carry a constraint.
// With ONLY_IF_RO the output section is created only if its input sections
// are read-only; with ONLY_IF_RW it is created if all input sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
124 
125 // This struct is used to represent the location and size of regions of
126 // target memory. Instances of the struct are created by parsing the
127 // MEMORY command.
128 struct MemoryRegion {
129   MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
130                uint32_t negFlags)
131       : name(std::string(name)), origin(origin), length(length), flags(flags),
132         negFlags(negFlags) {}
133 
134   std::string name;
135   Expr origin;
136   Expr length;
137   uint32_t flags;
138   uint32_t negFlags;
139   uint64_t curPos = 0;
140 };
141 
142 // This struct represents one section match pattern in SECTIONS() command.
143 // It can optionally have negative match pattern for EXCLUDED_FILE command.
144 // Also it may be surrounded with SORT() command, so contains sorting rules.
145 struct SectionPattern {
146   SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
147       : excludedFilePat(pat1), sectionPat(pat2),
148         sortOuter(SortSectionPolicy::Default),
149         sortInner(SortSectionPolicy::Default) {}
150 
151   StringMatcher excludedFilePat;
152   StringMatcher sectionPat;
153   SortSectionPolicy sortOuter;
154   SortSectionPolicy sortInner;
155 };
156 
157 struct InputSectionDescription : BaseCommand {
158   InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
159                           uint64_t withoutFlags = 0)
160       : BaseCommand(InputSectionKind), filePat(filePattern),
161         withFlags(withFlags), withoutFlags(withoutFlags) {}
162 
163   static bool classof(const BaseCommand *c) {
164     return c->kind == InputSectionKind;
165   }
166 
167   SingleStringMatcher filePat;
168 
169   // Input sections that matches at least one of SectionPatterns
170   // will be associated with this InputSectionDescription.
171   std::vector<SectionPattern> sectionPatterns;
172 
173   // Includes InputSections and MergeInputSections. Used temporarily during
174   // assignment of input sections to output sections.
175   std::vector<InputSectionBase *> sectionBases;
176 
177   // Used after the finalizeInputSections() pass. MergeInputSections have been
178   // merged into MergeSyntheticSections.
179   std::vector<InputSection *> sections;
180 
181   // Temporary record of synthetic ThunkSection instances and the pass that
182   // they were created in. This is used to insert newly created ThunkSections
183   // into Sections at the end of a createThunks() pass.
184   std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections;
185 
186   // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
187   uint64_t withFlags;
188   uint64_t withoutFlags;
189 };
190 
191 // Represents BYTE(), SHORT(), LONG(), or QUAD().
192 struct ByteCommand : BaseCommand {
193   ByteCommand(Expr e, unsigned size, std::string commandString)
194       : BaseCommand(ByteKind), commandString(commandString), expression(e),
195         size(size) {}
196 
197   static bool classof(const BaseCommand *c) { return c->kind == ByteKind; }
198 
199   // Keeps string representing the command. Used for -Map" is perhaps better.
200   std::string commandString;
201 
202   Expr expression;
203 
204   // This is just an offset of this assignment command in the output section.
205   unsigned offset;
206 
207   // Size of this data command.
208   unsigned size;
209 };
210 
211 struct InsertCommand {
212   OutputSection *os;
213   bool isAfter;
214   StringRef where;
215 };
216 
217 struct PhdrsCommand {
218   StringRef name;
219   unsigned type = llvm::ELF::PT_NULL;
220   bool hasFilehdr = false;
221   bool hasPhdrs = false;
222   llvm::Optional<unsigned> flags;
223   Expr lmaExpr = nullptr;
224 };
225 
226 class LinkerScript final {
227   // Temporary state used in processSectionCommands() and assignAddresses()
228   // that must be reinitialized for each call to the above functions, and must
229   // not be used outside of the scope of a call to the above functions.
230   struct AddressState {
231     AddressState();
232     uint64_t threadBssOffset = 0;
233     OutputSection *outSec = nullptr;
234     MemoryRegion *memRegion = nullptr;
235     MemoryRegion *lmaRegion = nullptr;
236     uint64_t lmaOffset = 0;
237   };
238 
239   llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection;
240 
241   void addSymbol(SymbolAssignment *cmd);
242   void assignSymbol(SymbolAssignment *cmd, bool inSec);
243   void setDot(Expr e, const Twine &loc, bool inSec);
244   void expandOutputSection(uint64_t size);
245   void expandMemoryRegions(uint64_t size);
246 
247   std::vector<InputSectionBase *>
248   computeInputSections(const InputSectionDescription *,
249                        ArrayRef<InputSectionBase *>);
250 
251   std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd);
252 
253   void discardSynthetic(OutputSection &);
254 
255   std::vector<size_t> getPhdrIndices(OutputSection *sec);
256 
257   MemoryRegion *findMemoryRegion(OutputSection *sec);
258 
259   void switchTo(OutputSection *sec);
260   uint64_t advance(uint64_t size, unsigned align);
261   void output(InputSection *sec);
262 
263   void assignOffsets(OutputSection *sec);
264 
265   // Ctx captures the local AddressState and makes it accessible
266   // deliberately. This is needed as there are some cases where we cannot just
267   // thread the current state through to a lambda function created by the
268   // script parser.
269   // This should remain a plain pointer as its lifetime is smaller than
270   // LinkerScript.
271   AddressState *ctx = nullptr;
272 
273   OutputSection *aether;
274 
275   uint64_t dot;
276 
277 public:
278   OutputSection *createOutputSection(StringRef name, StringRef location);
279   OutputSection *getOrCreateOutputSection(StringRef name);
280 
281   bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
282   uint64_t getDot() { return dot; }
283   void discard(InputSectionBase *s);
284 
285   ExprValue getSymbolValue(StringRef name, const Twine &loc);
286 
287   void addOrphanSections();
288   void diagnoseOrphanHandling() const;
289   void adjustSectionsBeforeSorting();
290   void adjustSectionsAfterSorting();
291 
292   std::vector<PhdrEntry *> createPhdrs();
293   bool needsInterpSection();
294 
295   bool shouldKeep(InputSectionBase *s);
296   const Defined *assignAddresses();
297   void allocateHeaders(std::vector<PhdrEntry *> &phdrs);
298   void processSectionCommands();
299   void processSymbolAssignments();
300   void declareSymbols();
301 
302   // Used to handle INSERT AFTER statements.
303   void processInsertCommands();
304 
305   // SECTIONS command list.
306   std::vector<BaseCommand *> sectionCommands;
307 
308   // PHDRS command list.
309   std::vector<PhdrsCommand> phdrsCommands;
310 
311   bool hasSectionsCommand = false;
312   bool errorOnMissingSection = false;
313 
314   // List of section patterns specified with KEEP commands. They will
315   // be kept even if they are unused and --gc-sections is specified.
316   std::vector<InputSectionDescription *> keptSections;
317 
318   // A map from memory region name to a memory region descriptor.
319   llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
320 
321   // A list of symbols referenced by the script.
322   std::vector<llvm::StringRef> referencedSymbols;
323 
324   // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
325   // to be reordered.
326   std::vector<InsertCommand> insertCommands;
327 
328   // Sections that will be warned/errored by --orphan-handling.
329   std::vector<const InputSectionBase *> orphanSections;
330 };
331 
332 extern LinkerScript *script;
333 
334 } // end namespace elf
335 } // end namespace lld
336 
337 #endif // LLD_ELF_LINKER_SCRIPT_H
338