xref: /freebsd/contrib/llvm-project/lld/ELF/LinkerScript.h (revision cfd6422a5217410fbd66f7a7a8a64d9d85e61229)
1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_LINKER_SCRIPT_H
10 #define LLD_ELF_LINKER_SCRIPT_H
11 
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
#include <vector>
27 
28 namespace lld {
29 namespace elf {
30 
31 class Defined;
32 class InputSection;
33 class InputSectionBase;
34 class OutputSection;
35 class SectionBase;
36 class Symbol;
37 class ThunkSection;
38 
39 // This represents an r-value in the linker script.
40 struct ExprValue {
41   ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
42             const Twine &loc)
43       : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {}
44 
45   ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
46 
47   bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
48   uint64_t getValue() const;
49   uint64_t getSecAddr() const;
50   uint64_t getSectionOffset() const;
51 
52   // If a value is relative to a section, it has a non-null Sec.
53   SectionBase *sec;
54 
55   // True if this expression is enclosed in ABSOLUTE().
56   // This flag affects the return value of getValue().
57   bool forceAbsolute;
58 
59   uint64_t val;
60   uint64_t alignment = 1;
61 
62   // The original st_type if the expression represents a symbol. Any operation
63   // resets type to STT_NOTYPE.
64   uint8_t type = llvm::ELF::STT_NOTYPE;
65 
66   // Original source location. Used for error messages.
67   std::string loc;
68 };
69 
70 // This represents an expression in the linker script.
71 // ScriptParser::readExpr reads an expression and returns an Expr.
72 // Later, we evaluate the expression by calling the function.
73 using Expr = std::function<ExprValue()>;
74 
// Discriminator for the commands of the linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr".
  AssignmentKind,
  OutputSectionKind,
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr).
  ByteKind
};
83 
// Common base of the SECTIONS commands. `kind` stores the discriminator that
// the classof() functions of the derived command types compare against.
struct BaseCommand {
  // explicit: a bare integer must never silently convert into a command.
  explicit BaseCommand(int k) : kind(k) {}

  // Discriminator value supplied by the derived type's constructor.
  int kind;
};
88 
89 // This represents ". = <expr>" or "<symbol> = <expr>".
90 struct SymbolAssignment : BaseCommand {
91   SymbolAssignment(StringRef name, Expr e, std::string loc)
92       : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {}
93 
94   static bool classof(const BaseCommand *c) {
95     return c->kind == AssignmentKind;
96   }
97 
98   // The LHS of an expression. Name is either a symbol name or ".".
99   StringRef name;
100   Defined *sym = nullptr;
101 
102   // The RHS of an expression.
103   Expr expression;
104 
105   // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
106   bool provide = false;
107   bool hidden = false;
108 
109   // Holds file name and line number for error reporting.
110   std::string location;
111 
112   // A string representation of this command. We use this for -Map.
113   std::string commandString;
114 
115   // Address of this assignment command.
116   uint64_t addr;
117 
118   // Size of this assignment command. This is usually 0, but if
119   // you move '.' this may be greater than 0.
120   uint64_t size;
121 };
122 
// Optional constraint that a linker script may attach to an output section.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only; one marked ONLY_IF_RW is created if all of its input
// sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
128 
129 // This struct is used to represent the location and size of regions of
130 // target memory. Instances of the struct are created by parsing the
131 // MEMORY command.
132 struct MemoryRegion {
133   MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
134                uint32_t negFlags)
135       : name(std::string(name)), origin(origin), length(length), flags(flags),
136         negFlags(negFlags) {}
137 
138   std::string name;
139   Expr origin;
140   Expr length;
141   uint32_t flags;
142   uint32_t negFlags;
143   uint64_t curPos = 0;
144 };
145 
146 // This struct represents one section match pattern in SECTIONS() command.
147 // It can optionally have negative match pattern for EXCLUDED_FILE command.
148 // Also it may be surrounded with SORT() command, so contains sorting rules.
149 struct SectionPattern {
150   SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
151       : excludedFilePat(pat1), sectionPat(pat2),
152         sortOuter(SortSectionPolicy::Default),
153         sortInner(SortSectionPolicy::Default) {}
154 
155   StringMatcher excludedFilePat;
156   StringMatcher sectionPat;
157   SortSectionPolicy sortOuter;
158   SortSectionPolicy sortInner;
159 };
160 
161 struct InputSectionDescription : BaseCommand {
162   InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
163                           uint64_t withoutFlags = 0)
164       : BaseCommand(InputSectionKind), filePat(filePattern),
165         withFlags(withFlags), withoutFlags(withoutFlags) {}
166 
167   static bool classof(const BaseCommand *c) {
168     return c->kind == InputSectionKind;
169   }
170 
171   SingleStringMatcher filePat;
172 
173   // Input sections that matches at least one of SectionPatterns
174   // will be associated with this InputSectionDescription.
175   std::vector<SectionPattern> sectionPatterns;
176 
177   // Includes InputSections and MergeInputSections. Used temporarily during
178   // assignment of input sections to output sections.
179   std::vector<InputSectionBase *> sectionBases;
180 
181   // Used after the finalizeInputSections() pass. MergeInputSections have been
182   // merged into MergeSyntheticSections.
183   std::vector<InputSection *> sections;
184 
185   // Temporary record of synthetic ThunkSection instances and the pass that
186   // they were created in. This is used to insert newly created ThunkSections
187   // into Sections at the end of a createThunks() pass.
188   std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections;
189 
190   // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
191   uint64_t withFlags;
192   uint64_t withoutFlags;
193 };
194 
195 // Represents BYTE(), SHORT(), LONG(), or QUAD().
196 struct ByteCommand : BaseCommand {
197   ByteCommand(Expr e, unsigned size, std::string commandString)
198       : BaseCommand(ByteKind), commandString(commandString), expression(e),
199         size(size) {}
200 
201   static bool classof(const BaseCommand *c) { return c->kind == ByteKind; }
202 
203   // Keeps string representing the command. Used for -Map" is perhaps better.
204   std::string commandString;
205 
206   Expr expression;
207 
208   // This is just an offset of this assignment command in the output section.
209   unsigned offset;
210 
211   // Size of this data command.
212   unsigned size;
213 };
214 
215 struct InsertCommand {
216   OutputSection *os;
217   bool isAfter;
218   StringRef where;
219 };
220 
221 struct PhdrsCommand {
222   StringRef name;
223   unsigned type = llvm::ELF::PT_NULL;
224   bool hasFilehdr = false;
225   bool hasPhdrs = false;
226   llvm::Optional<unsigned> flags;
227   Expr lmaExpr = nullptr;
228 };
229 
230 class LinkerScript final {
231   // Temporary state used in processSectionCommands() and assignAddresses()
232   // that must be reinitialized for each call to the above functions, and must
233   // not be used outside of the scope of a call to the above functions.
234   struct AddressState {
235     AddressState();
236     uint64_t threadBssOffset = 0;
237     OutputSection *outSec = nullptr;
238     MemoryRegion *memRegion = nullptr;
239     MemoryRegion *lmaRegion = nullptr;
240     uint64_t lmaOffset = 0;
241   };
242 
243   llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection;
244 
245   void addSymbol(SymbolAssignment *cmd);
246   void assignSymbol(SymbolAssignment *cmd, bool inSec);
247   void setDot(Expr e, const Twine &loc, bool inSec);
248   void expandOutputSection(uint64_t size);
249   void expandMemoryRegions(uint64_t size);
250 
251   std::vector<InputSectionBase *>
252   computeInputSections(const InputSectionDescription *,
253                        ArrayRef<InputSectionBase *>);
254 
255   std::vector<InputSectionBase *> createInputSectionList(OutputSection &cmd);
256 
257   void discardSynthetic(OutputSection &);
258 
259   std::vector<size_t> getPhdrIndices(OutputSection *sec);
260 
261   MemoryRegion *findMemoryRegion(OutputSection *sec);
262 
263   void switchTo(OutputSection *sec);
264   uint64_t advance(uint64_t size, unsigned align);
265   void output(InputSection *sec);
266 
267   void assignOffsets(OutputSection *sec);
268 
269   // Ctx captures the local AddressState and makes it accessible
270   // deliberately. This is needed as there are some cases where we cannot just
271   // thread the current state through to a lambda function created by the
272   // script parser.
273   // This should remain a plain pointer as its lifetime is smaller than
274   // LinkerScript.
275   AddressState *ctx = nullptr;
276 
277   OutputSection *aether;
278 
279   uint64_t dot;
280 
281 public:
282   OutputSection *createOutputSection(StringRef name, StringRef location);
283   OutputSection *getOrCreateOutputSection(StringRef name);
284 
285   bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
286   uint64_t getDot() { return dot; }
287   void discard(InputSectionBase *s);
288 
289   ExprValue getSymbolValue(StringRef name, const Twine &loc);
290 
291   void addOrphanSections();
292   void diagnoseOrphanHandling() const;
293   void adjustSectionsBeforeSorting();
294   void adjustSectionsAfterSorting();
295 
296   std::vector<PhdrEntry *> createPhdrs();
297   bool needsInterpSection();
298 
299   bool shouldKeep(InputSectionBase *s);
300   const Defined *assignAddresses();
301   void allocateHeaders(std::vector<PhdrEntry *> &phdrs);
302   void processSectionCommands();
303   void processSymbolAssignments();
304   void declareSymbols();
305 
306   // Used to handle INSERT AFTER statements.
307   void processInsertCommands();
308 
309   // SECTIONS command list.
310   std::vector<BaseCommand *> sectionCommands;
311 
312   // PHDRS command list.
313   std::vector<PhdrsCommand> phdrsCommands;
314 
315   bool hasSectionsCommand = false;
316   bool errorOnMissingSection = false;
317 
318   // List of section patterns specified with KEEP commands. They will
319   // be kept even if they are unused and --gc-sections is specified.
320   std::vector<InputSectionDescription *> keptSections;
321 
322   // A map from memory region name to a memory region descriptor.
323   llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
324 
325   // A list of symbols referenced by the script.
326   std::vector<llvm::StringRef> referencedSymbols;
327 
328   // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
329   // to be reordered.
330   std::vector<InsertCommand> insertCommands;
331 
332   // Sections that will be warned/errored by --orphan-handling.
333   std::vector<const InputSectionBase *> orphanSections;
334 };
335 
336 extern LinkerScript *script;
337 
338 } // end namespace elf
339 } // end namespace lld
340 
341 #endif // LLD_ELF_LINKER_SCRIPT_H
342