xref: /freebsd/contrib/llvm-project/lld/ELF/LinkerScript.h (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric 
9*0b57cec5SDimitry Andric #ifndef LLD_ELF_LINKER_SCRIPT_H
10*0b57cec5SDimitry Andric #define LLD_ELF_LINKER_SCRIPT_H
11*0b57cec5SDimitry Andric 
#include "Config.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
#include <vector>
27*0b57cec5SDimitry Andric 
28*0b57cec5SDimitry Andric namespace lld {
29*0b57cec5SDimitry Andric namespace elf {
30*0b57cec5SDimitry Andric 
// Forward declarations. Where this header uses these types, it does so only
// through pointers, so the full definitions are not needed here.
class Defined;
class InputSection;
class InputSectionBase;
class OutputSection;
class SectionBase;
class Symbol;
class ThunkSection;
38*0b57cec5SDimitry Andric 
39*0b57cec5SDimitry Andric // This represents an r-value in the linker script.
40*0b57cec5SDimitry Andric struct ExprValue {
41*0b57cec5SDimitry Andric   ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
42*0b57cec5SDimitry Andric             const Twine &loc)
43*0b57cec5SDimitry Andric       : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {}
44*0b57cec5SDimitry Andric 
45*0b57cec5SDimitry Andric   ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
46*0b57cec5SDimitry Andric 
47*0b57cec5SDimitry Andric   bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
48*0b57cec5SDimitry Andric   uint64_t getValue() const;
49*0b57cec5SDimitry Andric   uint64_t getSecAddr() const;
50*0b57cec5SDimitry Andric   uint64_t getSectionOffset() const;
51*0b57cec5SDimitry Andric 
52*0b57cec5SDimitry Andric   // If a value is relative to a section, it has a non-null Sec.
53*0b57cec5SDimitry Andric   SectionBase *sec;
54*0b57cec5SDimitry Andric 
55*0b57cec5SDimitry Andric   // True if this expression is enclosed in ABSOLUTE().
56*0b57cec5SDimitry Andric   // This flag affects the return value of getValue().
57*0b57cec5SDimitry Andric   bool forceAbsolute;
58*0b57cec5SDimitry Andric 
59*0b57cec5SDimitry Andric   uint64_t val;
60*0b57cec5SDimitry Andric   uint64_t alignment = 1;
61*0b57cec5SDimitry Andric 
62*0b57cec5SDimitry Andric   // Original source location. Used for error messages.
63*0b57cec5SDimitry Andric   std::string loc;
64*0b57cec5SDimitry Andric };
65*0b57cec5SDimitry Andric 
// This represents an expression in the linker script.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function.
// An Expr is a callable that takes no arguments and returns an ExprValue; a
// default-constructed Expr (or one set to nullptr) holds no callable.
using Expr = std::function<ExprValue()>;
70*0b57cec5SDimitry Andric 
// Tags for the kinds of command used to implement the linker script SECTIONS
// command. The enumerators are numbered consecutively from 0.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
enum SectionsCommandKind {
  // ". = expr" or "<sym> = expr".
  AssignmentKind,
  OutputSectionKind,
  InputSectionKind,
  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr).
  ByteKind,
};
79*0b57cec5SDimitry Andric 
// Common base of the SECTIONS commands declared in this header. It carries
// only a kind tag; the derived commands' classof() functions compare against
// that tag to identify the concrete command type.
struct BaseCommand {
  // `explicit` so that a bare integer never converts to a command silently.
  // Derived classes pass their kind tag directly in their initializer lists,
  // which is unaffected by `explicit`.
  explicit BaseCommand(int k) : kind(k) {}

  // The tag given at construction (a SectionsCommandKind value for the
  // commands in this header).
  int kind;
};
84*0b57cec5SDimitry Andric 
85*0b57cec5SDimitry Andric // This represents ". = <expr>" or "<symbol> = <expr>".
86*0b57cec5SDimitry Andric struct SymbolAssignment : BaseCommand {
87*0b57cec5SDimitry Andric   SymbolAssignment(StringRef name, Expr e, std::string loc)
88*0b57cec5SDimitry Andric       : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {}
89*0b57cec5SDimitry Andric 
90*0b57cec5SDimitry Andric   static bool classof(const BaseCommand *c) {
91*0b57cec5SDimitry Andric     return c->kind == AssignmentKind;
92*0b57cec5SDimitry Andric   }
93*0b57cec5SDimitry Andric 
94*0b57cec5SDimitry Andric   // The LHS of an expression. Name is either a symbol name or ".".
95*0b57cec5SDimitry Andric   StringRef name;
96*0b57cec5SDimitry Andric   Defined *sym = nullptr;
97*0b57cec5SDimitry Andric 
98*0b57cec5SDimitry Andric   // The RHS of an expression.
99*0b57cec5SDimitry Andric   Expr expression;
100*0b57cec5SDimitry Andric 
101*0b57cec5SDimitry Andric   // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
102*0b57cec5SDimitry Andric   bool provide = false;
103*0b57cec5SDimitry Andric   bool hidden = false;
104*0b57cec5SDimitry Andric 
105*0b57cec5SDimitry Andric   // Holds file name and line number for error reporting.
106*0b57cec5SDimitry Andric   std::string location;
107*0b57cec5SDimitry Andric 
108*0b57cec5SDimitry Andric   // A string representation of this command. We use this for -Map.
109*0b57cec5SDimitry Andric   std::string commandString;
110*0b57cec5SDimitry Andric 
111*0b57cec5SDimitry Andric   // Address of this assignment command.
112*0b57cec5SDimitry Andric   unsigned addr;
113*0b57cec5SDimitry Andric 
114*0b57cec5SDimitry Andric   // Size of this assignment command. This is usually 0, but if
115*0b57cec5SDimitry Andric   // you move '.' this may be greater than 0.
116*0b57cec5SDimitry Andric   unsigned size;
117*0b57cec5SDimitry Andric };
118*0b57cec5SDimitry Andric 
// Linker scripts allow additional constraints to be put on output sections.
// An output section marked ONLY_IF_RO is created only if its input sections
// are read-only. Likewise, an output section marked ONLY_IF_RW is created
// only if all of its input sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
124*0b57cec5SDimitry Andric 
125*0b57cec5SDimitry Andric // This struct is used to represent the location and size of regions of
126*0b57cec5SDimitry Andric // target memory. Instances of the struct are created by parsing the
127*0b57cec5SDimitry Andric // MEMORY command.
128*0b57cec5SDimitry Andric struct MemoryRegion {
129*0b57cec5SDimitry Andric   MemoryRegion(StringRef name, uint64_t origin, uint64_t length, uint32_t flags,
130*0b57cec5SDimitry Andric                uint32_t negFlags)
131*0b57cec5SDimitry Andric       : name(name), origin(origin), length(length), flags(flags),
132*0b57cec5SDimitry Andric         negFlags(negFlags) {}
133*0b57cec5SDimitry Andric 
134*0b57cec5SDimitry Andric   std::string name;
135*0b57cec5SDimitry Andric   uint64_t origin;
136*0b57cec5SDimitry Andric   uint64_t length;
137*0b57cec5SDimitry Andric   uint32_t flags;
138*0b57cec5SDimitry Andric   uint32_t negFlags;
139*0b57cec5SDimitry Andric   uint64_t curPos = 0;
140*0b57cec5SDimitry Andric };
141*0b57cec5SDimitry Andric 
142*0b57cec5SDimitry Andric // This struct represents one section match pattern in SECTIONS() command.
143*0b57cec5SDimitry Andric // It can optionally have negative match pattern for EXCLUDED_FILE command.
144*0b57cec5SDimitry Andric // Also it may be surrounded with SORT() command, so contains sorting rules.
145*0b57cec5SDimitry Andric struct SectionPattern {
146*0b57cec5SDimitry Andric   SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
147*0b57cec5SDimitry Andric       : excludedFilePat(pat1), sectionPat(pat2),
148*0b57cec5SDimitry Andric         sortOuter(SortSectionPolicy::Default),
149*0b57cec5SDimitry Andric         sortInner(SortSectionPolicy::Default) {}
150*0b57cec5SDimitry Andric 
151*0b57cec5SDimitry Andric   StringMatcher excludedFilePat;
152*0b57cec5SDimitry Andric   StringMatcher sectionPat;
153*0b57cec5SDimitry Andric   SortSectionPolicy sortOuter;
154*0b57cec5SDimitry Andric   SortSectionPolicy sortInner;
155*0b57cec5SDimitry Andric };
156*0b57cec5SDimitry Andric 
157*0b57cec5SDimitry Andric struct InputSectionDescription : BaseCommand {
158*0b57cec5SDimitry Andric   InputSectionDescription(StringRef filePattern)
159*0b57cec5SDimitry Andric       : BaseCommand(InputSectionKind), filePat(filePattern) {}
160*0b57cec5SDimitry Andric 
161*0b57cec5SDimitry Andric   static bool classof(const BaseCommand *c) {
162*0b57cec5SDimitry Andric     return c->kind == InputSectionKind;
163*0b57cec5SDimitry Andric   }
164*0b57cec5SDimitry Andric 
165*0b57cec5SDimitry Andric   StringMatcher filePat;
166*0b57cec5SDimitry Andric 
167*0b57cec5SDimitry Andric   // Input sections that matches at least one of SectionPatterns
168*0b57cec5SDimitry Andric   // will be associated with this InputSectionDescription.
169*0b57cec5SDimitry Andric   std::vector<SectionPattern> sectionPatterns;
170*0b57cec5SDimitry Andric 
171*0b57cec5SDimitry Andric   std::vector<InputSection *> sections;
172*0b57cec5SDimitry Andric 
173*0b57cec5SDimitry Andric   // Temporary record of synthetic ThunkSection instances and the pass that
174*0b57cec5SDimitry Andric   // they were created in. This is used to insert newly created ThunkSections
175*0b57cec5SDimitry Andric   // into Sections at the end of a createThunks() pass.
176*0b57cec5SDimitry Andric   std::vector<std::pair<ThunkSection *, uint32_t>> thunkSections;
177*0b57cec5SDimitry Andric };
178*0b57cec5SDimitry Andric 
179*0b57cec5SDimitry Andric // Represents BYTE(), SHORT(), LONG(), or QUAD().
180*0b57cec5SDimitry Andric struct ByteCommand : BaseCommand {
181*0b57cec5SDimitry Andric   ByteCommand(Expr e, unsigned size, std::string commandString)
182*0b57cec5SDimitry Andric       : BaseCommand(ByteKind), commandString(commandString), expression(e),
183*0b57cec5SDimitry Andric         size(size) {}
184*0b57cec5SDimitry Andric 
185*0b57cec5SDimitry Andric   static bool classof(const BaseCommand *c) { return c->kind == ByteKind; }
186*0b57cec5SDimitry Andric 
187*0b57cec5SDimitry Andric   // Keeps string representing the command. Used for -Map" is perhaps better.
188*0b57cec5SDimitry Andric   std::string commandString;
189*0b57cec5SDimitry Andric 
190*0b57cec5SDimitry Andric   Expr expression;
191*0b57cec5SDimitry Andric 
192*0b57cec5SDimitry Andric   // This is just an offset of this assignment command in the output section.
193*0b57cec5SDimitry Andric   unsigned offset;
194*0b57cec5SDimitry Andric 
195*0b57cec5SDimitry Andric   // Size of this data command.
196*0b57cec5SDimitry Andric   unsigned size;
197*0b57cec5SDimitry Andric };
198*0b57cec5SDimitry Andric 
199*0b57cec5SDimitry Andric struct PhdrsCommand {
200*0b57cec5SDimitry Andric   StringRef name;
201*0b57cec5SDimitry Andric   unsigned type = llvm::ELF::PT_NULL;
202*0b57cec5SDimitry Andric   bool hasFilehdr = false;
203*0b57cec5SDimitry Andric   bool hasPhdrs = false;
204*0b57cec5SDimitry Andric   llvm::Optional<unsigned> flags;
205*0b57cec5SDimitry Andric   Expr lmaExpr = nullptr;
206*0b57cec5SDimitry Andric };
207*0b57cec5SDimitry Andric 
208*0b57cec5SDimitry Andric class LinkerScript final {
209*0b57cec5SDimitry Andric   // Temporary state used in processSectionCommands() and assignAddresses()
210*0b57cec5SDimitry Andric   // that must be reinitialized for each call to the above functions, and must
211*0b57cec5SDimitry Andric   // not be used outside of the scope of a call to the above functions.
212*0b57cec5SDimitry Andric   struct AddressState {
213*0b57cec5SDimitry Andric     AddressState();
214*0b57cec5SDimitry Andric     uint64_t threadBssOffset = 0;
215*0b57cec5SDimitry Andric     OutputSection *outSec = nullptr;
216*0b57cec5SDimitry Andric     MemoryRegion *memRegion = nullptr;
217*0b57cec5SDimitry Andric     MemoryRegion *lmaRegion = nullptr;
218*0b57cec5SDimitry Andric     uint64_t lmaOffset = 0;
219*0b57cec5SDimitry Andric   };
220*0b57cec5SDimitry Andric 
221*0b57cec5SDimitry Andric   llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection;
222*0b57cec5SDimitry Andric 
223*0b57cec5SDimitry Andric   void addSymbol(SymbolAssignment *cmd);
224*0b57cec5SDimitry Andric   void assignSymbol(SymbolAssignment *cmd, bool inSec);
225*0b57cec5SDimitry Andric   void setDot(Expr e, const Twine &loc, bool inSec);
226*0b57cec5SDimitry Andric   void expandOutputSection(uint64_t size);
227*0b57cec5SDimitry Andric   void expandMemoryRegions(uint64_t size);
228*0b57cec5SDimitry Andric 
229*0b57cec5SDimitry Andric   std::vector<InputSection *>
230*0b57cec5SDimitry Andric   computeInputSections(const InputSectionDescription *);
231*0b57cec5SDimitry Andric 
232*0b57cec5SDimitry Andric   std::vector<InputSection *> createInputSectionList(OutputSection &cmd);
233*0b57cec5SDimitry Andric 
234*0b57cec5SDimitry Andric   std::vector<size_t> getPhdrIndices(OutputSection *sec);
235*0b57cec5SDimitry Andric 
236*0b57cec5SDimitry Andric   MemoryRegion *findMemoryRegion(OutputSection *sec);
237*0b57cec5SDimitry Andric 
238*0b57cec5SDimitry Andric   void switchTo(OutputSection *sec);
239*0b57cec5SDimitry Andric   uint64_t advance(uint64_t size, unsigned align);
240*0b57cec5SDimitry Andric   void output(InputSection *sec);
241*0b57cec5SDimitry Andric 
242*0b57cec5SDimitry Andric   void assignOffsets(OutputSection *sec);
243*0b57cec5SDimitry Andric 
244*0b57cec5SDimitry Andric   // Ctx captures the local AddressState and makes it accessible
245*0b57cec5SDimitry Andric   // deliberately. This is needed as there are some cases where we cannot just
246*0b57cec5SDimitry Andric   // thread the current state through to a lambda function created by the
247*0b57cec5SDimitry Andric   // script parser.
248*0b57cec5SDimitry Andric   // This should remain a plain pointer as its lifetime is smaller than
249*0b57cec5SDimitry Andric   // LinkerScript.
250*0b57cec5SDimitry Andric   AddressState *ctx = nullptr;
251*0b57cec5SDimitry Andric 
252*0b57cec5SDimitry Andric   OutputSection *aether;
253*0b57cec5SDimitry Andric 
254*0b57cec5SDimitry Andric   uint64_t dot;
255*0b57cec5SDimitry Andric 
256*0b57cec5SDimitry Andric public:
257*0b57cec5SDimitry Andric   OutputSection *createOutputSection(StringRef name, StringRef location);
258*0b57cec5SDimitry Andric   OutputSection *getOrCreateOutputSection(StringRef name);
259*0b57cec5SDimitry Andric 
260*0b57cec5SDimitry Andric   bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
261*0b57cec5SDimitry Andric   uint64_t getDot() { return dot; }
262*0b57cec5SDimitry Andric   void discard(ArrayRef<InputSection *> v);
263*0b57cec5SDimitry Andric 
264*0b57cec5SDimitry Andric   ExprValue getSymbolValue(StringRef name, const Twine &loc);
265*0b57cec5SDimitry Andric 
266*0b57cec5SDimitry Andric   void addOrphanSections();
267*0b57cec5SDimitry Andric   void adjustSectionsBeforeSorting();
268*0b57cec5SDimitry Andric   void adjustSectionsAfterSorting();
269*0b57cec5SDimitry Andric 
270*0b57cec5SDimitry Andric   std::vector<PhdrEntry *> createPhdrs();
271*0b57cec5SDimitry Andric   bool needsInterpSection();
272*0b57cec5SDimitry Andric 
273*0b57cec5SDimitry Andric   bool shouldKeep(InputSectionBase *s);
274*0b57cec5SDimitry Andric   void assignAddresses();
275*0b57cec5SDimitry Andric   void allocateHeaders(std::vector<PhdrEntry *> &phdrs);
276*0b57cec5SDimitry Andric   void processSectionCommands();
277*0b57cec5SDimitry Andric   void declareSymbols();
278*0b57cec5SDimitry Andric 
279*0b57cec5SDimitry Andric   // Used to handle INSERT AFTER statements.
280*0b57cec5SDimitry Andric   void processInsertCommands();
281*0b57cec5SDimitry Andric 
282*0b57cec5SDimitry Andric   // SECTIONS command list.
283*0b57cec5SDimitry Andric   std::vector<BaseCommand *> sectionCommands;
284*0b57cec5SDimitry Andric 
285*0b57cec5SDimitry Andric   // PHDRS command list.
286*0b57cec5SDimitry Andric   std::vector<PhdrsCommand> phdrsCommands;
287*0b57cec5SDimitry Andric 
288*0b57cec5SDimitry Andric   bool hasSectionsCommand = false;
289*0b57cec5SDimitry Andric   bool errorOnMissingSection = false;
290*0b57cec5SDimitry Andric 
291*0b57cec5SDimitry Andric   // List of section patterns specified with KEEP commands. They will
292*0b57cec5SDimitry Andric   // be kept even if they are unused and --gc-sections is specified.
293*0b57cec5SDimitry Andric   std::vector<InputSectionDescription *> keptSections;
294*0b57cec5SDimitry Andric 
295*0b57cec5SDimitry Andric   // A map from memory region name to a memory region descriptor.
296*0b57cec5SDimitry Andric   llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
297*0b57cec5SDimitry Andric 
298*0b57cec5SDimitry Andric   // A list of symbols referenced by the script.
299*0b57cec5SDimitry Andric   std::vector<llvm::StringRef> referencedSymbols;
300*0b57cec5SDimitry Andric 
301*0b57cec5SDimitry Andric   // Used to implement INSERT [AFTER|BEFORE]. Contains commands that need
302*0b57cec5SDimitry Andric   // to be inserted into SECTIONS commands list.
303*0b57cec5SDimitry Andric   llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertAfterCommands;
304*0b57cec5SDimitry Andric   llvm::DenseMap<StringRef, std::vector<BaseCommand *>> insertBeforeCommands;
305*0b57cec5SDimitry Andric };
306*0b57cec5SDimitry Andric 
// Pointer to a LinkerScript instance. This is a declaration only; the
// definition is not in this header.
extern LinkerScript *script;
308*0b57cec5SDimitry Andric 
309*0b57cec5SDimitry Andric } // end namespace elf
310*0b57cec5SDimitry Andric } // end namespace lld
311*0b57cec5SDimitry Andric 
312*0b57cec5SDimitry Andric #endif // LLD_ELF_LINKER_SCRIPT_H
313