xref: /freebsd/contrib/llvm-project/lld/ELF/InputSection.h (revision 5036d9652a5701d00e9e40ea942c278e9f77d33d)
1 //===- InputSection.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_INPUT_SECTION_H
10 #define LLD_ELF_INPUT_SECTION_H
11 
12 #include "Config.h"
13 #include "Relocations.h"
14 #include "lld/Common/CommonLinkerContext.h"
15 #include "lld/Common/LLVM.h"
16 #include "lld/Common/Memory.h"
17 #include "llvm/ADT/CachedHashString.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/TinyPtrVector.h"
21 #include "llvm/Object/ELF.h"
22 #include "llvm/Support/Compiler.h"
23 
24 namespace lld {
25 namespace elf {
26 
27 class InputFile;
28 class Symbol;
29 
30 class Defined;
31 struct Partition;
32 class SyntheticSection;
33 template <class ELFT> class ObjFile;
34 class OutputSection;
35 
36 LLVM_LIBRARY_VISIBILITY extern std::vector<Partition> partitions;
37 
// Returned by InputSectionBase::relsOrRelas. At most one member is non-empty.
39 template <class ELFT> struct RelsOrRelas {
40   Relocs<typename ELFT::Rel> rels;
41   Relocs<typename ELFT::Rela> relas;
42   Relocs<typename ELFT::Crel> crels;
43   bool areRelocsRel() const { return rels.size(); }
44   bool areRelocsCrel() const { return crels.size(); }
45 };
46 
// Fetches the relocations of `sec` and calls `f(__VA_ARGS__, relocs)` with
// exactly one of the three relocation ranges: crels if it is non-empty,
// otherwise rels if it is non-empty, otherwise relas (which is therefore also
// the choice when the section has no relocations at all).
//
// The expansion names `ELFT`, so the macro can only be used where `ELFT` is a
// type in scope. `sec` is evaluated once. The expansion is a braced block that
// declares a local named `rs`, not a single expression.
#define invokeOnRelocs(sec, f, ...)                                            \
  {                                                                            \
    const RelsOrRelas<ELFT> rs = (sec).template relsOrRelas<ELFT>();           \
    if (rs.areRelocsCrel())                                                    \
      f(__VA_ARGS__, rs.crels);                                                \
    else if (rs.areRelocsRel())                                                \
      f(__VA_ARGS__, rs.rels);                                                 \
    else                                                                       \
      f(__VA_ARGS__, rs.relas);                                                \
  }
57 
58 // This is the base class of all sections that lld handles. Some are sections in
59 // input files, some are sections in the produced output file and some exist
60 // just as a convenience for implementing special ways of combining some
61 // sections.
class SectionBase {
public:
  // Discriminator identifying the concrete section class. The classof()
  // functions of the derived classes test this value.
  enum Kind { Regular, Synthetic, Spill, EHFrame, Merge, Output };

  Kind kind() const { return (Kind)sectionKind; }

  // Storage for the Kind value, packed into 3 bits.
  LLVM_PREFERRED_TYPE(Kind)
  uint8_t sectionKind : 3;

  // The next two bit fields are only used by InputSectionBase, but we
  // put them here so the struct packs better.

  LLVM_PREFERRED_TYPE(bool)
  uint8_t bss : 1;

  // Set for sections that should not be folded by ICF.
  LLVM_PREFERRED_TYPE(bool)
  uint8_t keepUnique : 1;

  // The 1-indexed partition that this section is assigned to by the garbage
  // collector, or 0 if this section is dead. Normally there is only one
  // partition, so this will either be 0 or 1.
  uint8_t partition = 1;
  uint32_t type;
  StringRef name;

  // Returns the Partition this section is assigned to. Defined out of line.
  elf::Partition &getPartition() const;

  // These correspond to the fields in Elf_Shdr.
  uint64_t flags;
  uint32_t addralign;
  uint32_t entsize;
  uint32_t link;
  uint32_t info;

  OutputSection *getOutputSection();
  // Const overload; forwards to the non-const version above.
  const OutputSection *getOutputSection() const {
    return const_cast<SectionBase *>(this)->getOutputSection();
  }

  // Translate an offset in the input section to an offset in the output
  // section.
  uint64_t getOffset(uint64_t offset) const;

  // Defined out of line.
  // NOTE(review): presumably the virtual address of `offset` within this
  // section in the output image -- confirm against the definition.
  uint64_t getVA(uint64_t offset = 0) const;

  // Liveness is encoded in `partition`: zero means dead, anything else means
  // live. markLive() always assigns partition 1.
  bool isLive() const { return partition != 0; }
  void markLive() { partition = 1; }
  void markDead() { partition = 0; }

protected:
  // Only derived classes may construct a SectionBase. Note that the parameter
  // order (entsize, addralign, type, info, link) differs from the order in
  // which the members are declared. bss and keepUnique start out false, and
  // partition keeps its default of 1.
  constexpr SectionBase(Kind sectionKind, StringRef name, uint64_t flags,
                        uint32_t entsize, uint32_t addralign, uint32_t type,
                        uint32_t info, uint32_t link)
      : sectionKind(sectionKind), bss(false), keepUnique(false), type(type),
        name(name), flags(flags), addralign(addralign), entsize(entsize),
        link(link), info(info) {}
};
120 
// Marks either the start or the end of a defined symbol at a section offset.
struct SymbolAnchor {
  // Offset that this anchor refers to.
  uint64_t offset;
  // The symbol whose start or end this anchor marks.
  Defined *d;
  bool end; // true for the anchor of st_value+st_size
};
126 
// Per-section scratch data for linker relaxation. InputSectionBase::relaxAux
// points to one of these.
struct RelaxAux {
  // This records symbol start and end offsets which will be adjusted according
  // to the nearest relocDeltas element.
  SmallVector<SymbolAnchor, 0> anchors;
  // For relocations[i], the actual offset is
  //   r_offset - (i ? relocDeltas[i-1] : 0).
  std::unique_ptr<uint32_t[]> relocDeltas;
  // For relocations[i], the actual type is relocTypes[i].
  std::unique_ptr<RelType[]> relocTypes;
  // NOTE(review): the meaning of `writes` is not documented in this header;
  // presumably data recorded by the target-specific relaxation code for the
  // final write-out -- confirm against its users.
  SmallVector<uint32_t, 0> writes;
};
138 
139 // This corresponds to a section of an input file.
class InputSectionBase : public SectionBase {
public:
  // Constructs a section from a section header of an object file.
  template <class ELFT>
  InputSectionBase(ObjFile<ELFT> &file, const typename ELFT::Shdr &header,
                   StringRef name, Kind sectionKind);

  // Constructs a section from explicitly supplied header fields and contents.
  InputSectionBase(InputFile *file, uint64_t flags, uint32_t type,
                   uint64_t entsize, uint32_t link, uint32_t info,
                   uint32_t addralign, ArrayRef<uint8_t> data, StringRef name,
                   Kind sectionKind);

  // Every section kind except Output is an input section.
  static bool classof(const SectionBase *s) { return s->kind() != Output; }

  // The file which contains this section. Its dynamic type is usually
  // ObjFile<ELFT>, but may be an InputFile of InternalKind (for a synthetic
  // section).
  InputFile *file;

  // Input sections are part of an output section. Special sections
  // like .eh_frame and merge sections are first combined into a
  // synthetic section that is then added to an output section. In all
  // cases this points one level up.
  SectionBase *parent = nullptr;

  // Section index of the relocation section if exists.
  uint32_t relSecIdx = 0;

  // Getter when the dynamic type is ObjFile<ELFT>.
  template <class ELFT> ObjFile<ELFT> *getFile() const {
    return cast<ObjFile<ELFT>>(file);
  }

  // Used by --optimize-bb-jumps and RISC-V linker relaxation temporarily to
  // indicate the number of bytes which is not counted in the size. This should
  // be reset to zero after uses.
  uint32_t bytesDropped = 0;

  // Set when the section content is compressed. contentMaybeDecompress() calls
  // decompress() while this is true.
  // NOTE(review): mutable presumably so that the const decompress() can clear
  // it -- confirm against the definition.
  mutable bool compressed = false;

  // Whether this section is SHT_CREL and has been decoded to RELA by
  // relsOrRelas.
  bool decodedCrel = false;

  // Whether the section needs to be padded with a NOP filler due to
  // deleteFallThruJmpInsn.
  bool nopFiller = false;

  // Records `num` additional bytes as dropped.
  // NOTE(review): the `< 256` bound looks like a leftover from a narrower
  // bytesDropped type, since the field is a uint32_t -- confirm whether the
  // limit is still intended.
  void drop_back(unsigned num) {
    assert(bytesDropped + num < 256);
    bytesDropped += num;
  }

  // Gives back `num` previously dropped bytes. At least `num` bytes must
  // currently be dropped (asserted).
  void push_back(uint64_t num) {
    assert(bytesDropped >= num);
    bytesDropped -= num;
  }

  // Start of the section data and its length in bytes. content() pairs the two
  // into an ArrayRef.
  // NOTE(review): content_ is mutable presumably so that the const
  // decompress() can repoint it -- confirm against the definition.
  mutable const uint8_t *content_;
  uint64_t size;

  // Makes the dropped bytes permanent: subtracts bytesDropped from size and
  // resets bytesDropped to zero.
  void trim() {
    if (bytesDropped) {
      size -= bytesDropped;
      bytesDropped = 0;
    }
  }

  // Returns a view of (content_, size) as is. No decompression is attempted.
  ArrayRef<uint8_t> content() const {
    return ArrayRef<uint8_t>(content_, size);
  }
  // Like content(), but calls decompress() first if `compressed` is set.
  ArrayRef<uint8_t> contentMaybeDecompress() const {
    if (compressed)
      decompress();
    return content();
  }

  // The next member in the section group if this section is in a group. This is
  // used by --gc-sections.
  InputSectionBase *nextInSectionGroup = nullptr;

  // Returns the relocations that apply to this section. Defined out of line.
  // NOTE(review): the exact effect of `supportsCrel` is not visible in this
  // header -- see the definition.
  template <class ELFT>
  RelsOrRelas<ELFT> relsOrRelas(bool supportsCrel = true) const;

  // InputSections that are dependent on us (reverse dependency for GC)
  llvm::TinyPtrVector<InputSection *> dependentSections;

  // Returns the size of this section (even if this is a common or BSS.)
  size_t getSize() const;

  InputSection *getLinkOrderDep() const;

  // Get a symbol that encloses this offset from within the section. If type is
  // not zero, return a symbol with the specified type.
  Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0) const;
  // Shorthand for getEnclosingSymbol restricted to STT_FUNC symbols.
  Defined *getEnclosingFunction(uint64_t offset) const {
    return getEnclosingSymbol(offset, llvm::ELF::STT_FUNC);
  }

  // Returns a source location string. Used to construct an error message.
  std::string getLocation(uint64_t offset) const;
  std::string getSrcMsg(const Symbol &sym, uint64_t offset) const;
  std::string getObjMsg(uint64_t offset) const;

  // Each section knows how to relocate itself. These functions apply
  // relocations, assuming that Buf points to this section's copy in
  // the mmap'ed output buffer.
  template <class ELFT> void relocate(uint8_t *buf, uint8_t *bufEnd);
  static uint64_t getRelocTargetVA(const InputFile *File, RelType Type,
                                   int64_t A, uint64_t P, const Symbol &Sym,
                                   RelExpr Expr);

  // The native ELF reloc data type is not very convenient to handle.
  // So we convert ELF reloc records to our own records in Relocations.cpp.
  // This vector contains such "cooked" relocations.
  SmallVector<Relocation, 0> relocations;

  // Accessors for the cooked relocations above.
  void addReloc(const Relocation &r) { relocations.push_back(r); }
  MutableArrayRef<Relocation> relocs() { return relocations; }
  ArrayRef<Relocation> relocs() const { return relocations; }

  // The three members below share storage, so only one of them is meaningful
  // at any given time.
  union {
    // These are modifiers to jump instructions that are necessary when basic
    // block sections are enabled.  Basic block sections creates opportunities
    // to relax jump instructions at basic block boundaries after reordering the
    // basic blocks.
    JumpInstrMod *jumpInstrMod = nullptr;

    // Auxiliary information for RISC-V and LoongArch linker relaxation.
    // They do not use jumpInstrMod.
    RelaxAux *relaxAux;

    // The compressed content size when `compressed` is true.
    size_t compressedSize;
  };

  // A function compiled with -fsplit-stack calling a function
  // compiled without -fsplit-stack needs its prologue adjusted. Find
  // such functions and adjust their prologues.  This is very similar
  // to relocation. See https://gcc.gnu.org/wiki/SplitStacks for more
  // information.
  template <typename ELFT>
  void adjustSplitStackFunctionPrologues(uint8_t *buf, uint8_t *end);


  // Reinterprets content() as an array of T. The content size must be a
  // multiple of sizeof(T) (asserted). No decompression is attempted.
  template <typename T> llvm::ArrayRef<T> getDataAs() const {
    size_t s = content().size();
    assert(s % sizeof(T) == 0);
    return llvm::ArrayRef<T>((const T *)content().data(), s / sizeof(T));
  }

protected:
  // Both are defined out of line. decompress() is what
  // contentMaybeDecompress() calls when `compressed` is set.
  template <typename ELFT>
  void parseCompressedHeader();
  void decompress() const;
};
295 
296 // SectionPiece represents a piece of splittable section contents.
297 // We allocate a lot of these and binary search on them. This means that they
298 // have to be as compact as possible, which is why we don't store the size (can
299 // be found by looking at the next one).
struct SectionPiece {
  // Leaves inputOff, live and hash uninitialized; only outputOff has a default
  // member initializer.
  SectionPiece() = default;
  // Only the upper 31 bits of `hash` are stored (hash >> 1), so that the hash
  // and the `live` flag fit together in one 32-bit word.
  SectionPiece(size_t off, uint32_t hash, bool live)
      : inputOff(off), live(live), hash(hash >> 1) {}

  // Offset of the start of this piece within the input section contents.
  uint32_t inputOff;
  LLVM_PREFERRED_TYPE(bool)
  uint32_t live : 1;
  uint32_t hash : 31;
  // NOTE(review): presumably the offset assigned to this piece in the merged
  // output -- confirm against the code that sets it.
  uint64_t outputOff = 0;
};

// Enforces the compactness requirement: 4 bytes of inputOff, 4 bytes of
// live + hash, and 8 bytes of outputOff.
static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
313 
314 // This corresponds to a SHF_MERGE section of an input file.
class MergeInputSection : public InputSectionBase {
public:
  // Constructs a merge section from a section header of an object file.
  template <class ELFT>
  MergeInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header,
                    StringRef name);
  // Constructs a merge section from explicitly supplied fields and contents.
  MergeInputSection(uint64_t flags, uint32_t type, uint64_t entsize,
                    ArrayRef<uint8_t> data, StringRef name);

  static bool classof(const SectionBase *s) { return s->kind() == Merge; }
  // Defined out of line; see `pieces` below for the per-piece records.
  void splitIntoPieces();

  // Translate an offset in the input section to an offset in the parent
  // MergeSyntheticSection.
  uint64_t getParentOffset(uint64_t offset) const;

  // Splittable sections are handled as a sequence of data
  // rather than a single large blob of data.
  SmallVector<SectionPiece, 0> pieces;

  // Returns I'th piece's data. This function is very hot when
  // string merging is enabled, so we want to inline.
  //
  // A piece has no stored size: it extends to the start of the next piece, or
  // to the end of the section contents for the last piece. The returned hash
  // is the value stored in the piece.
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  llvm::CachedHashStringRef getData(size_t i) const {
    size_t begin = pieces[i].inputOff;
    size_t end =
        (pieces.size() - 1 == i) ? content().size() : pieces[i + 1].inputOff;
    return {toStringRef(content().slice(begin, end - begin)), pieces[i].hash};
  }

  // Returns the SectionPiece at a given input section offset.
  SectionPiece &getSectionPiece(uint64_t offset);
  // Const overload; forwards to the non-const version above.
  const SectionPiece &getSectionPiece(uint64_t offset) const {
    return const_cast<MergeInputSection *>(this)->getSectionPiece(offset);
  }

  // Returns `parent` as a SyntheticSection, or null if no parent is set.
  SyntheticSection *getParent() const {
    return cast_or_null<SyntheticSection>(parent);
  }

private:
  // Helpers defined out of line.
  // NOTE(review): presumably the two splitting strategies used by
  // splitIntoPieces() -- confirm against the definitions.
  void splitStrings(StringRef s, size_t size);
  void splitNonStrings(ArrayRef<uint8_t> a, size_t size);
};
358 
359 struct EhSectionPiece {
360   EhSectionPiece(size_t off, InputSectionBase *sec, uint32_t size,
361                  unsigned firstRelocation)
362       : inputOff(off), sec(sec), size(size), firstRelocation(firstRelocation) {}
363 
364   ArrayRef<uint8_t> data() const {
365     return {sec->content().data() + this->inputOff, size};
366   }
367 
368   size_t inputOff;
369   ssize_t outputOff = -1;
370   InputSectionBase *sec;
371   uint32_t size;
372   unsigned firstRelocation;
373 };
374 
375 // This corresponds to a .eh_frame section of an input file.
class EhInputSection : public InputSectionBase {
public:
  // Constructs an .eh_frame section from a section header of an object file.
  template <class ELFT>
  EhInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header,
                 StringRef name);
  static bool classof(const SectionBase *s) { return s->kind() == EHFrame; }
  // Both overloads are defined out of line.
  // NOTE(review): presumably they fill `cies` and `fdes` below, with the
  // second overload taking the relocations explicitly -- confirm against the
  // definitions.
  template <class ELFT> void split();
  template <class ELFT, class RelTy> void split(ArrayRef<RelTy> rels);

  // Splittable sections are handled as a sequence of data
  // rather than a single large blob of data.
  SmallVector<EhSectionPiece, 0> cies, fdes;

  // Defined out of line.
  SyntheticSection *getParent() const;
  // Defined out of line.
  // NOTE(review): presumably translates an input section offset to an offset
  // in the parent section, like MergeInputSection::getParentOffset -- confirm.
  uint64_t getParentOffset(uint64_t offset) const;
};
392 
393 // This is a section that is added directly to an output section
394 // instead of needing special combination via a synthetic section. This
395 // includes all input sections with the exceptions of SHF_MERGE and
396 // .eh_frame. It also includes the synthetic sections themselves.
class InputSection : public InputSectionBase {
public:
  // Constructs a section from explicitly supplied fields and contents. The
  // kind defaults to Regular.
  InputSection(InputFile *f, uint64_t flags, uint32_t type, uint32_t addralign,
               ArrayRef<uint8_t> data, StringRef name, Kind k = Regular);
  // Constructs a section from a section header of an object file.
  template <class ELFT>
  InputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header,
               StringRef name);

  // Regular, Synthetic and Spill sections are all InputSections.
  static bool classof(const SectionBase *s) {
    return s->kind() == SectionBase::Regular ||
           s->kind() == SectionBase::Synthetic ||
           s->kind() == SectionBase::Spill;
  }

  // Write this section to a mmap'ed file, assuming Buf is pointing to
  // beginning of the output section.
  template <class ELFT> void writeTo(uint8_t *buf);

  // Returns `parent` reinterpreted as an OutputSection. This is an unchecked
  // cast; OutputSection is only forward-declared in this header.
  OutputSection *getParent() const {
    return reinterpret_cast<OutputSection *>(parent);
  }

  // This variable has two usages. Initially, it represents an index in the
  // OutputSection's InputSection list, and is used when ordering SHF_LINK_ORDER
  // sections. After assignAddresses is called, it represents the offset from
  // the beginning of the output section this section was assigned to.
  uint64_t outSecOff = 0;

  // Defined out of line.
  InputSectionBase *getRelocatedSection() const;

  // Defined out of line.
  template <class ELFT, class RelTy>
  void relocateNonAlloc(uint8_t *buf, Relocs<RelTy> rels);

  // Points to the canonical section. If ICF folds two sections, repl pointer of
  // one section points to the other.
  InputSection *repl = this;

  // Used by ICF.
  uint32_t eqClass[2] = {0, 0};

  // Called by ICF to merge two input sections.
  void replace(InputSection *other);

  // A shared static instance, defined out of line.
  // NOTE(review): presumably a sentinel that stands in for sections that have
  // been discarded -- confirm against its users.
  static InputSection discarded;

private:
  // Helpers defined out of line.
  template <class ELFT, class RelTy> void copyRelocations(uint8_t *buf);

  template <class ELFT, class RelTy, class RelIt>
  void copyRelocations(uint8_t *buf, llvm::iterator_range<RelIt> rels);

  template <class ELFT> void copyShtGroup(uint8_t *buf);
};
450 
451 // A marker for a potential spill location for another input section. This
452 // broadly acts as if it were the original section until address assignment.
453 // Then it is either replaced with the real input section or removed.
class PotentialSpillSection : public InputSection {
public:
  // The containing input section description; used to quickly replace this stub
  // with the actual section.
  InputSectionDescription *isd;

  // Next potential spill location for the same source input section.
  PotentialSpillSection *next = nullptr;

  // Creates a spill marker for `source` inside the description `isd`. Defined
  // out of line.
  PotentialSpillSection(const InputSectionBase &source,
                        InputSectionDescription &isd);

  // Spill is also accepted by InputSection::classof, so a spill section
  // satisfies both checks.
  static bool classof(const SectionBase *sec) {
    return sec->kind() == InputSectionBase::Spill;
  }
};

// Fails the build if InputSection grows beyond 160 bytes.
static_assert(sizeof(InputSection) <= 160, "InputSection is too big");
472 
// Abstract base for sections of kind Synthetic. Subclasses must implement
// getSize() and writeTo().
class SyntheticSection : public InputSection {
public:
  // Synthetic sections are attached to ctx.internalFile and start out with
  // empty contents.
  SyntheticSection(uint64_t flags, uint32_t type, uint32_t addralign,
                   StringRef name)
      : InputSection(ctx.internalFile, flags, type, addralign, {}, name,
                     InputSectionBase::Synthetic) {}

  virtual ~SyntheticSection() = default;
  // Size of the section. This declaration hides the non-virtual
  // InputSectionBase::getSize().
  virtual size_t getSize() const = 0;
  // If the section has the SHF_ALLOC flag and the size may be changed if
  // thunks are added, update the section size.
  // The default implementation does nothing and returns false.
  virtual bool updateAllocSize() { return false; }
  // The default implementation returns true.
  virtual bool isNeeded() const { return true; }
  // Hook with an empty default implementation.
  virtual void finalizeContents() {}
  // Writes the section contents to `buf`. This declaration hides the
  // InputSection::writeTo<ELFT> template.
  virtual void writeTo(uint8_t *buf) = 0;

  static bool classof(const SectionBase *sec) {
    return sec->kind() == InputSectionBase::Synthetic;
  }
};
493 
494 inline bool isStaticRelSecType(uint32_t type) {
495   return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_CREL ||
496          type == llvm::ELF::SHT_REL;
497 }
498 
499 inline bool isDebugSection(const InputSectionBase &sec) {
500   return (sec.flags & llvm::ELF::SHF_ALLOC) == 0 &&
501          sec.name.starts_with(".debug");
502 }
503 
504 // The set of TOC entries (.toc + addend) for which we should not apply
505 // toc-indirect to toc-relative relaxation. const Symbol * refers to the
506 // STT_SECTION symbol associated to the .toc input section.
507 extern llvm::DenseSet<std::pair<const Symbol *, uint64_t>> ppc64noTocRelax;
508 
509 } // namespace elf
510 
511 std::string toString(const elf::InputSectionBase *);
512 } // namespace lld
513 
514 #endif
515