1 //===- InputSection.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_INPUT_SECTION_H 10 #define LLD_ELF_INPUT_SECTION_H 11 12 #include "Config.h" 13 #include "Relocations.h" 14 #include "lld/Common/CommonLinkerContext.h" 15 #include "lld/Common/LLVM.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/ADT/CachedHashString.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/TinyPtrVector.h" 21 #include "llvm/Object/ELF.h" 22 #include "llvm/Support/Compiler.h" 23 24 namespace lld { 25 namespace elf { 26 27 class InputFile; 28 class Symbol; 29 30 class Defined; 31 struct Partition; 32 class SyntheticSection; 33 template <class ELFT> class ObjFile; 34 class OutputSection; 35 36 LLVM_LIBRARY_VISIBILITY extern std::vector<Partition> partitions; 37 38 // Returned by InputSectionBase::relsOrRelas. At least one member is empty. 39 template <class ELFT> struct RelsOrRelas { 40 ArrayRef<typename ELFT::Rel> rels; 41 ArrayRef<typename ELFT::Rela> relas; 42 bool areRelocsRel() const { return rels.size(); } 43 }; 44 45 // This is the base class of all sections that lld handles. Some are sections in 46 // input files, some are sections in the produced output file and some exist 47 // just as a convenience for implementing special ways of combining some 48 // sections. 49 class SectionBase { 50 public: 51 enum Kind { Regular, Synthetic, EHFrame, Merge, Output }; 52 53 Kind kind() const { return (Kind)sectionKind; } 54 55 uint8_t sectionKind : 3; 56 57 // The next two bit fields are only used by InputSectionBase, but we 58 // put them here so the struct packs better. 59 60 uint8_t bss : 1; 61 62 // Set for sections that should not be folded by ICF. 63 uint8_t keepUnique : 1; 64 65 uint8_t partition = 1; 66 uint32_t type; 67 StringRef name; 68 69 // The 1-indexed partition that this section is assigned to by the garbage 70 // collector, or 0 if this section is dead. Normally there is only one 71 // partition, so this will either be 0 or 1. 72 elf::Partition &getPartition() const; 73 74 // These corresponds to the fields in Elf_Shdr. 75 uint64_t flags; 76 uint32_t addralign; 77 uint32_t entsize; 78 uint32_t link; 79 uint32_t info; 80 81 OutputSection *getOutputSection(); 82 const OutputSection *getOutputSection() const { 83 return const_cast<SectionBase *>(this)->getOutputSection(); 84 } 85 86 // Translate an offset in the input section to an offset in the output 87 // section. 88 uint64_t getOffset(uint64_t offset) const; 89 90 uint64_t getVA(uint64_t offset = 0) const; 91 92 bool isLive() const { return partition != 0; } 93 void markLive() { partition = 1; } 94 void markDead() { partition = 0; } 95 96 protected: 97 constexpr SectionBase(Kind sectionKind, StringRef name, uint64_t flags, 98 uint32_t entsize, uint32_t addralign, uint32_t type, 99 uint32_t info, uint32_t link) 100 : sectionKind(sectionKind), bss(false), keepUnique(false), type(type), 101 name(name), flags(flags), addralign(addralign), entsize(entsize), 102 link(link), info(info) {} 103 }; 104 105 struct SymbolAnchor { 106 uint64_t offset; 107 Defined *d; 108 bool end; // true for the anchor of st_value+st_size 109 }; 110 111 struct RelaxAux { 112 // This records symbol start and end offsets which will be adjusted according 113 // to the nearest relocDeltas element. 114 SmallVector<SymbolAnchor, 0> anchors; 115 // For relocations[i], the actual offset is 116 // r_offset - (i ? relocDeltas[i-1] : 0). 117 std::unique_ptr<uint32_t[]> relocDeltas; 118 // For relocations[i], the actual type is relocTypes[i]. 119 std::unique_ptr<RelType[]> relocTypes; 120 SmallVector<uint32_t, 0> writes; 121 }; 122 123 // This corresponds to a section of an input file. 124 class InputSectionBase : public SectionBase { 125 public: 126 template <class ELFT> 127 InputSectionBase(ObjFile<ELFT> &file, const typename ELFT::Shdr &header, 128 StringRef name, Kind sectionKind); 129 130 InputSectionBase(InputFile *file, uint64_t flags, uint32_t type, 131 uint64_t entsize, uint32_t link, uint32_t info, 132 uint32_t addralign, ArrayRef<uint8_t> data, StringRef name, 133 Kind sectionKind); 134 135 static bool classof(const SectionBase *s) { return s->kind() != Output; } 136 137 // The file which contains this section. Its dynamic type is always 138 // ObjFile<ELFT>, but in order to avoid ELFT, we use InputFile as 139 // its static type. 140 InputFile *file; 141 142 // Input sections are part of an output section. Special sections 143 // like .eh_frame and merge sections are first combined into a 144 // synthetic section that is then added to an output section. In all 145 // cases this points one level up. 146 SectionBase *parent = nullptr; 147 148 // Section index of the relocation section if exists. 149 uint32_t relSecIdx = 0; 150 151 template <class ELFT> ObjFile<ELFT> *getFile() const { 152 return cast_or_null<ObjFile<ELFT>>(file); 153 } 154 155 // Used by --optimize-bb-jumps and RISC-V linker relaxation temporarily to 156 // indicate the number of bytes which is not counted in the size. This should 157 // be reset to zero after uses. 158 uint32_t bytesDropped = 0; 159 160 mutable bool compressed = false; 161 162 // Whether the section needs to be padded with a NOP filler due to 163 // deleteFallThruJmpInsn. 164 bool nopFiller = false; 165 166 void drop_back(unsigned num) { 167 assert(bytesDropped + num < 256); 168 bytesDropped += num; 169 } 170 171 void push_back(uint64_t num) { 172 assert(bytesDropped >= num); 173 bytesDropped -= num; 174 } 175 176 mutable const uint8_t *content_; 177 uint64_t size; 178 179 void trim() { 180 if (bytesDropped) { 181 size -= bytesDropped; 182 bytesDropped = 0; 183 } 184 } 185 186 ArrayRef<uint8_t> content() const { 187 return ArrayRef<uint8_t>(content_, size); 188 } 189 ArrayRef<uint8_t> contentMaybeDecompress() const { 190 if (compressed) 191 decompress(); 192 return content(); 193 } 194 195 // The next member in the section group if this section is in a group. This is 196 // used by --gc-sections. 197 InputSectionBase *nextInSectionGroup = nullptr; 198 199 template <class ELFT> RelsOrRelas<ELFT> relsOrRelas() const; 200 201 // InputSections that are dependent on us (reverse dependency for GC) 202 llvm::TinyPtrVector<InputSection *> dependentSections; 203 204 // Returns the size of this section (even if this is a common or BSS.) 205 size_t getSize() const; 206 207 InputSection *getLinkOrderDep() const; 208 209 // Get a symbol that encloses this offset from within the section. If type is 210 // not zero, return a symbol with the specified type. 211 Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0) const; 212 Defined *getEnclosingFunction(uint64_t offset) const { 213 return getEnclosingSymbol(offset, llvm::ELF::STT_FUNC); 214 } 215 216 // Returns a source location string. Used to construct an error message. 217 std::string getLocation(uint64_t offset) const; 218 std::string getSrcMsg(const Symbol &sym, uint64_t offset) const; 219 std::string getObjMsg(uint64_t offset) const; 220 221 // Each section knows how to relocate itself. These functions apply 222 // relocations, assuming that Buf points to this section's copy in 223 // the mmap'ed output buffer. 224 template <class ELFT> void relocate(uint8_t *buf, uint8_t *bufEnd); 225 static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, 226 int64_t A, uint64_t P, const Symbol &Sym, 227 RelExpr Expr); 228 229 // The native ELF reloc data type is not very convenient to handle. 230 // So we convert ELF reloc records to our own records in Relocations.cpp. 231 // This vector contains such "cooked" relocations. 232 SmallVector<Relocation, 0> relocations; 233 234 void addReloc(const Relocation &r) { relocations.push_back(r); } 235 MutableArrayRef<Relocation> relocs() { return relocations; } 236 ArrayRef<Relocation> relocs() const { return relocations; } 237 238 union { 239 // These are modifiers to jump instructions that are necessary when basic 240 // block sections are enabled. Basic block sections creates opportunities 241 // to relax jump instructions at basic block boundaries after reordering the 242 // basic blocks. 243 JumpInstrMod *jumpInstrMod = nullptr; 244 245 // Auxiliary information for RISC-V and LoongArch linker relaxation. 246 // They do not use jumpInstrMod. 247 RelaxAux *relaxAux; 248 249 // The compressed content size when `compressed` is true. 250 size_t compressedSize; 251 }; 252 253 // A function compiled with -fsplit-stack calling a function 254 // compiled without -fsplit-stack needs its prologue adjusted. Find 255 // such functions and adjust their prologues. This is very similar 256 // to relocation. See https://gcc.gnu.org/wiki/SplitStacks for more 257 // information. 258 template <typename ELFT> 259 void adjustSplitStackFunctionPrologues(uint8_t *buf, uint8_t *end); 260 261 262 template <typename T> llvm::ArrayRef<T> getDataAs() const { 263 size_t s = content().size(); 264 assert(s % sizeof(T) == 0); 265 return llvm::ArrayRef<T>((const T *)content().data(), s / sizeof(T)); 266 } 267 268 protected: 269 template <typename ELFT> 270 void parseCompressedHeader(); 271 void decompress() const; 272 }; 273 274 // SectionPiece represents a piece of splittable section contents. 275 // We allocate a lot of these and binary search on them. This means that they 276 // have to be as compact as possible, which is why we don't store the size (can 277 // be found by looking at the next one). 278 struct SectionPiece { 279 SectionPiece() = default; 280 SectionPiece(size_t off, uint32_t hash, bool live) 281 : inputOff(off), live(live), hash(hash >> 1) {} 282 283 uint32_t inputOff; 284 uint32_t live : 1; 285 uint32_t hash : 31; 286 uint64_t outputOff = 0; 287 }; 288 289 static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big"); 290 291 // This corresponds to a SHF_MERGE section of an input file. 292 class MergeInputSection : public InputSectionBase { 293 public: 294 template <class ELFT> 295 MergeInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header, 296 StringRef name); 297 MergeInputSection(uint64_t flags, uint32_t type, uint64_t entsize, 298 ArrayRef<uint8_t> data, StringRef name); 299 300 static bool classof(const SectionBase *s) { return s->kind() == Merge; } 301 void splitIntoPieces(); 302 303 // Translate an offset in the input section to an offset in the parent 304 // MergeSyntheticSection. 305 uint64_t getParentOffset(uint64_t offset) const; 306 307 // Splittable sections are handled as a sequence of data 308 // rather than a single large blob of data. 309 SmallVector<SectionPiece, 0> pieces; 310 311 // Returns I'th piece's data. This function is very hot when 312 // string merging is enabled, so we want to inline. 313 LLVM_ATTRIBUTE_ALWAYS_INLINE 314 llvm::CachedHashStringRef getData(size_t i) const { 315 size_t begin = pieces[i].inputOff; 316 size_t end = 317 (pieces.size() - 1 == i) ? content().size() : pieces[i + 1].inputOff; 318 return {toStringRef(content().slice(begin, end - begin)), pieces[i].hash}; 319 } 320 321 // Returns the SectionPiece at a given input section offset. 322 SectionPiece &getSectionPiece(uint64_t offset); 323 const SectionPiece &getSectionPiece(uint64_t offset) const { 324 return const_cast<MergeInputSection *>(this)->getSectionPiece(offset); 325 } 326 327 SyntheticSection *getParent() const { 328 return cast_or_null<SyntheticSection>(parent); 329 } 330 331 private: 332 void splitStrings(StringRef s, size_t size); 333 void splitNonStrings(ArrayRef<uint8_t> a, size_t size); 334 }; 335 336 struct EhSectionPiece { 337 EhSectionPiece(size_t off, InputSectionBase *sec, uint32_t size, 338 unsigned firstRelocation) 339 : inputOff(off), sec(sec), size(size), firstRelocation(firstRelocation) {} 340 341 ArrayRef<uint8_t> data() const { 342 return {sec->content().data() + this->inputOff, size}; 343 } 344 345 size_t inputOff; 346 ssize_t outputOff = -1; 347 InputSectionBase *sec; 348 uint32_t size; 349 unsigned firstRelocation; 350 }; 351 352 // This corresponds to a .eh_frame section of an input file. 353 class EhInputSection : public InputSectionBase { 354 public: 355 template <class ELFT> 356 EhInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header, 357 StringRef name); 358 static bool classof(const SectionBase *s) { return s->kind() == EHFrame; } 359 template <class ELFT> void split(); 360 template <class ELFT, class RelTy> void split(ArrayRef<RelTy> rels); 361 362 // Splittable sections are handled as a sequence of data 363 // rather than a single large blob of data. 364 SmallVector<EhSectionPiece, 0> cies, fdes; 365 366 SyntheticSection *getParent() const; 367 uint64_t getParentOffset(uint64_t offset) const; 368 }; 369 370 // This is a section that is added directly to an output section 371 // instead of needing special combination via a synthetic section. This 372 // includes all input sections with the exceptions of SHF_MERGE and 373 // .eh_frame. It also includes the synthetic sections themselves. 374 class InputSection : public InputSectionBase { 375 public: 376 InputSection(InputFile *f, uint64_t flags, uint32_t type, uint32_t addralign, 377 ArrayRef<uint8_t> data, StringRef name, Kind k = Regular); 378 template <class ELFT> 379 InputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header, 380 StringRef name); 381 382 static bool classof(const SectionBase *s) { 383 return s->kind() == SectionBase::Regular || 384 s->kind() == SectionBase::Synthetic; 385 } 386 387 // Write this section to a mmap'ed file, assuming Buf is pointing to 388 // beginning of the output section. 389 template <class ELFT> void writeTo(uint8_t *buf); 390 391 OutputSection *getParent() const { 392 return reinterpret_cast<OutputSection *>(parent); 393 } 394 395 // This variable has two usages. Initially, it represents an index in the 396 // OutputSection's InputSection list, and is used when ordering SHF_LINK_ORDER 397 // sections. After assignAddresses is called, it represents the offset from 398 // the beginning of the output section this section was assigned to. 399 uint64_t outSecOff = 0; 400 401 InputSectionBase *getRelocatedSection() const; 402 403 template <class ELFT, class RelTy> 404 void relocateNonAlloc(uint8_t *buf, llvm::ArrayRef<RelTy> rels); 405 406 // Points to the canonical section. If ICF folds two sections, repl pointer of 407 // one section points to the other. 408 InputSection *repl = this; 409 410 // Used by ICF. 411 uint32_t eqClass[2] = {0, 0}; 412 413 // Called by ICF to merge two input sections. 414 void replace(InputSection *other); 415 416 static InputSection discarded; 417 418 private: 419 template <class ELFT, class RelTy> void copyRelocations(uint8_t *buf); 420 421 template <class ELFT, class RelTy, class RelIt> 422 void copyRelocations(uint8_t *buf, llvm::iterator_range<RelIt> rels); 423 424 template <class ELFT> void copyShtGroup(uint8_t *buf); 425 }; 426 427 static_assert(sizeof(InputSection) <= 160, "InputSection is too big"); 428 429 class SyntheticSection : public InputSection { 430 public: 431 SyntheticSection(uint64_t flags, uint32_t type, uint32_t addralign, 432 StringRef name) 433 : InputSection(ctx.internalFile, flags, type, addralign, {}, name, 434 InputSectionBase::Synthetic) {} 435 436 virtual ~SyntheticSection() = default; 437 virtual size_t getSize() const = 0; 438 virtual bool updateAllocSize() { return false; } 439 // If the section has the SHF_ALLOC flag and the size may be changed if 440 // thunks are added, update the section size. 441 virtual bool isNeeded() const { return true; } 442 virtual void finalizeContents() {} 443 virtual void writeTo(uint8_t *buf) = 0; 444 445 static bool classof(const SectionBase *sec) { 446 return sec->kind() == InputSectionBase::Synthetic; 447 } 448 }; 449 450 inline bool isDebugSection(const InputSectionBase &sec) { 451 return (sec.flags & llvm::ELF::SHF_ALLOC) == 0 && 452 sec.name.starts_with(".debug"); 453 } 454 455 // The set of TOC entries (.toc + addend) for which we should not apply 456 // toc-indirect to toc-relative relaxation. const Symbol * refers to the 457 // STT_SECTION symbol associated to the .toc input section. 458 extern llvm::DenseSet<std::pair<const Symbol *, uint64_t>> ppc64noTocRelax; 459 460 } // namespace elf 461 462 std::string toString(const elf::InputSectionBase *); 463 } // namespace lld 464 465 #endif 466