1 //===- InputSection.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_INPUT_SECTION_H 10 #define LLD_ELF_INPUT_SECTION_H 11 12 #include "Config.h" 13 #include "Relocations.h" 14 #include "lld/Common/CommonLinkerContext.h" 15 #include "lld/Common/LLVM.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/ADT/CachedHashString.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/TinyPtrVector.h" 21 #include "llvm/Object/ELF.h" 22 #include "llvm/Support/Compiler.h" 23 24 namespace lld { 25 namespace elf { 26 27 class InputFile; 28 class Symbol; 29 30 class Defined; 31 struct Partition; 32 class SyntheticSection; 33 template <class ELFT> class ObjFile; 34 class OutputSection; 35 36 LLVM_LIBRARY_VISIBILITY extern std::vector<Partition> partitions; 37 38 // Returned by InputSectionBase::relsOrRelas. At most one member is empty. 39 template <class ELFT> struct RelsOrRelas { 40 Relocs<typename ELFT::Rel> rels; 41 Relocs<typename ELFT::Rela> relas; 42 Relocs<typename ELFT::Crel> crels; 43 bool areRelocsRel() const { return rels.size(); } 44 bool areRelocsCrel() const { return crels.size(); } 45 }; 46 47 #define invokeOnRelocs(sec, f, ...) \ 48 { \ 49 const RelsOrRelas<ELFT> rs = (sec).template relsOrRelas<ELFT>(); \ 50 if (rs.areRelocsCrel()) \ 51 f(__VA_ARGS__, rs.crels); \ 52 else if (rs.areRelocsRel()) \ 53 f(__VA_ARGS__, rs.rels); \ 54 else \ 55 f(__VA_ARGS__, rs.relas); \ 56 } 57 58 // This is the base class of all sections that lld handles. 
Some are sections in 59 // input files, some are sections in the produced output file and some exist 60 // just as a convenience for implementing special ways of combining some 61 // sections. 62 class SectionBase { 63 public: 64 enum Kind { Regular, Synthetic, Spill, EHFrame, Merge, Output }; 65 66 Kind kind() const { return (Kind)sectionKind; } 67 68 LLVM_PREFERRED_TYPE(Kind) 69 uint8_t sectionKind : 3; 70 71 // The next two bit fields are only used by InputSectionBase, but we 72 // put them here so the struct packs better. 73 74 LLVM_PREFERRED_TYPE(bool) 75 uint8_t bss : 1; 76 77 // Set for sections that should not be folded by ICF. 78 LLVM_PREFERRED_TYPE(bool) 79 uint8_t keepUnique : 1; 80 81 uint8_t partition = 1; 82 uint32_t type; 83 StringRef name; 84 85 // The 1-indexed partition that this section is assigned to by the garbage 86 // collector, or 0 if this section is dead. Normally there is only one 87 // partition, so this will either be 0 or 1. 88 elf::Partition &getPartition() const; 89 90 // These corresponds to the fields in Elf_Shdr. 91 uint64_t flags; 92 uint32_t addralign; 93 uint32_t entsize; 94 uint32_t link; 95 uint32_t info; 96 97 OutputSection *getOutputSection(); 98 const OutputSection *getOutputSection() const { 99 return const_cast<SectionBase *>(this)->getOutputSection(); 100 } 101 102 // Translate an offset in the input section to an offset in the output 103 // section. 
104 uint64_t getOffset(uint64_t offset) const; 105 106 uint64_t getVA(uint64_t offset = 0) const; 107 108 bool isLive() const { return partition != 0; } 109 void markLive() { partition = 1; } 110 void markDead() { partition = 0; } 111 112 protected: 113 constexpr SectionBase(Kind sectionKind, StringRef name, uint64_t flags, 114 uint32_t entsize, uint32_t addralign, uint32_t type, 115 uint32_t info, uint32_t link) 116 : sectionKind(sectionKind), bss(false), keepUnique(false), type(type), 117 name(name), flags(flags), addralign(addralign), entsize(entsize), 118 link(link), info(info) {} 119 }; 120 121 struct SymbolAnchor { 122 uint64_t offset; 123 Defined *d; 124 bool end; // true for the anchor of st_value+st_size 125 }; 126 127 struct RelaxAux { 128 // This records symbol start and end offsets which will be adjusted according 129 // to the nearest relocDeltas element. 130 SmallVector<SymbolAnchor, 0> anchors; 131 // For relocations[i], the actual offset is 132 // r_offset - (i ? relocDeltas[i-1] : 0). 133 std::unique_ptr<uint32_t[]> relocDeltas; 134 // For relocations[i], the actual type is relocTypes[i]. 135 std::unique_ptr<RelType[]> relocTypes; 136 SmallVector<uint32_t, 0> writes; 137 }; 138 139 // This corresponds to a section of an input file. 140 class InputSectionBase : public SectionBase { 141 public: 142 template <class ELFT> 143 InputSectionBase(ObjFile<ELFT> &file, const typename ELFT::Shdr &header, 144 StringRef name, Kind sectionKind); 145 146 InputSectionBase(InputFile *file, uint64_t flags, uint32_t type, 147 uint64_t entsize, uint32_t link, uint32_t info, 148 uint32_t addralign, ArrayRef<uint8_t> data, StringRef name, 149 Kind sectionKind); 150 151 static bool classof(const SectionBase *s) { return s->kind() != Output; } 152 153 // The file which contains this section. Its dynamic type is usually 154 // ObjFile<ELFT>, but may be an InputFile of InternalKind (for a synthetic 155 // section). 
156 InputFile *file; 157 158 // Input sections are part of an output section. Special sections 159 // like .eh_frame and merge sections are first combined into a 160 // synthetic section that is then added to an output section. In all 161 // cases this points one level up. 162 SectionBase *parent = nullptr; 163 164 // Section index of the relocation section if exists. 165 uint32_t relSecIdx = 0; 166 167 // Getter when the dynamic type is ObjFile<ELFT>. 168 template <class ELFT> ObjFile<ELFT> *getFile() const { 169 return cast<ObjFile<ELFT>>(file); 170 } 171 172 // Used by --optimize-bb-jumps and RISC-V linker relaxation temporarily to 173 // indicate the number of bytes which is not counted in the size. This should 174 // be reset to zero after uses. 175 uint32_t bytesDropped = 0; 176 177 mutable bool compressed = false; 178 179 // Whether this section is SHT_CREL and has been decoded to RELA by 180 // relsOrRelas. 181 bool decodedCrel = false; 182 183 // Whether the section needs to be padded with a NOP filler due to 184 // deleteFallThruJmpInsn. 185 bool nopFiller = false; 186 187 void drop_back(unsigned num) { 188 assert(bytesDropped + num < 256); 189 bytesDropped += num; 190 } 191 192 void push_back(uint64_t num) { 193 assert(bytesDropped >= num); 194 bytesDropped -= num; 195 } 196 197 mutable const uint8_t *content_; 198 uint64_t size; 199 200 void trim() { 201 if (bytesDropped) { 202 size -= bytesDropped; 203 bytesDropped = 0; 204 } 205 } 206 207 ArrayRef<uint8_t> content() const { 208 return ArrayRef<uint8_t>(content_, size); 209 } 210 ArrayRef<uint8_t> contentMaybeDecompress() const { 211 if (compressed) 212 decompress(); 213 return content(); 214 } 215 216 // The next member in the section group if this section is in a group. This is 217 // used by --gc-sections. 
218 InputSectionBase *nextInSectionGroup = nullptr; 219 220 template <class ELFT> 221 RelsOrRelas<ELFT> relsOrRelas(bool supportsCrel = true) const; 222 223 // InputSections that are dependent on us (reverse dependency for GC) 224 llvm::TinyPtrVector<InputSection *> dependentSections; 225 226 // Returns the size of this section (even if this is a common or BSS.) 227 size_t getSize() const; 228 229 InputSection *getLinkOrderDep() const; 230 231 // Get a symbol that encloses this offset from within the section. If type is 232 // not zero, return a symbol with the specified type. 233 Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0) const; 234 Defined *getEnclosingFunction(uint64_t offset) const { 235 return getEnclosingSymbol(offset, llvm::ELF::STT_FUNC); 236 } 237 238 // Returns a source location string. Used to construct an error message. 239 std::string getLocation(uint64_t offset) const; 240 std::string getSrcMsg(const Symbol &sym, uint64_t offset) const; 241 std::string getObjMsg(uint64_t offset) const; 242 243 // Each section knows how to relocate itself. These functions apply 244 // relocations, assuming that Buf points to this section's copy in 245 // the mmap'ed output buffer. 246 template <class ELFT> void relocate(uint8_t *buf, uint8_t *bufEnd); 247 static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, 248 int64_t A, uint64_t P, const Symbol &Sym, 249 RelExpr Expr); 250 251 // The native ELF reloc data type is not very convenient to handle. 252 // So we convert ELF reloc records to our own records in Relocations.cpp. 253 // This vector contains such "cooked" relocations. 
254 SmallVector<Relocation, 0> relocations; 255 256 void addReloc(const Relocation &r) { relocations.push_back(r); } 257 MutableArrayRef<Relocation> relocs() { return relocations; } 258 ArrayRef<Relocation> relocs() const { return relocations; } 259 260 union { 261 // These are modifiers to jump instructions that are necessary when basic 262 // block sections are enabled. Basic block sections creates opportunities 263 // to relax jump instructions at basic block boundaries after reordering the 264 // basic blocks. 265 JumpInstrMod *jumpInstrMod = nullptr; 266 267 // Auxiliary information for RISC-V and LoongArch linker relaxation. 268 // They do not use jumpInstrMod. 269 RelaxAux *relaxAux; 270 271 // The compressed content size when `compressed` is true. 272 size_t compressedSize; 273 }; 274 275 // A function compiled with -fsplit-stack calling a function 276 // compiled without -fsplit-stack needs its prologue adjusted. Find 277 // such functions and adjust their prologues. This is very similar 278 // to relocation. See https://gcc.gnu.org/wiki/SplitStacks for more 279 // information. 280 template <typename ELFT> 281 void adjustSplitStackFunctionPrologues(uint8_t *buf, uint8_t *end); 282 283 284 template <typename T> llvm::ArrayRef<T> getDataAs() const { 285 size_t s = content().size(); 286 assert(s % sizeof(T) == 0); 287 return llvm::ArrayRef<T>((const T *)content().data(), s / sizeof(T)); 288 } 289 290 protected: 291 template <typename ELFT> 292 void parseCompressedHeader(); 293 void decompress() const; 294 }; 295 296 // SectionPiece represents a piece of splittable section contents. 297 // We allocate a lot of these and binary search on them. This means that they 298 // have to be as compact as possible, which is why we don't store the size (can 299 // be found by looking at the next one). 
struct SectionPiece {
  SectionPiece() = default;
  // `off` is narrowed to the 32-bit inputOff field. Only the upper 31 bits of
  // `hash` are kept (hash >> 1) because the hash field is 31 bits wide; the
  // remaining bit of that word holds `live`.
  SectionPiece(size_t off, uint32_t hash, bool live)
      : inputOff(off), live(live), hash(hash >> 1) {}

  // Offset of this piece within the input section.
  uint32_t inputOff;
  LLVM_PREFERRED_TYPE(bool)
  uint32_t live : 1;
  uint32_t hash : 31;
  uint64_t outputOff = 0;
};

// Guards the compact layout described above: 4 + 4 + 8 bytes.
static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");

// This corresponds to a SHF_MERGE section of an input file.
class MergeInputSection : public InputSectionBase {
public:
  template <class ELFT>
  MergeInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header,
                    StringRef name);
  MergeInputSection(uint64_t flags, uint32_t type, uint64_t entsize,
                    ArrayRef<uint8_t> data, StringRef name);

  static bool classof(const SectionBase *s) { return s->kind() == Merge; }
  void splitIntoPieces();

  // Translate an offset in the input section to an offset in the parent
  // MergeSyntheticSection.
  uint64_t getParentOffset(uint64_t offset) const;

  // Splittable sections are handled as a sequence of data
  // rather than a single large blob of data.
  SmallVector<SectionPiece, 0> pieces;

  // Returns I'th piece's data. This function is very hot when
  // string merging is enabled, so we want to inline.
  // A piece extends from its own inputOff to the next piece's inputOff, or to
  // the end of the section content for the last piece. The returned reference
  // carries the hash stored in the piece.
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  llvm::CachedHashStringRef getData(size_t i) const {
    size_t begin = pieces[i].inputOff;
    size_t end =
        (pieces.size() - 1 == i) ? content().size() : pieces[i + 1].inputOff;
    return {toStringRef(content().slice(begin, end - begin)), pieces[i].hash};
  }

  // Returns the SectionPiece at a given input section offset.
  SectionPiece &getSectionPiece(uint64_t offset);
  // Const overload; forwards to the non-const overload above.
  const SectionPiece &getSectionPiece(uint64_t offset) const {
    return const_cast<MergeInputSection *>(this)->getSectionPiece(offset);
  }

  // Returns `parent` as a SyntheticSection, or null if `parent` is null.
  SyntheticSection *getParent() const {
    return cast_or_null<SyntheticSection>(parent);
  }

private:
  void splitStrings(StringRef s, size_t size);
  void splitNonStrings(ArrayRef<uint8_t> a, size_t size);
};

// One piece of an EhInputSection (see EhInputSection::cies and ::fdes).
struct EhSectionPiece {
  EhSectionPiece(size_t off, InputSectionBase *sec, uint32_t size,
                 unsigned firstRelocation)
      : inputOff(off), sec(sec), size(size), firstRelocation(firstRelocation) {}

  // Returns the `size` bytes of `sec`'s content starting at inputOff. The
  // range is formed with plain pointer arithmetic and is not bounds-checked
  // here.
  ArrayRef<uint8_t> data() const {
    return {sec->content().data() + this->inputOff, size};
  }

  size_t inputOff;
  // Starts out as -1.
  // NOTE(review): presumably -1 means no output offset has been assigned yet;
  // confirm against the users of this field.
  ssize_t outputOff = -1;
  InputSectionBase *sec;
  uint32_t size;
  unsigned firstRelocation;
};

// This corresponds to a .eh_frame section of an input file.
class EhInputSection : public InputSectionBase {
public:
  template <class ELFT>
  EhInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header,
                 StringRef name);
  static bool classof(const SectionBase *s) { return s->kind() == EHFrame; }
  template <class ELFT> void split();
  template <class ELFT, class RelTy> void split(ArrayRef<RelTy> rels);

  // Splittable sections are handled as a sequence of data
  // rather than a single large blob of data.
  SmallVector<EhSectionPiece, 0> cies, fdes;

  SyntheticSection *getParent() const;
  uint64_t getParentOffset(uint64_t offset) const;
};

// This is a section that is added directly to an output section
// instead of needing special combination via a synthetic section. This
// includes all input sections with the exceptions of SHF_MERGE and
// .eh_frame. It also includes the synthetic sections themselves.
class InputSection : public InputSectionBase {
public:
  InputSection(InputFile *f, uint64_t flags, uint32_t type, uint32_t addralign,
               ArrayRef<uint8_t> data, StringRef name, Kind k = Regular);
  template <class ELFT>
  InputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header,
               StringRef name);

  // Regular, Synthetic and Spill sections are all InputSections.
  static bool classof(const SectionBase *s) {
    return s->kind() == SectionBase::Regular ||
           s->kind() == SectionBase::Synthetic ||
           s->kind() == SectionBase::Spill;
  }

  // Write this section to a mmap'ed file, assuming Buf is pointing to
  // beginning of the output section.
  template <class ELFT> void writeTo(uint8_t *buf);

  // Returns `parent` viewed as an OutputSection. No kind check is performed.
  // NOTE(review): reinterpret_cast is presumably used because OutputSection
  // is only forward-declared in this header; confirm.
  OutputSection *getParent() const {
    return reinterpret_cast<OutputSection *>(parent);
  }

  // This variable has two usages. Initially, it represents an index in the
  // OutputSection's InputSection list, and is used when ordering SHF_LINK_ORDER
  // sections. After assignAddresses is called, it represents the offset from
  // the beginning of the output section this section was assigned to.
  uint64_t outSecOff = 0;

  InputSectionBase *getRelocatedSection() const;

  template <class ELFT, class RelTy>
  void relocateNonAlloc(uint8_t *buf, Relocs<RelTy> rels);

  // Points to the canonical section. If ICF folds two sections, repl pointer of
  // one section points to the other.
  InputSection *repl = this;

  // Used by ICF.
  uint32_t eqClass[2] = {0, 0};

  // Called by ICF to merge two input sections.
  void replace(InputSection *other);

  static InputSection discarded;

private:
  template <class ELFT, class RelTy> void copyRelocations(uint8_t *buf);

  template <class ELFT, class RelTy, class RelIt>
  void copyRelocations(uint8_t *buf, llvm::iterator_range<RelIt> rels);

  template <class ELFT> void copyShtGroup(uint8_t *buf);
};

// A marker for a potential spill location for another input section. This
// broadly acts as if it were the original section until address assignment.
// Then it is either replaced with the real input section or removed.
class PotentialSpillSection : public InputSection {
public:
  // The containing input section description; used to quickly replace this stub
  // with the actual section.
  InputSectionDescription *isd;

  // Next potential spill location for the same source input section.
  PotentialSpillSection *next = nullptr;

  PotentialSpillSection(const InputSectionBase &source,
                        InputSectionDescription &isd);

  static bool classof(const SectionBase *sec) {
    return sec->kind() == InputSectionBase::Spill;
  }
};

// Keeps InputSection from growing past 160 bytes unnoticed.
static_assert(sizeof(InputSection) <= 160, "InputSection is too big");

// Base class of sections whose kind is Synthetic. They are constructed with
// ctx.internalFile as their file and with empty content.
class SyntheticSection : public InputSection {
public:
  SyntheticSection(uint64_t flags, uint32_t type, uint32_t addralign,
                   StringRef name)
      : InputSection(ctx.internalFile, flags, type, addralign, {}, name,
                     InputSectionBase::Synthetic) {}

  virtual ~SyntheticSection() = default;
  // Note: this virtual function hides the non-virtual
  // InputSectionBase::getSize().
  virtual size_t getSize() const = 0;
  // If the section has the SHF_ALLOC flag and the size may be changed if
  // thunks are added, update the section size. The default implementation
  // does nothing and returns false.
  virtual bool updateAllocSize() { return false; }
  // The default implementation returns true.
  virtual bool isNeeded() const { return true; }
  // The default implementation does nothing.
  virtual void finalizeContents() {}
  // Note: this hides the InputSection::writeTo<ELFT>() member template.
  virtual void writeTo(uint8_t *buf) = 0;

  static bool classof(const SectionBase *sec) {
    return sec->kind() == InputSectionBase::Synthetic;
  }
};

// Returns true if `type` is SHT_RELA, SHT_CREL or SHT_REL.
inline bool isStaticRelSecType(uint32_t type) {
  return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_CREL ||
         type == llvm::ELF::SHT_REL;
}

// Returns true if `sec` lacks SHF_ALLOC and its name starts with ".debug".
inline bool isDebugSection(const InputSectionBase &sec) {
  return (sec.flags & llvm::ELF::SHF_ALLOC) == 0 &&
         sec.name.starts_with(".debug");
}

// The set of TOC entries (.toc + addend) for which we should not apply
// toc-indirect to toc-relative relaxation. const Symbol * refers to the
// STT_SECTION symbol associated to the .toc input section.
extern llvm::DenseSet<std::pair<const Symbol *, uint64_t>> ppc64noTocRelax;

} // namespace elf

std::string toString(const elf::InputSectionBase *);
} // namespace lld

#endif