//===- InputSection.h -------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLD_MACHO_INPUT_SECTION_H #define LLD_MACHO_INPUT_SECTION_H #include "Config.h" #include "Relocations.h" #include "Symbols.h" #include "lld/Common/LLVM.h" #include "lld/Common/Memory.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/BinaryFormat/MachO.h" namespace lld { namespace macho { class InputFile; class OutputSection; class InputSection { public: enum Kind { ConcatKind, CStringLiteralKind, WordLiteralKind, }; Kind kind() const { return shared->sectionKind; } virtual ~InputSection() = default; virtual uint64_t getSize() const { return data.size(); } virtual bool empty() const { return data.empty(); } InputFile *getFile() const { return shared->file; } StringRef getName() const { return shared->name; } StringRef getSegName() const { return shared->segname; } uint32_t getFlags() const { return shared->flags; } uint64_t getFileSize() const; // Translates \p off -- an offset relative to this InputSection -- into an // offset from the beginning of its parent OutputSection. virtual uint64_t getOffset(uint64_t off) const = 0; // The offset from the beginning of the file. uint64_t getVA(uint64_t off) const; // Whether the data at \p off in this InputSection is live. virtual bool isLive(uint64_t off) const = 0; virtual void markLive(uint64_t off) = 0; virtual InputSection *canonical() { return this; } virtual const InputSection *canonical() const { return this; } OutputSection *parent = nullptr; uint32_t align = 1; // is address assigned? bool isFinal = false; ArrayRef data; std::vector relocs; // The symbols that belong to this InputSection, sorted by value. With // .subsections_via_symbols, there is typically only one element here. llvm::TinyPtrVector symbols; protected: // The fields in this struct are immutable. Since we create a lot of // InputSections with identical values for them (due to // .subsections_via_symbols), factoring them out into a shared struct reduces // memory consumption and makes copying cheaper. struct Shared { InputFile *file; StringRef name; StringRef segname; uint32_t flags; Kind sectionKind; Shared(InputFile *file, StringRef name, StringRef segname, uint32_t flags, Kind kind) : file(file), name(name), segname(segname), flags(flags), sectionKind(kind) {} }; InputSection(Kind kind, StringRef segname, StringRef name, InputFile *file, ArrayRef data, uint32_t align, uint32_t flags) : align(align), data(data), shared(make(file, name, segname, flags, kind)) {} InputSection(const InputSection &rhs) : align(rhs.align), data(rhs.data), shared(rhs.shared) {} const Shared *const shared; }; // ConcatInputSections are combined into (Concat)OutputSections through simple // concatenation, in contrast with literal sections which may have their // contents merged before output. class ConcatInputSection final : public InputSection { public: ConcatInputSection(StringRef segname, StringRef name, InputFile *file, ArrayRef data, uint32_t align = 1, uint32_t flags = 0) : InputSection(ConcatKind, segname, name, file, data, align, flags) {} ConcatInputSection(StringRef segname, StringRef name) : ConcatInputSection(segname, name, /*file=*/nullptr, /*data=*/{}, /*align=*/1, /*flags=*/0) {} uint64_t getOffset(uint64_t off) const override { return outSecOff + off; } uint64_t getVA() const { return InputSection::getVA(0); } // ConcatInputSections are entirely live or dead, so the offset is irrelevant. bool isLive(uint64_t off) const override { return live; } void markLive(uint64_t off) override { live = true; } bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); } bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); } bool isHashableForICF() const; void hashForICF(); void writeTo(uint8_t *buf); void foldIdentical(ConcatInputSection *redundant); ConcatInputSection *canonical() override { return replacement ? replacement : this; } const InputSection *canonical() const override { return replacement ? replacement : this; } static bool classof(const InputSection *isec) { return isec->kind() == ConcatKind; } // Points to the surviving section after this one is folded by ICF ConcatInputSection *replacement = nullptr; // Equivalence-class ID for ICF uint64_t icfEqClass[2] = {0, 0}; // With subsections_via_symbols, most symbols have their own InputSection, // and for weak symbols (e.g. from inline functions), only the // InputSection from one translation unit will make it to the output, // while all copies in other translation units are coalesced into the // first and not copied to the output. bool wasCoalesced = false; bool live = !config->deadStrip; bool hasCallSites = false; // This variable has two usages. Initially, it represents the input order. // After assignAddresses is called, it represents the offset from the // beginning of the output section this section was assigned to. uint64_t outSecOff = 0; }; // Helper functions to make it easy to sprinkle asserts. inline bool shouldOmitFromOutput(InputSection *isec) { return isa(isec) && cast(isec)->shouldOmitFromOutput(); } inline bool isCoalescedWeak(InputSection *isec) { return isa(isec) && cast(isec)->isCoalescedWeak(); } // We allocate a lot of these and binary search on them, so they should be as // compact as possible. Hence the use of 31 rather than 64 bits for the hash. struct StringPiece { // Offset from the start of the containing input section. uint32_t inSecOff; uint32_t live : 1; // Only set if deduplicating literals uint32_t hash : 31; // Offset from the start of the containing output section. uint64_t outSecOff = 0; StringPiece(uint64_t off, uint32_t hash) : inSecOff(off), live(!config->deadStrip), hash(hash) {} }; static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!"); // CStringInputSections are composed of multiple null-terminated string // literals, which we represent using StringPieces. These literals can be // deduplicated and tail-merged, so translating offsets between the input and // outputs sections is more complicated. // // NOTE: One significant difference between LLD and ld64 is that we merge all // cstring literals, even those referenced directly by non-private symbols. // ld64 is more conservative and does not do that. This was mostly done for // implementation simplicity; if we find programs that need the more // conservative behavior we can certainly implement that. class CStringInputSection final : public InputSection { public: CStringInputSection(StringRef segname, StringRef name, InputFile *file, ArrayRef data, uint32_t align, uint32_t flags) : InputSection(CStringLiteralKind, segname, name, file, data, align, flags) {} uint64_t getOffset(uint64_t off) const override; bool isLive(uint64_t off) const override { return getStringPiece(off).live; } void markLive(uint64_t off) override { getStringPiece(off).live = true; } // Find the StringPiece that contains this offset. StringPiece &getStringPiece(uint64_t off); const StringPiece &getStringPiece(uint64_t off) const; // Split at each null byte. void splitIntoPieces(); LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef getStringRef(size_t i) const { size_t begin = pieces[i].inSecOff; size_t end = (pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff; return toStringRef(data.slice(begin, end - begin)); } // Returns i'th piece as a CachedHashStringRef. This function is very hot when // string merging is enabled, so we want to inline. LLVM_ATTRIBUTE_ALWAYS_INLINE llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const { assert(config->dedupLiterals); return {getStringRef(i), pieces[i].hash}; } static bool classof(const InputSection *isec) { return isec->kind() == CStringLiteralKind; } std::vector pieces; }; class WordLiteralInputSection final : public InputSection { public: WordLiteralInputSection(StringRef segname, StringRef name, InputFile *file, ArrayRef data, uint32_t align, uint32_t flags); uint64_t getOffset(uint64_t off) const override; bool isLive(uint64_t off) const override { return live[off >> power2LiteralSize]; } void markLive(uint64_t off) override { live[off >> power2LiteralSize] = true; } static bool classof(const InputSection *isec) { return isec->kind() == WordLiteralKind; } private: unsigned power2LiteralSize; // The liveness of data[off] is tracked by live[off >> power2LiteralSize]. llvm::BitVector live; }; inline uint8_t sectionType(uint32_t flags) { return flags & llvm::MachO::SECTION_TYPE; } inline bool isZeroFill(uint32_t flags) { return llvm::MachO::isVirtualSection(sectionType(flags)); } inline bool isThreadLocalVariables(uint32_t flags) { return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES; } // These sections contain the data for initializing thread-local variables. inline bool isThreadLocalData(uint32_t flags) { return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR || sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL; } inline bool isDebugSection(uint32_t flags) { return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) == llvm::MachO::S_ATTR_DEBUG; } inline bool isWordLiteralSection(uint32_t flags) { return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS || sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS || sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS; } bool isCodeSection(const InputSection *); bool isCfStringSection(const InputSection *); extern std::vector inputSections; namespace section_names { constexpr const char authGot[] = "__auth_got"; constexpr const char authPtr[] = "__auth_ptr"; constexpr const char binding[] = "__binding"; constexpr const char bitcodeBundle[] = "__bundle"; constexpr const char cString[] = "__cstring"; constexpr const char cfString[] = "__cfstring"; constexpr const char codeSignature[] = "__code_signature"; constexpr const char common[] = "__common"; constexpr const char compactUnwind[] = "__compact_unwind"; constexpr const char data[] = "__data"; constexpr const char debugAbbrev[] = "__debug_abbrev"; constexpr const char debugInfo[] = "__debug_info"; constexpr const char debugStr[] = "__debug_str"; constexpr const char ehFrame[] = "__eh_frame"; constexpr const char gccExceptTab[] = "__gcc_except_tab"; constexpr const char export_[] = "__export"; constexpr const char dataInCode[] = "__data_in_code"; constexpr const char functionStarts[] = "__func_starts"; constexpr const char got[] = "__got"; constexpr const char header[] = "__mach_header"; constexpr const char indirectSymbolTable[] = "__ind_sym_tab"; constexpr const char const_[] = "__const"; constexpr const char lazySymbolPtr[] = "__la_symbol_ptr"; constexpr const char lazyBinding[] = "__lazy_binding"; constexpr const char literals[] = "__literals"; constexpr const char moduleInitFunc[] = "__mod_init_func"; constexpr const char moduleTermFunc[] = "__mod_term_func"; constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr"; constexpr const char objcCatList[] = "__objc_catlist"; constexpr const char objcClassList[] = "__objc_classlist"; constexpr const char objcConst[] = "__objc_const"; constexpr const char objcImageInfo[] = "__objc_imageinfo"; constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist"; constexpr const char objcNonLazyClassList[] = "__objc_nlclslist"; constexpr const char objcProtoList[] = "__objc_protolist"; constexpr const char pageZero[] = "__pagezero"; constexpr const char pointers[] = "__pointers"; constexpr const char rebase[] = "__rebase"; constexpr const char staticInit[] = "__StaticInit"; constexpr const char stringTable[] = "__string_table"; constexpr const char stubHelper[] = "__stub_helper"; constexpr const char stubs[] = "__stubs"; constexpr const char swift[] = "__swift"; constexpr const char symbolTable[] = "__symbol_table"; constexpr const char textCoalNt[] = "__textcoal_nt"; constexpr const char text[] = "__text"; constexpr const char threadPtrs[] = "__thread_ptrs"; constexpr const char threadVars[] = "__thread_vars"; constexpr const char unwindInfo[] = "__unwind_info"; constexpr const char weakBinding[] = "__weak_binding"; constexpr const char zeroFill[] = "__zerofill"; } // namespace section_names } // namespace macho std::string toString(const macho::InputSection *); } // namespace lld #endif