1 //===- InputSection.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_INPUT_SECTION_H 10 #define LLD_MACHO_INPUT_SECTION_H 11 12 #include "Config.h" 13 #include "Relocations.h" 14 #include "Symbols.h" 15 16 #include "lld/Common/LLVM.h" 17 #include "lld/Common/Memory.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/BitVector.h" 20 #include "llvm/ADT/CachedHashString.h" 21 #include "llvm/ADT/TinyPtrVector.h" 22 #include "llvm/BinaryFormat/MachO.h" 23 24 namespace lld { 25 namespace macho { 26 27 class InputFile; 28 class OutputSection; 29 30 class InputSection { 31 public: 32 enum Kind { 33 ConcatKind, 34 CStringLiteralKind, 35 WordLiteralKind, 36 }; 37 38 Kind kind() const { return shared->sectionKind; } 39 virtual ~InputSection() = default; 40 virtual uint64_t getSize() const { return data.size(); } 41 virtual bool empty() const { return data.empty(); } 42 InputFile *getFile() const { return shared->file; } 43 StringRef getName() const { return shared->name; } 44 StringRef getSegName() const { return shared->segname; } 45 uint32_t getFlags() const { return shared->flags; } 46 uint64_t getFileSize() const; 47 // Translates \p off -- an offset relative to this InputSection -- into an 48 // offset from the beginning of its parent OutputSection. 49 virtual uint64_t getOffset(uint64_t off) const = 0; 50 // The offset from the beginning of the file. 51 uint64_t getVA(uint64_t off) const; 52 // Whether the data at \p off in this InputSection is live. 53 virtual bool isLive(uint64_t off) const = 0; 54 virtual void markLive(uint64_t off) = 0; 55 virtual InputSection *canonical() { return this; } 56 virtual const InputSection *canonical() const { return this; } 57 58 OutputSection *parent = nullptr; 59 60 uint32_t align = 1; 61 // is address assigned? 62 bool isFinal = false; 63 64 ArrayRef<uint8_t> data; 65 std::vector<Reloc> relocs; 66 // The symbols that belong to this InputSection, sorted by value. With 67 // .subsections_via_symbols, there is typically only one element here. 68 llvm::TinyPtrVector<Defined *> symbols; 69 70 protected: 71 // The fields in this struct are immutable. Since we create a lot of 72 // InputSections with identical values for them (due to 73 // .subsections_via_symbols), factoring them out into a shared struct reduces 74 // memory consumption and makes copying cheaper. 75 struct Shared { 76 InputFile *file; 77 StringRef name; 78 StringRef segname; 79 uint32_t flags; 80 Kind sectionKind; 81 Shared(InputFile *file, StringRef name, StringRef segname, uint32_t flags, 82 Kind kind) 83 : file(file), name(name), segname(segname), flags(flags), 84 sectionKind(kind) {} 85 }; 86 87 InputSection(Kind kind, StringRef segname, StringRef name, InputFile *file, 88 ArrayRef<uint8_t> data, uint32_t align, uint32_t flags) 89 : align(align), data(data), 90 shared(make<Shared>(file, name, segname, flags, kind)) {} 91 92 InputSection(const InputSection &rhs) 93 : align(rhs.align), data(rhs.data), shared(rhs.shared) {} 94 95 const Shared *const shared; 96 }; 97 98 // ConcatInputSections are combined into (Concat)OutputSections through simple 99 // concatenation, in contrast with literal sections which may have their 100 // contents merged before output. 101 class ConcatInputSection final : public InputSection { 102 public: 103 ConcatInputSection(StringRef segname, StringRef name, InputFile *file, 104 ArrayRef<uint8_t> data, uint32_t align = 1, 105 uint32_t flags = 0) 106 : InputSection(ConcatKind, segname, name, file, data, align, flags) {} 107 108 ConcatInputSection(StringRef segname, StringRef name) 109 : ConcatInputSection(segname, name, /*file=*/nullptr, 110 /*data=*/{}, 111 /*align=*/1, /*flags=*/0) {} 112 113 uint64_t getOffset(uint64_t off) const override { return outSecOff + off; } 114 uint64_t getVA() const { return InputSection::getVA(0); } 115 // ConcatInputSections are entirely live or dead, so the offset is irrelevant. 116 bool isLive(uint64_t off) const override { return live; } 117 void markLive(uint64_t off) override { live = true; } 118 bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); } 119 bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); } 120 bool isHashableForICF() const; 121 void hashForICF(); 122 void writeTo(uint8_t *buf); 123 124 void foldIdentical(ConcatInputSection *redundant); 125 ConcatInputSection *canonical() override { 126 return replacement ? replacement : this; 127 } 128 const InputSection *canonical() const override { 129 return replacement ? replacement : this; 130 } 131 132 static bool classof(const InputSection *isec) { 133 return isec->kind() == ConcatKind; 134 } 135 136 // Points to the surviving section after this one is folded by ICF 137 ConcatInputSection *replacement = nullptr; 138 // Equivalence-class ID for ICF 139 uint64_t icfEqClass[2] = {0, 0}; 140 141 // With subsections_via_symbols, most symbols have their own InputSection, 142 // and for weak symbols (e.g. from inline functions), only the 143 // InputSection from one translation unit will make it to the output, 144 // while all copies in other translation units are coalesced into the 145 // first and not copied to the output. 146 bool wasCoalesced = false; 147 bool live = !config->deadStrip; 148 bool hasCallSites = false; 149 // This variable has two usages. Initially, it represents the input order. 150 // After assignAddresses is called, it represents the offset from the 151 // beginning of the output section this section was assigned to. 152 uint64_t outSecOff = 0; 153 }; 154 155 // Helper functions to make it easy to sprinkle asserts. 156 157 inline bool shouldOmitFromOutput(InputSection *isec) { 158 return isa<ConcatInputSection>(isec) && 159 cast<ConcatInputSection>(isec)->shouldOmitFromOutput(); 160 } 161 162 inline bool isCoalescedWeak(InputSection *isec) { 163 return isa<ConcatInputSection>(isec) && 164 cast<ConcatInputSection>(isec)->isCoalescedWeak(); 165 } 166 167 // We allocate a lot of these and binary search on them, so they should be as 168 // compact as possible. Hence the use of 31 rather than 64 bits for the hash. 169 struct StringPiece { 170 // Offset from the start of the containing input section. 171 uint32_t inSecOff; 172 uint32_t live : 1; 173 // Only set if deduplicating literals 174 uint32_t hash : 31; 175 // Offset from the start of the containing output section. 176 uint64_t outSecOff = 0; 177 178 StringPiece(uint64_t off, uint32_t hash) 179 : inSecOff(off), live(!config->deadStrip), hash(hash) {} 180 }; 181 182 static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!"); 183 184 // CStringInputSections are composed of multiple null-terminated string 185 // literals, which we represent using StringPieces. These literals can be 186 // deduplicated and tail-merged, so translating offsets between the input and 187 // outputs sections is more complicated. 188 // 189 // NOTE: One significant difference between LLD and ld64 is that we merge all 190 // cstring literals, even those referenced directly by non-private symbols. 191 // ld64 is more conservative and does not do that. This was mostly done for 192 // implementation simplicity; if we find programs that need the more 193 // conservative behavior we can certainly implement that. 194 class CStringInputSection final : public InputSection { 195 public: 196 CStringInputSection(StringRef segname, StringRef name, InputFile *file, 197 ArrayRef<uint8_t> data, uint32_t align, uint32_t flags) 198 : InputSection(CStringLiteralKind, segname, name, file, data, align, 199 flags) {} 200 uint64_t getOffset(uint64_t off) const override; 201 bool isLive(uint64_t off) const override { return getStringPiece(off).live; } 202 void markLive(uint64_t off) override { getStringPiece(off).live = true; } 203 // Find the StringPiece that contains this offset. 204 StringPiece &getStringPiece(uint64_t off); 205 const StringPiece &getStringPiece(uint64_t off) const; 206 // Split at each null byte. 207 void splitIntoPieces(); 208 209 LLVM_ATTRIBUTE_ALWAYS_INLINE 210 StringRef getStringRef(size_t i) const { 211 size_t begin = pieces[i].inSecOff; 212 size_t end = 213 (pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff; 214 return toStringRef(data.slice(begin, end - begin)); 215 } 216 217 // Returns i'th piece as a CachedHashStringRef. This function is very hot when 218 // string merging is enabled, so we want to inline. 219 LLVM_ATTRIBUTE_ALWAYS_INLINE 220 llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const { 221 assert(config->dedupLiterals); 222 return {getStringRef(i), pieces[i].hash}; 223 } 224 225 static bool classof(const InputSection *isec) { 226 return isec->kind() == CStringLiteralKind; 227 } 228 229 std::vector<StringPiece> pieces; 230 }; 231 232 class WordLiteralInputSection final : public InputSection { 233 public: 234 WordLiteralInputSection(StringRef segname, StringRef name, InputFile *file, 235 ArrayRef<uint8_t> data, uint32_t align, 236 uint32_t flags); 237 uint64_t getOffset(uint64_t off) const override; 238 bool isLive(uint64_t off) const override { 239 return live[off >> power2LiteralSize]; 240 } 241 void markLive(uint64_t off) override { 242 live[off >> power2LiteralSize] = true; 243 } 244 245 static bool classof(const InputSection *isec) { 246 return isec->kind() == WordLiteralKind; 247 } 248 249 private: 250 unsigned power2LiteralSize; 251 // The liveness of data[off] is tracked by live[off >> power2LiteralSize]. 252 llvm::BitVector live; 253 }; 254 255 inline uint8_t sectionType(uint32_t flags) { 256 return flags & llvm::MachO::SECTION_TYPE; 257 } 258 259 inline bool isZeroFill(uint32_t flags) { 260 return llvm::MachO::isVirtualSection(sectionType(flags)); 261 } 262 263 inline bool isThreadLocalVariables(uint32_t flags) { 264 return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES; 265 } 266 267 // These sections contain the data for initializing thread-local variables. 268 inline bool isThreadLocalData(uint32_t flags) { 269 return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR || 270 sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL; 271 } 272 273 inline bool isDebugSection(uint32_t flags) { 274 return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) == 275 llvm::MachO::S_ATTR_DEBUG; 276 } 277 278 inline bool isWordLiteralSection(uint32_t flags) { 279 return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS || 280 sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS || 281 sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS; 282 } 283 284 bool isCodeSection(const InputSection *); 285 286 bool isCfStringSection(const InputSection *); 287 288 extern std::vector<ConcatInputSection *> inputSections; 289 290 namespace section_names { 291 292 constexpr const char authGot[] = "__auth_got"; 293 constexpr const char authPtr[] = "__auth_ptr"; 294 constexpr const char binding[] = "__binding"; 295 constexpr const char bitcodeBundle[] = "__bundle"; 296 constexpr const char cString[] = "__cstring"; 297 constexpr const char cfString[] = "__cfstring"; 298 constexpr const char codeSignature[] = "__code_signature"; 299 constexpr const char common[] = "__common"; 300 constexpr const char compactUnwind[] = "__compact_unwind"; 301 constexpr const char data[] = "__data"; 302 constexpr const char debugAbbrev[] = "__debug_abbrev"; 303 constexpr const char debugInfo[] = "__debug_info"; 304 constexpr const char debugStr[] = "__debug_str"; 305 constexpr const char ehFrame[] = "__eh_frame"; 306 constexpr const char gccExceptTab[] = "__gcc_except_tab"; 307 constexpr const char export_[] = "__export"; 308 constexpr const char dataInCode[] = "__data_in_code"; 309 constexpr const char functionStarts[] = "__func_starts"; 310 constexpr const char got[] = "__got"; 311 constexpr const char header[] = "__mach_header"; 312 constexpr const char indirectSymbolTable[] = "__ind_sym_tab"; 313 constexpr const char const_[] = "__const"; 314 constexpr const char lazySymbolPtr[] = "__la_symbol_ptr"; 315 constexpr const char lazyBinding[] = "__lazy_binding"; 316 constexpr const char literals[] = "__literals"; 317 constexpr const char moduleInitFunc[] = "__mod_init_func"; 318 constexpr const char moduleTermFunc[] = "__mod_term_func"; 319 constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr"; 320 constexpr const char objcCatList[] = "__objc_catlist"; 321 constexpr const char objcClassList[] = "__objc_classlist"; 322 constexpr const char objcConst[] = "__objc_const"; 323 constexpr const char objcImageInfo[] = "__objc_imageinfo"; 324 constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist"; 325 constexpr const char objcNonLazyClassList[] = "__objc_nlclslist"; 326 constexpr const char objcProtoList[] = "__objc_protolist"; 327 constexpr const char pageZero[] = "__pagezero"; 328 constexpr const char pointers[] = "__pointers"; 329 constexpr const char rebase[] = "__rebase"; 330 constexpr const char staticInit[] = "__StaticInit"; 331 constexpr const char stringTable[] = "__string_table"; 332 constexpr const char stubHelper[] = "__stub_helper"; 333 constexpr const char stubs[] = "__stubs"; 334 constexpr const char swift[] = "__swift"; 335 constexpr const char symbolTable[] = "__symbol_table"; 336 constexpr const char textCoalNt[] = "__textcoal_nt"; 337 constexpr const char text[] = "__text"; 338 constexpr const char threadPtrs[] = "__thread_ptrs"; 339 constexpr const char threadVars[] = "__thread_vars"; 340 constexpr const char unwindInfo[] = "__unwind_info"; 341 constexpr const char weakBinding[] = "__weak_binding"; 342 constexpr const char zeroFill[] = "__zerofill"; 343 344 } // namespace section_names 345 346 } // namespace macho 347 348 std::string toString(const macho::InputSection *); 349 350 } // namespace lld 351 352 #endif 353