1 //===- InputSection.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_INPUT_SECTION_H 10 #define LLD_MACHO_INPUT_SECTION_H 11 12 #include "Config.h" 13 #include "Relocations.h" 14 #include "Symbols.h" 15 16 #include "lld/Common/LLVM.h" 17 #include "lld/Common/Memory.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/BitVector.h" 20 #include "llvm/ADT/CachedHashString.h" 21 #include "llvm/ADT/TinyPtrVector.h" 22 #include "llvm/BinaryFormat/MachO.h" 23 24 namespace lld { 25 namespace macho { 26 27 class InputFile; 28 class OutputSection; 29 30 class InputSection { 31 public: 32 enum Kind : uint8_t { 33 ConcatKind, 34 CStringLiteralKind, 35 WordLiteralKind, 36 }; 37 38 Kind kind() const { return sectionKind; } 39 virtual ~InputSection() = default; 40 virtual uint64_t getSize() const { return data.size(); } 41 virtual bool empty() const { return data.empty(); } 42 InputFile *getFile() const { return section.file; } 43 StringRef getName() const { return section.name; } 44 StringRef getSegName() const { return section.segname; } 45 uint32_t getFlags() const { return section.flags; } 46 uint64_t getFileSize() const; 47 // Translates \p off -- an offset relative to this InputSection -- into an 48 // offset from the beginning of its parent OutputSection. 49 virtual uint64_t getOffset(uint64_t off) const = 0; 50 // The offset from the beginning of the file. 51 uint64_t getVA(uint64_t off) const; 52 // Return a user-friendly string for use in diagnostics. 53 // Format: /path/to/object.o:(symbol _func+0x123) 54 std::string getLocation(uint64_t off) const; 55 // Return the source line corresponding to an address, or the empty string. 56 // Format: Source.cpp:123 (/path/to/Source.cpp:123) 57 std::string getSourceLocation(uint64_t off) const; 58 // Whether the data at \p off in this InputSection is live. 59 virtual bool isLive(uint64_t off) const = 0; 60 virtual void markLive(uint64_t off) = 0; 61 virtual InputSection *canonical() { return this; } 62 virtual const InputSection *canonical() const { return this; } 63 64 protected: 65 InputSection(Kind kind, const Section §ion, ArrayRef<uint8_t> data, 66 uint32_t align) 67 : sectionKind(kind), keepUnique(false), hasAltEntry(false), align(align), 68 data(data), section(section) {} 69 70 InputSection(const InputSection &rhs) 71 : sectionKind(rhs.sectionKind), keepUnique(false), hasAltEntry(false), 72 align(rhs.align), data(rhs.data), section(rhs.section) {} 73 74 Kind sectionKind; 75 76 public: 77 // is address assigned? 78 bool isFinal = false; 79 // keep the address of the symbol(s) in this section unique in the final 80 // binary ? 81 bool keepUnique : 1; 82 // Does this section have symbols at offsets other than zero? (NOTE: only 83 // applies to ConcatInputSections.) 84 bool hasAltEntry : 1; 85 uint32_t align = 1; 86 87 OutputSection *parent = nullptr; 88 ArrayRef<uint8_t> data; 89 std::vector<Reloc> relocs; 90 // The symbols that belong to this InputSection, sorted by value. With 91 // .subsections_via_symbols, there is typically only one element here. 92 llvm::TinyPtrVector<Defined *> symbols; 93 94 protected: 95 const Section §ion; 96 97 const Defined *getContainingSymbol(uint64_t off) const; 98 }; 99 100 // ConcatInputSections are combined into (Concat)OutputSections through simple 101 // concatenation, in contrast with literal sections which may have their 102 // contents merged before output. 103 class ConcatInputSection final : public InputSection { 104 public: 105 ConcatInputSection(const Section §ion, ArrayRef<uint8_t> data, 106 uint32_t align = 1) 107 : InputSection(ConcatKind, section, data, align) {} 108 109 uint64_t getOffset(uint64_t off) const override { return outSecOff + off; } 110 uint64_t getVA() const { return InputSection::getVA(0); } 111 // ConcatInputSections are entirely live or dead, so the offset is irrelevant. 112 bool isLive(uint64_t off) const override { return live; } 113 void markLive(uint64_t off) override { live = true; } 114 bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); } 115 bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); } 116 void writeTo(uint8_t *buf); 117 118 void foldIdentical(ConcatInputSection *redundant); 119 ConcatInputSection *canonical() override { 120 return replacement ? replacement : this; 121 } 122 const InputSection *canonical() const override { 123 return replacement ? replacement : this; 124 } 125 126 static bool classof(const InputSection *isec) { 127 return isec->kind() == ConcatKind; 128 } 129 130 // Points to the surviving section after this one is folded by ICF 131 ConcatInputSection *replacement = nullptr; 132 // Equivalence-class ID for ICF 133 uint32_t icfEqClass[2] = {0, 0}; 134 135 // With subsections_via_symbols, most symbols have their own InputSection, 136 // and for weak symbols (e.g. from inline functions), only the 137 // InputSection from one translation unit will make it to the output, 138 // while all copies in other translation units are coalesced into the 139 // first and not copied to the output. 140 bool wasCoalesced = false; 141 bool live = !config->deadStrip; 142 bool hasCallSites = false; 143 // This variable has two usages. Initially, it represents the input order. 144 // After assignAddresses is called, it represents the offset from the 145 // beginning of the output section this section was assigned to. 146 uint64_t outSecOff = 0; 147 }; 148 149 // Initialize a fake InputSection that does not belong to any InputFile. 150 ConcatInputSection *makeSyntheticInputSection(StringRef segName, 151 StringRef sectName, 152 uint32_t flags = 0, 153 ArrayRef<uint8_t> data = {}, 154 uint32_t align = 1); 155 156 // Helper functions to make it easy to sprinkle asserts. 157 158 inline bool shouldOmitFromOutput(InputSection *isec) { 159 return isa<ConcatInputSection>(isec) && 160 cast<ConcatInputSection>(isec)->shouldOmitFromOutput(); 161 } 162 163 inline bool isCoalescedWeak(InputSection *isec) { 164 return isa<ConcatInputSection>(isec) && 165 cast<ConcatInputSection>(isec)->isCoalescedWeak(); 166 } 167 168 // We allocate a lot of these and binary search on them, so they should be as 169 // compact as possible. Hence the use of 31 rather than 64 bits for the hash. 170 struct StringPiece { 171 // Offset from the start of the containing input section. 172 uint32_t inSecOff; 173 uint32_t live : 1; 174 // Only set if deduplicating literals 175 uint32_t hash : 31; 176 // Offset from the start of the containing output section. 177 uint64_t outSecOff = 0; 178 179 StringPiece(uint64_t off, uint32_t hash) 180 : inSecOff(off), live(!config->deadStrip), hash(hash) {} 181 }; 182 183 static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!"); 184 185 // CStringInputSections are composed of multiple null-terminated string 186 // literals, which we represent using StringPieces. These literals can be 187 // deduplicated and tail-merged, so translating offsets between the input and 188 // outputs sections is more complicated. 189 // 190 // NOTE: One significant difference between LLD and ld64 is that we merge all 191 // cstring literals, even those referenced directly by non-private symbols. 192 // ld64 is more conservative and does not do that. This was mostly done for 193 // implementation simplicity; if we find programs that need the more 194 // conservative behavior we can certainly implement that. 195 class CStringInputSection final : public InputSection { 196 public: 197 CStringInputSection(const Section §ion, ArrayRef<uint8_t> data, 198 uint32_t align, bool dedupLiterals) 199 : InputSection(CStringLiteralKind, section, data, align), 200 deduplicateLiterals(dedupLiterals) {} 201 202 uint64_t getOffset(uint64_t off) const override; 203 bool isLive(uint64_t off) const override { return getStringPiece(off).live; } 204 void markLive(uint64_t off) override { getStringPiece(off).live = true; } 205 // Find the StringPiece that contains this offset. 206 StringPiece &getStringPiece(uint64_t off); 207 const StringPiece &getStringPiece(uint64_t off) const; 208 // Split at each null byte. 209 void splitIntoPieces(); 210 211 LLVM_ATTRIBUTE_ALWAYS_INLINE 212 StringRef getStringRef(size_t i) const { 213 size_t begin = pieces[i].inSecOff; 214 // The endpoint should be *at* the null terminator, not after. This matches 215 // the behavior of StringRef(const char *Str). 216 size_t end = 217 ((pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff) - 1; 218 return toStringRef(data.slice(begin, end - begin)); 219 } 220 221 // Returns i'th piece as a CachedHashStringRef. This function is very hot when 222 // string merging is enabled, so we want to inline. 223 LLVM_ATTRIBUTE_ALWAYS_INLINE 224 llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const { 225 assert(deduplicateLiterals); 226 return {getStringRef(i), pieces[i].hash}; 227 } 228 229 static bool classof(const InputSection *isec) { 230 return isec->kind() == CStringLiteralKind; 231 } 232 233 bool deduplicateLiterals = false; 234 std::vector<StringPiece> pieces; 235 }; 236 237 class WordLiteralInputSection final : public InputSection { 238 public: 239 WordLiteralInputSection(const Section §ion, ArrayRef<uint8_t> data, 240 uint32_t align); 241 uint64_t getOffset(uint64_t off) const override; 242 bool isLive(uint64_t off) const override { 243 return live[off >> power2LiteralSize]; 244 } 245 void markLive(uint64_t off) override { 246 live[off >> power2LiteralSize] = true; 247 } 248 249 static bool classof(const InputSection *isec) { 250 return isec->kind() == WordLiteralKind; 251 } 252 253 private: 254 unsigned power2LiteralSize; 255 // The liveness of data[off] is tracked by live[off >> power2LiteralSize]. 256 llvm::BitVector live; 257 }; 258 259 inline uint8_t sectionType(uint32_t flags) { 260 return flags & llvm::MachO::SECTION_TYPE; 261 } 262 263 inline bool isZeroFill(uint32_t flags) { 264 return llvm::MachO::isVirtualSection(sectionType(flags)); 265 } 266 267 inline bool isThreadLocalVariables(uint32_t flags) { 268 return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES; 269 } 270 271 // These sections contain the data for initializing thread-local variables. 272 inline bool isThreadLocalData(uint32_t flags) { 273 return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR || 274 sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL; 275 } 276 277 inline bool isDebugSection(uint32_t flags) { 278 return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) == 279 llvm::MachO::S_ATTR_DEBUG; 280 } 281 282 inline bool isWordLiteralSection(uint32_t flags) { 283 return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS || 284 sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS || 285 sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS; 286 } 287 288 bool isCodeSection(const InputSection *); 289 bool isCfStringSection(const InputSection *); 290 bool isClassRefsSection(const InputSection *); 291 bool isSelRefsSection(const InputSection *); 292 bool isEhFrameSection(const InputSection *); 293 bool isGccExceptTabSection(const InputSection *); 294 295 extern std::vector<ConcatInputSection *> inputSections; 296 297 namespace section_names { 298 299 constexpr const char authGot[] = "__auth_got"; 300 constexpr const char authPtr[] = "__auth_ptr"; 301 constexpr const char binding[] = "__binding"; 302 constexpr const char bitcodeBundle[] = "__bundle"; 303 constexpr const char cString[] = "__cstring"; 304 constexpr const char cfString[] = "__cfstring"; 305 constexpr const char cgProfile[] = "__cg_profile"; 306 constexpr const char chainFixups[] = "__chainfixups"; 307 constexpr const char codeSignature[] = "__code_signature"; 308 constexpr const char common[] = "__common"; 309 constexpr const char compactUnwind[] = "__compact_unwind"; 310 constexpr const char data[] = "__data"; 311 constexpr const char debugAbbrev[] = "__debug_abbrev"; 312 constexpr const char debugInfo[] = "__debug_info"; 313 constexpr const char debugLine[] = "__debug_line"; 314 constexpr const char debugStr[] = "__debug_str"; 315 constexpr const char debugStrOffs[] = "__debug_str_offs"; 316 constexpr const char ehFrame[] = "__eh_frame"; 317 constexpr const char gccExceptTab[] = "__gcc_except_tab"; 318 constexpr const char export_[] = "__export"; 319 constexpr const char dataInCode[] = "__data_in_code"; 320 constexpr const char functionStarts[] = "__func_starts"; 321 constexpr const char got[] = "__got"; 322 constexpr const char header[] = "__mach_header"; 323 constexpr const char indirectSymbolTable[] = "__ind_sym_tab"; 324 constexpr const char initOffsets[] = "__init_offsets"; 325 constexpr const char const_[] = "__const"; 326 constexpr const char lazySymbolPtr[] = "__la_symbol_ptr"; 327 constexpr const char lazyBinding[] = "__lazy_binding"; 328 constexpr const char literals[] = "__literals"; 329 constexpr const char moduleInitFunc[] = "__mod_init_func"; 330 constexpr const char moduleTermFunc[] = "__mod_term_func"; 331 constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr"; 332 constexpr const char objcCatList[] = "__objc_catlist"; 333 constexpr const char objcClassList[] = "__objc_classlist"; 334 constexpr const char objcClassRefs[] = "__objc_classrefs"; 335 constexpr const char objcConst[] = "__objc_const"; 336 constexpr const char objCImageInfo[] = "__objc_imageinfo"; 337 constexpr const char objcStubs[] = "__objc_stubs"; 338 constexpr const char objcSelrefs[] = "__objc_selrefs"; 339 constexpr const char objcMethname[] = "__objc_methname"; 340 constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist"; 341 constexpr const char objcNonLazyClassList[] = "__objc_nlclslist"; 342 constexpr const char objcProtoList[] = "__objc_protolist"; 343 constexpr const char pageZero[] = "__pagezero"; 344 constexpr const char pointers[] = "__pointers"; 345 constexpr const char rebase[] = "__rebase"; 346 constexpr const char staticInit[] = "__StaticInit"; 347 constexpr const char stringTable[] = "__string_table"; 348 constexpr const char stubHelper[] = "__stub_helper"; 349 constexpr const char stubs[] = "__stubs"; 350 constexpr const char swift[] = "__swift"; 351 constexpr const char symbolTable[] = "__symbol_table"; 352 constexpr const char textCoalNt[] = "__textcoal_nt"; 353 constexpr const char text[] = "__text"; 354 constexpr const char threadPtrs[] = "__thread_ptrs"; 355 constexpr const char threadVars[] = "__thread_vars"; 356 constexpr const char unwindInfo[] = "__unwind_info"; 357 constexpr const char weakBinding[] = "__weak_binding"; 358 constexpr const char zeroFill[] = "__zerofill"; 359 constexpr const char addrSig[] = "__llvm_addrsig"; 360 361 } // namespace section_names 362 363 } // namespace macho 364 365 std::string toString(const macho::InputSection *); 366 367 } // namespace lld 368 369 #endif 370