1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_SYMBOLS_H 10 #define LLD_MACHO_SYMBOLS_H 11 12 #include "Config.h" 13 #include "InputFiles.h" 14 #include "Target.h" 15 16 #include "llvm/Object/Archive.h" 17 #include "llvm/Support/MathExtras.h" 18 19 namespace lld { 20 namespace macho { 21 22 class MachHeaderSection; 23 24 struct StringRefZ { 25 StringRefZ(const char *s) : data(s), size(-1) {} 26 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 27 28 const char *data; 29 const uint32_t size; 30 }; 31 32 class Symbol { 33 public: 34 enum Kind { 35 DefinedKind, 36 UndefinedKind, 37 CommonKind, 38 DylibKind, 39 LazyArchiveKind, 40 LazyObjectKind, 41 AliasKind, 42 }; 43 44 virtual ~Symbol() {} 45 46 Kind kind() const { return symbolKind; } 47 48 StringRef getName() const { 49 if (nameSize == (uint32_t)-1) 50 nameSize = strlen(nameData); 51 return {nameData, nameSize}; 52 } 53 54 bool isLive() const { return used; } 55 bool isLazy() const { 56 return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; 57 } 58 59 virtual uint64_t getVA() const { return 0; } 60 61 virtual bool isWeakDef() const { return false; } 62 63 // Only undefined or dylib symbols can be weak references. A weak reference 64 // need not be satisfied at runtime, e.g. due to the symbol not being 65 // available on a given target platform. 66 virtual bool isWeakRef() const { return false; } 67 68 virtual bool isTlv() const { return false; } 69 70 // Whether this symbol is in the GOT or TLVPointer sections. 71 bool isInGot() const { return gotIndex != UINT32_MAX; } 72 73 // Whether this symbol is in the StubsSection. 74 bool isInStubs() const { return stubsIndex != UINT32_MAX; } 75 76 uint64_t getStubVA() const; 77 uint64_t getLazyPtrVA() const; 78 uint64_t getGotVA() const; 79 uint64_t getTlvVA() const; 80 uint64_t resolveBranchVA() const { 81 assert(isa<Defined>(this) || isa<DylibSymbol>(this)); 82 return isInStubs() ? getStubVA() : getVA(); 83 } 84 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } 85 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } 86 87 // The index of this symbol in the GOT or the TLVPointer section, depending 88 // on whether it is a thread-local. A given symbol cannot be referenced by 89 // both these sections at once. 90 uint32_t gotIndex = UINT32_MAX; 91 uint32_t lazyBindOffset = UINT32_MAX; 92 uint32_t stubsHelperIndex = UINT32_MAX; 93 uint32_t stubsIndex = UINT32_MAX; 94 uint32_t symtabIndex = UINT32_MAX; 95 96 InputFile *getFile() const { return file; } 97 98 protected: 99 Symbol(Kind k, StringRefZ name, InputFile *file) 100 : symbolKind(k), nameData(name.data), file(file), nameSize(name.size), 101 isUsedInRegularObj(!file || isa<ObjFile>(file)), 102 used(!config->deadStrip) {} 103 104 Kind symbolKind; 105 const char *nameData; 106 InputFile *file; 107 mutable uint32_t nameSize; 108 109 public: 110 // True if this symbol was referenced by a regular (non-bitcode) object. 111 bool isUsedInRegularObj : 1; 112 113 // True if this symbol is used from a live section. 114 bool used : 1; 115 }; 116 117 class Defined : public Symbol { 118 public: 119 Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, 120 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, 121 bool includeInSymtab, bool isReferencedDynamically, bool noDeadStrip, 122 bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false, 123 bool interposable = false); 124 125 bool isWeakDef() const override { return weakDef; } 126 bool isExternalWeakDef() const { 127 return isWeakDef() && isExternal() && !privateExtern; 128 } 129 bool isTlv() const override; 130 131 bool isExternal() const { return external; } 132 bool isAbsolute() const { return originalIsec == nullptr; } 133 134 uint64_t getVA() const override; 135 136 // Returns the object file that this symbol was defined in. This value differs 137 // from `getFile()` if the symbol originated from a bitcode file. 138 ObjFile *getObjectFile() const; 139 140 std::string getSourceLocation(); 141 142 // Get the canonical InputSection of the symbol. 143 InputSection *isec() const; 144 145 // Get the canonical unwind entry of the symbol. 146 ConcatInputSection *unwindEntry() const; 147 148 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 149 150 // Place the bitfields first so that they can get placed in the tail padding 151 // of the parent class, on platforms which support it. 152 bool overridesWeakDef : 1; 153 // Whether this symbol should appear in the output binary's export trie. 154 bool privateExtern : 1; 155 // Whether this symbol should appear in the output symbol table. 156 bool includeInSymtab : 1; 157 // Whether this symbol was folded into a different symbol during ICF. 158 bool wasIdenticalCodeFolded : 1; 159 // Symbols marked referencedDynamically won't be removed from the output's 160 // symbol table by tools like strip. In theory, this could be set on arbitrary 161 // symbols in input object files. In practice, it's used solely for the 162 // synthetic __mh_execute_header symbol. 163 // This is information for the static linker, and it's also written to the 164 // output file's symbol table for tools running later (such as `strip`). 165 bool referencedDynamically : 1; 166 // Set on symbols that should not be removed by dead code stripping. 167 // Set for example on `__attribute__((used))` globals, or on some Objective-C 168 // metadata. This is information only for the static linker and not written 169 // to the output. 170 bool noDeadStrip : 1; 171 // Whether references to this symbol can be interposed at runtime to point to 172 // a different symbol definition (with the same name). For example, if both 173 // dylib A and B define an interposable symbol _foo, and we load A before B at 174 // runtime, then all references to _foo within dylib B will point to the 175 // definition in dylib A. 176 // 177 // Only extern symbols may be interposable. 178 bool interposable : 1; 179 180 bool weakDefCanBeHidden : 1; 181 182 private: 183 const bool weakDef : 1; 184 const bool external : 1; 185 186 public: 187 // The native InputSection of the symbol. The symbol may be moved to another 188 // InputSection in which case originalIsec->canonical() will point to the new 189 // InputSection 190 InputSection *originalIsec; 191 // Contains the offset from the containing subsection. Note that this is 192 // different from nlist::n_value, which is the absolute address of the symbol. 193 uint64_t value; 194 // size is only calculated for regular (non-bitcode) symbols. 195 uint64_t size; 196 // This can be a subsection of either __compact_unwind or __eh_frame. 197 ConcatInputSection *originalUnwindEntry = nullptr; 198 }; 199 200 // This enum does double-duty: as a symbol property, it indicates whether & how 201 // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 202 // of referenced symbols contained within the file. If there are both weak 203 // and strong references to the same file, we will count the file as 204 // strongly-referenced. 205 enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 206 207 class Undefined : public Symbol { 208 public: 209 Undefined(StringRefZ name, InputFile *file, RefState refState, 210 bool wasBitcodeSymbol) 211 : Symbol(UndefinedKind, name, file), refState(refState), 212 wasBitcodeSymbol(wasBitcodeSymbol) { 213 assert(refState != RefState::Unreferenced); 214 } 215 216 bool isWeakRef() const override { return refState == RefState::Weak; } 217 218 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 219 220 RefState refState : 2; 221 bool wasBitcodeSymbol; 222 }; 223 224 // On Unix, it is traditionally allowed to write variable definitions without 225 // initialization expressions (such as "int foo;") to header files. These are 226 // called tentative definitions. 227 // 228 // Using tentative definitions is usually considered a bad practice; you should 229 // write only declarations (such as "extern int foo;") to header files. 230 // Nevertheless, the linker and the compiler have to do something to support 231 // bad code by allowing duplicate definitions for this particular case. 232 // 233 // The compiler creates common symbols when it sees tentative definitions. 234 // (You can suppress this behavior and let the compiler create a regular 235 // defined symbol by passing -fno-common. -fno-common is the default in clang 236 // as of LLVM 11.0.) When linking the final binary, if there are remaining 237 // common symbols after name resolution is complete, the linker converts them 238 // to regular defined symbols in a __common section. 239 class CommonSymbol : public Symbol { 240 public: 241 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, 242 bool isPrivateExtern) 243 : Symbol(CommonKind, name, file), size(size), 244 align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 245 privateExtern(isPrivateExtern) { 246 // TODO: cap maximum alignment 247 } 248 249 static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 250 251 const uint64_t size; 252 const uint32_t align; 253 const bool privateExtern; 254 }; 255 256 class DylibSymbol : public Symbol { 257 public: 258 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, 259 RefState refState, bool isTlv) 260 : Symbol(DylibKind, name, file), shouldReexport(false), 261 refState(refState), weakDef(isWeakDef), tlv(isTlv) { 262 if (file && refState > RefState::Unreferenced) 263 file->numReferencedSymbols++; 264 } 265 266 uint64_t getVA() const override; 267 bool isWeakDef() const override { return weakDef; } 268 269 // Symbols from weak libraries/frameworks are also weakly-referenced. 270 bool isWeakRef() const override { 271 return refState == RefState::Weak || 272 (file && getFile()->umbrella->forceWeakImport); 273 } 274 bool isReferenced() const { return refState != RefState::Unreferenced; } 275 bool isTlv() const override { return tlv; } 276 bool isDynamicLookup() const { return file == nullptr; } 277 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 278 279 DylibFile *getFile() const { 280 assert(!isDynamicLookup()); 281 return cast<DylibFile>(file); 282 } 283 284 static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 285 286 RefState getRefState() const { return refState; } 287 288 void reference(RefState newState) { 289 assert(newState > RefState::Unreferenced); 290 if (refState == RefState::Unreferenced && file) 291 getFile()->numReferencedSymbols++; 292 refState = std::max(refState, newState); 293 } 294 295 void unreference() { 296 // dynamic_lookup symbols have no file. 297 if (refState > RefState::Unreferenced && file) { 298 assert(getFile()->numReferencedSymbols > 0); 299 getFile()->numReferencedSymbols--; 300 } 301 } 302 303 bool shouldReexport : 1; 304 private: 305 RefState refState : 2; 306 const bool weakDef : 1; 307 const bool tlv : 1; 308 }; 309 310 class LazyArchive : public Symbol { 311 public: 312 LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 313 : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {} 314 315 ArchiveFile *getFile() const { return cast<ArchiveFile>(file); } 316 void fetchArchiveMember(); 317 318 static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } 319 320 private: 321 const llvm::object::Archive::Symbol sym; 322 }; 323 324 // A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and 325 // --end-lib. 326 class LazyObject : public Symbol { 327 public: 328 LazyObject(InputFile &file, StringRef name) 329 : Symbol(LazyObjectKind, name, &file) { 330 isUsedInRegularObj = false; 331 } 332 333 static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } 334 }; 335 336 // Represents N_INDR symbols. Note that if we are given valid, linkable inputs, 337 // then all AliasSymbol instances will be converted into one of the other Symbol 338 // types after `createAliases()` runs. 339 class AliasSymbol final : public Symbol { 340 public: 341 AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName, 342 bool isPrivateExtern) 343 : Symbol(AliasKind, name, file), privateExtern(isPrivateExtern), 344 aliasedName(aliasedName) {} 345 346 StringRef getAliasedName() const { return aliasedName; } 347 348 static bool classof(const Symbol *s) { return s->kind() == AliasKind; } 349 350 const bool privateExtern; 351 352 private: 353 StringRef aliasedName; 354 }; 355 356 union SymbolUnion { 357 alignas(Defined) char a[sizeof(Defined)]; 358 alignas(Undefined) char b[sizeof(Undefined)]; 359 alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 360 alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 361 alignas(LazyArchive) char e[sizeof(LazyArchive)]; 362 alignas(LazyObject) char f[sizeof(LazyObject)]; 363 alignas(AliasSymbol) char g[sizeof(AliasSymbol)]; 364 }; 365 366 template <typename T, typename... ArgT> 367 T *replaceSymbol(Symbol *s, ArgT &&...arg) { 368 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 369 static_assert(alignof(T) <= alignof(SymbolUnion), 370 "SymbolUnion not aligned enough"); 371 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 372 "Not a Symbol"); 373 374 bool isUsedInRegularObj = s->isUsedInRegularObj; 375 bool used = s->used; 376 T *sym = new (s) T(std::forward<ArgT>(arg)...); 377 sym->isUsedInRegularObj |= isUsedInRegularObj; 378 sym->used |= used; 379 return sym; 380 } 381 382 // Can a symbol's address only be resolved at runtime? 383 inline bool needsBinding(const Symbol *sym) { 384 if (isa<DylibSymbol>(sym)) 385 return true; 386 if (const auto *defined = dyn_cast<Defined>(sym)) 387 return defined->isExternalWeakDef() || defined->interposable; 388 return false; 389 } 390 391 // Symbols with `l` or `L` as a prefix are linker-private and never appear in 392 // the output. 393 inline bool isPrivateLabel(StringRef name) { 394 return name.starts_with("l") || name.starts_with("L"); 395 } 396 } // namespace macho 397 398 std::string toString(const macho::Symbol &); 399 std::string toMachOString(const llvm::object::Archive::Symbol &); 400 401 } // namespace lld 402 403 #endif 404