1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_SYMBOLS_H 10 #define LLD_MACHO_SYMBOLS_H 11 12 #include "Config.h" 13 #include "InputFiles.h" 14 #include "Target.h" 15 16 #include "llvm/Object/Archive.h" 17 #include "llvm/Support/MathExtras.h" 18 19 namespace lld { 20 namespace macho { 21 22 class MachHeaderSection; 23 24 struct StringRefZ { 25 StringRefZ(const char *s) : data(s), size(-1) {} 26 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 27 28 const char *data; 29 const uint32_t size; 30 }; 31 32 class Symbol { 33 public: 34 enum Kind { 35 DefinedKind, 36 UndefinedKind, 37 CommonKind, 38 DylibKind, 39 LazyArchiveKind, 40 LazyObjectKind, 41 AliasKind, 42 }; 43 44 virtual ~Symbol() {} 45 46 Kind kind() const { return symbolKind; } 47 48 StringRef getName() const { 49 if (nameSize == (uint32_t)-1) 50 nameSize = strlen(nameData); 51 return {nameData, nameSize}; 52 } 53 54 bool isLive() const { return used; } 55 bool isLazy() const { 56 return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; 57 } 58 59 virtual uint64_t getVA() const { return 0; } 60 61 virtual bool isWeakDef() const { return false; } 62 63 // Only undefined or dylib symbols can be weak references. A weak reference 64 // need not be satisfied at runtime, e.g. due to the symbol not being 65 // available on a given target platform. 66 virtual bool isWeakRef() const { return false; } 67 68 virtual bool isTlv() const { return false; } 69 70 // Whether this symbol is in the GOT or TLVPointer sections. 71 bool isInGot() const { return gotIndex != UINT32_MAX; } 72 73 // Whether this symbol is in the StubsSection. 74 bool isInStubs() const { return stubsIndex != UINT32_MAX; } 75 76 uint64_t getStubVA() const; 77 uint64_t getLazyPtrVA() const; 78 uint64_t getGotVA() const; 79 uint64_t getTlvVA() const; 80 uint64_t resolveBranchVA() const { 81 assert(isa<Defined>(this) || isa<DylibSymbol>(this)); 82 return isInStubs() ? getStubVA() : getVA(); 83 } 84 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } 85 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } 86 87 // The index of this symbol in the GOT or the TLVPointer section, depending 88 // on whether it is a thread-local. A given symbol cannot be referenced by 89 // both these sections at once. 90 uint32_t gotIndex = UINT32_MAX; 91 uint32_t lazyBindOffset = UINT32_MAX; 92 uint32_t stubsHelperIndex = UINT32_MAX; 93 uint32_t stubsIndex = UINT32_MAX; 94 uint32_t symtabIndex = UINT32_MAX; 95 96 InputFile *getFile() const { return file; } 97 98 protected: 99 Symbol(Kind k, StringRefZ name, InputFile *file) 100 : symbolKind(k), nameData(name.data), file(file), nameSize(name.size), 101 isUsedInRegularObj(!file || isa<ObjFile>(file)), 102 used(!config->deadStrip) {} 103 104 Kind symbolKind; 105 const char *nameData; 106 InputFile *file; 107 mutable uint32_t nameSize; 108 109 public: 110 // True if this symbol was referenced by a regular (non-bitcode) object. 111 bool isUsedInRegularObj : 1; 112 113 // True if this symbol is used from a live section. 114 bool used : 1; 115 }; 116 117 class Defined : public Symbol { 118 public: 119 Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, 120 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, 121 bool includeInSymtab, bool isReferencedDynamically, bool noDeadStrip, 122 bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false, 123 bool interposable = false); 124 125 bool isWeakDef() const override { return weakDef; } 126 bool isExternalWeakDef() const { 127 return isWeakDef() && isExternal() && !privateExtern; 128 } 129 bool isTlv() const override; 130 131 bool isExternal() const { return external; } 132 bool isAbsolute() const { return isec == nullptr; } 133 134 uint64_t getVA() const override; 135 136 // Returns the object file that this symbol was defined in. This value differs 137 // from `getFile()` if the symbol originated from a bitcode file. 138 ObjFile *getObjectFile() const; 139 140 std::string getSourceLocation(); 141 142 // Ensure this symbol's pointers to InputSections point to their canonical 143 // copies. 144 void canonicalize(); 145 146 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 147 148 // Place the bitfields first so that they can get placed in the tail padding 149 // of the parent class, on platforms which support it. 150 bool overridesWeakDef : 1; 151 // Whether this symbol should appear in the output binary's export trie. 152 bool privateExtern : 1; 153 // Whether this symbol should appear in the output symbol table. 154 bool includeInSymtab : 1; 155 // Whether this symbol was folded into a different symbol during ICF. 156 bool wasIdenticalCodeFolded : 1; 157 // Symbols marked referencedDynamically won't be removed from the output's 158 // symbol table by tools like strip. In theory, this could be set on arbitrary 159 // symbols in input object files. In practice, it's used solely for the 160 // synthetic __mh_execute_header symbol. 161 // This is information for the static linker, and it's also written to the 162 // output file's symbol table for tools running later (such as `strip`). 163 bool referencedDynamically : 1; 164 // Set on symbols that should not be removed by dead code stripping. 165 // Set for example on `__attribute__((used))` globals, or on some Objective-C 166 // metadata. This is information only for the static linker and not written 167 // to the output. 168 bool noDeadStrip : 1; 169 // Whether references to this symbol can be interposed at runtime to point to 170 // a different symbol definition (with the same name). For example, if both 171 // dylib A and B define an interposable symbol _foo, and we load A before B at 172 // runtime, then all references to _foo within dylib B will point to the 173 // definition in dylib A. 174 // 175 // Only extern symbols may be interposable. 176 bool interposable : 1; 177 178 bool weakDefCanBeHidden : 1; 179 180 private: 181 const bool weakDef : 1; 182 const bool external : 1; 183 184 public: 185 InputSection *isec; 186 // Contains the offset from the containing subsection. Note that this is 187 // different from nlist::n_value, which is the absolute address of the symbol. 188 uint64_t value; 189 // size is only calculated for regular (non-bitcode) symbols. 190 uint64_t size; 191 // This can be a subsection of either __compact_unwind or __eh_frame. 192 ConcatInputSection *unwindEntry = nullptr; 193 }; 194 195 // This enum does double-duty: as a symbol property, it indicates whether & how 196 // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 197 // of referenced symbols contained within the file. If there are both weak 198 // and strong references to the same file, we will count the file as 199 // strongly-referenced. 200 enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 201 202 class Undefined : public Symbol { 203 public: 204 Undefined(StringRefZ name, InputFile *file, RefState refState, 205 bool wasBitcodeSymbol) 206 : Symbol(UndefinedKind, name, file), refState(refState), 207 wasBitcodeSymbol(wasBitcodeSymbol) { 208 assert(refState != RefState::Unreferenced); 209 } 210 211 bool isWeakRef() const override { return refState == RefState::Weak; } 212 213 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 214 215 RefState refState : 2; 216 bool wasBitcodeSymbol; 217 }; 218 219 // On Unix, it is traditionally allowed to write variable definitions without 220 // initialization expressions (such as "int foo;") to header files. These are 221 // called tentative definitions. 222 // 223 // Using tentative definitions is usually considered a bad practice; you should 224 // write only declarations (such as "extern int foo;") to header files. 225 // Nevertheless, the linker and the compiler have to do something to support 226 // bad code by allowing duplicate definitions for this particular case. 227 // 228 // The compiler creates common symbols when it sees tentative definitions. 229 // (You can suppress this behavior and let the compiler create a regular 230 // defined symbol by passing -fno-common. -fno-common is the default in clang 231 // as of LLVM 11.0.) When linking the final binary, if there are remaining 232 // common symbols after name resolution is complete, the linker converts them 233 // to regular defined symbols in a __common section. 234 class CommonSymbol : public Symbol { 235 public: 236 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, 237 bool isPrivateExtern) 238 : Symbol(CommonKind, name, file), size(size), 239 align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 240 privateExtern(isPrivateExtern) { 241 // TODO: cap maximum alignment 242 } 243 244 static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 245 246 const uint64_t size; 247 const uint32_t align; 248 const bool privateExtern; 249 }; 250 251 class DylibSymbol : public Symbol { 252 public: 253 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, 254 RefState refState, bool isTlv) 255 : Symbol(DylibKind, name, file), shouldReexport(false), 256 refState(refState), weakDef(isWeakDef), tlv(isTlv) { 257 if (file && refState > RefState::Unreferenced) 258 file->numReferencedSymbols++; 259 } 260 261 uint64_t getVA() const override; 262 bool isWeakDef() const override { return weakDef; } 263 264 // Symbols from weak libraries/frameworks are also weakly-referenced. 265 bool isWeakRef() const override { 266 return refState == RefState::Weak || 267 (file && getFile()->umbrella->forceWeakImport); 268 } 269 bool isReferenced() const { return refState != RefState::Unreferenced; } 270 bool isTlv() const override { return tlv; } 271 bool isDynamicLookup() const { return file == nullptr; } 272 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 273 274 DylibFile *getFile() const { 275 assert(!isDynamicLookup()); 276 return cast<DylibFile>(file); 277 } 278 279 static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 280 281 RefState getRefState() const { return refState; } 282 283 void reference(RefState newState) { 284 assert(newState > RefState::Unreferenced); 285 if (refState == RefState::Unreferenced && file) 286 getFile()->numReferencedSymbols++; 287 refState = std::max(refState, newState); 288 } 289 290 void unreference() { 291 // dynamic_lookup symbols have no file. 292 if (refState > RefState::Unreferenced && file) { 293 assert(getFile()->numReferencedSymbols > 0); 294 getFile()->numReferencedSymbols--; 295 } 296 } 297 298 bool shouldReexport : 1; 299 private: 300 RefState refState : 2; 301 const bool weakDef : 1; 302 const bool tlv : 1; 303 }; 304 305 class LazyArchive : public Symbol { 306 public: 307 LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 308 : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {} 309 310 ArchiveFile *getFile() const { return cast<ArchiveFile>(file); } 311 void fetchArchiveMember(); 312 313 static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } 314 315 private: 316 const llvm::object::Archive::Symbol sym; 317 }; 318 319 // A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and 320 // --end-lib. 321 class LazyObject : public Symbol { 322 public: 323 LazyObject(InputFile &file, StringRef name) 324 : Symbol(LazyObjectKind, name, &file) { 325 isUsedInRegularObj = false; 326 } 327 328 static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } 329 }; 330 331 // Represents N_INDR symbols. Note that if we are given valid, linkable inputs, 332 // then all AliasSymbol instances will be converted into one of the other Symbol 333 // types after `createAliases()` runs. 334 class AliasSymbol final : public Symbol { 335 public: 336 AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName, 337 bool isPrivateExtern) 338 : Symbol(AliasKind, name, file), privateExtern(isPrivateExtern), 339 aliasedName(aliasedName) {} 340 341 StringRef getAliasedName() const { return aliasedName; } 342 343 static bool classof(const Symbol *s) { return s->kind() == AliasKind; } 344 345 const bool privateExtern; 346 347 private: 348 StringRef aliasedName; 349 }; 350 351 union SymbolUnion { 352 alignas(Defined) char a[sizeof(Defined)]; 353 alignas(Undefined) char b[sizeof(Undefined)]; 354 alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 355 alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 356 alignas(LazyArchive) char e[sizeof(LazyArchive)]; 357 alignas(LazyObject) char f[sizeof(LazyObject)]; 358 alignas(AliasSymbol) char g[sizeof(AliasSymbol)]; 359 }; 360 361 template <typename T, typename... ArgT> 362 T *replaceSymbol(Symbol *s, ArgT &&...arg) { 363 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 364 static_assert(alignof(T) <= alignof(SymbolUnion), 365 "SymbolUnion not aligned enough"); 366 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 367 "Not a Symbol"); 368 369 bool isUsedInRegularObj = s->isUsedInRegularObj; 370 bool used = s->used; 371 T *sym = new (s) T(std::forward<ArgT>(arg)...); 372 sym->isUsedInRegularObj |= isUsedInRegularObj; 373 sym->used |= used; 374 return sym; 375 } 376 377 // Can a symbol's address only be resolved at runtime? 378 inline bool needsBinding(const Symbol *sym) { 379 if (isa<DylibSymbol>(sym)) 380 return true; 381 if (const auto *defined = dyn_cast<Defined>(sym)) 382 return defined->isExternalWeakDef() || defined->interposable; 383 return false; 384 } 385 386 // Symbols with `l` or `L` as a prefix are linker-private and never appear in 387 // the output. 388 inline bool isPrivateLabel(StringRef name) { 389 return name.starts_with("l") || name.starts_with("L"); 390 } 391 } // namespace macho 392 393 std::string toString(const macho::Symbol &); 394 std::string toMachOString(const llvm::object::Archive::Symbol &); 395 396 } // namespace lld 397 398 #endif 399