1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_SYMBOLS_H 10 #define LLD_MACHO_SYMBOLS_H 11 12 #include "Config.h" 13 #include "InputFiles.h" 14 #include "Target.h" 15 16 #include "llvm/Object/Archive.h" 17 #include "llvm/Support/MathExtras.h" 18 19 namespace lld { 20 namespace macho { 21 22 class MachHeaderSection; 23 24 struct StringRefZ { 25 StringRefZ(const char *s) : data(s), size(-1) {} 26 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 27 28 const char *data; 29 const uint32_t size; 30 }; 31 32 class Symbol { 33 public: 34 enum Kind { 35 DefinedKind, 36 UndefinedKind, 37 CommonKind, 38 DylibKind, 39 LazyArchiveKind, 40 LazyObjectKind, 41 AliasKind, 42 }; 43 44 virtual ~Symbol() {} 45 46 Kind kind() const { return symbolKind; } 47 48 StringRef getName() const { 49 if (nameSize == (uint32_t)-1) 50 nameSize = strlen(nameData); 51 return {nameData, nameSize}; 52 } 53 54 bool isLive() const { return used; } 55 bool isLazy() const { 56 return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; 57 } 58 59 virtual uint64_t getVA() const { return 0; } 60 61 virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } 62 63 // Only undefined or dylib symbols can be weak references. A weak reference 64 // need not be satisfied at runtime, e.g. due to the symbol not being 65 // available on a given target platform. 66 virtual bool isWeakRef() const { return false; } 67 68 virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } 69 70 // Whether this symbol is in the GOT or TLVPointer sections. 71 bool isInGot() const { return gotIndex != UINT32_MAX; } 72 73 // Whether this symbol is in the StubsSection. 74 bool isInStubs() const { return stubsIndex != UINT32_MAX; } 75 76 uint64_t getStubVA() const; 77 uint64_t getLazyPtrVA() const; 78 uint64_t getGotVA() const; 79 uint64_t getTlvVA() const; 80 uint64_t resolveBranchVA() const { 81 assert(isa<Defined>(this) || isa<DylibSymbol>(this)); 82 return isInStubs() ? getStubVA() : getVA(); 83 } 84 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } 85 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } 86 87 // The index of this symbol in the GOT or the TLVPointer section, depending 88 // on whether it is a thread-local. A given symbol cannot be referenced by 89 // both these sections at once. 90 uint32_t gotIndex = UINT32_MAX; 91 uint32_t lazyBindOffset = UINT32_MAX; 92 uint32_t stubsHelperIndex = UINT32_MAX; 93 uint32_t stubsIndex = UINT32_MAX; 94 uint32_t symtabIndex = UINT32_MAX; 95 96 InputFile *getFile() const { return file; } 97 98 protected: 99 Symbol(Kind k, StringRefZ name, InputFile *file) 100 : symbolKind(k), nameData(name.data), file(file), nameSize(name.size), 101 isUsedInRegularObj(!file || isa<ObjFile>(file)), 102 used(!config->deadStrip) {} 103 104 Kind symbolKind; 105 const char *nameData; 106 InputFile *file; 107 mutable uint32_t nameSize; 108 109 public: 110 // True if this symbol was referenced by a regular (non-bitcode) object. 111 bool isUsedInRegularObj : 1; 112 113 // True if this symbol is used from a live section. 114 bool used : 1; 115 }; 116 117 class Defined : public Symbol { 118 public: 119 Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, 120 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, 121 bool includeInSymtab, bool isThumb, bool isReferencedDynamically, 122 bool noDeadStrip, bool canOverrideWeakDef = false, 123 bool isWeakDefCanBeHidden = false, bool interposable = false); 124 125 bool isWeakDef() const override { return weakDef; } 126 bool isExternalWeakDef() const { 127 return isWeakDef() && isExternal() && !privateExtern; 128 } 129 bool isTlv() const override; 130 131 bool isExternal() const { return external; } 132 bool isAbsolute() const { return isec == nullptr; } 133 134 uint64_t getVA() const override; 135 136 // Returns the object file that this symbol was defined in. This value differs 137 // from `getFile()` if the symbol originated from a bitcode file. 138 ObjFile *getObjectFile() const; 139 140 std::string getSourceLocation(); 141 142 // Ensure this symbol's pointers to InputSections point to their canonical 143 // copies. 144 void canonicalize(); 145 146 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 147 148 // Place the bitfields first so that they can get placed in the tail padding 149 // of the parent class, on platforms which support it. 150 bool overridesWeakDef : 1; 151 // Whether this symbol should appear in the output binary's export trie. 152 bool privateExtern : 1; 153 // Whether this symbol should appear in the output symbol table. 154 bool includeInSymtab : 1; 155 // Whether this symbol was folded into a different symbol during ICF. 156 bool wasIdenticalCodeFolded : 1; 157 // Only relevant when compiling for Thumb-supporting arm32 archs. 158 bool thumb : 1; 159 // Symbols marked referencedDynamically won't be removed from the output's 160 // symbol table by tools like strip. In theory, this could be set on arbitrary 161 // symbols in input object files. In practice, it's used solely for the 162 // synthetic __mh_execute_header symbol. 163 // This is information for the static linker, and it's also written to the 164 // output file's symbol table for tools running later (such as `strip`). 165 bool referencedDynamically : 1; 166 // Set on symbols that should not be removed by dead code stripping. 167 // Set for example on `__attribute__((used))` globals, or on some Objective-C 168 // metadata. This is information only for the static linker and not written 169 // to the output. 170 bool noDeadStrip : 1; 171 // Whether references to this symbol can be interposed at runtime to point to 172 // a different symbol definition (with the same name). For example, if both 173 // dylib A and B define an interposable symbol _foo, and we load A before B at 174 // runtime, then all references to _foo within dylib B will point to the 175 // definition in dylib A. 176 // 177 // Only extern symbols may be interposable. 178 bool interposable : 1; 179 180 bool weakDefCanBeHidden : 1; 181 182 private: 183 const bool weakDef : 1; 184 const bool external : 1; 185 186 public: 187 InputSection *isec; 188 // Contains the offset from the containing subsection. Note that this is 189 // different from nlist::n_value, which is the absolute address of the symbol. 190 uint64_t value; 191 // size is only calculated for regular (non-bitcode) symbols. 192 uint64_t size; 193 // This can be a subsection of either __compact_unwind or __eh_frame. 194 ConcatInputSection *unwindEntry = nullptr; 195 }; 196 197 // This enum does double-duty: as a symbol property, it indicates whether & how 198 // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 199 // of referenced symbols contained within the file. If there are both weak 200 // and strong references to the same file, we will count the file as 201 // strongly-referenced. 202 enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 203 204 class Undefined : public Symbol { 205 public: 206 Undefined(StringRefZ name, InputFile *file, RefState refState, 207 bool wasBitcodeSymbol) 208 : Symbol(UndefinedKind, name, file), refState(refState), 209 wasBitcodeSymbol(wasBitcodeSymbol) { 210 assert(refState != RefState::Unreferenced); 211 } 212 213 bool isWeakRef() const override { return refState == RefState::Weak; } 214 215 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 216 217 RefState refState : 2; 218 bool wasBitcodeSymbol; 219 }; 220 221 // On Unix, it is traditionally allowed to write variable definitions without 222 // initialization expressions (such as "int foo;") to header files. These are 223 // called tentative definitions. 224 // 225 // Using tentative definitions is usually considered a bad practice; you should 226 // write only declarations (such as "extern int foo;") to header files. 227 // Nevertheless, the linker and the compiler have to do something to support 228 // bad code by allowing duplicate definitions for this particular case. 229 // 230 // The compiler creates common symbols when it sees tentative definitions. 231 // (You can suppress this behavior and let the compiler create a regular 232 // defined symbol by passing -fno-common. -fno-common is the default in clang 233 // as of LLVM 11.0.) When linking the final binary, if there are remaining 234 // common symbols after name resolution is complete, the linker converts them 235 // to regular defined symbols in a __common section. 236 class CommonSymbol : public Symbol { 237 public: 238 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, 239 bool isPrivateExtern) 240 : Symbol(CommonKind, name, file), size(size), 241 align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 242 privateExtern(isPrivateExtern) { 243 // TODO: cap maximum alignment 244 } 245 246 static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 247 248 const uint64_t size; 249 const uint32_t align; 250 const bool privateExtern; 251 }; 252 253 class DylibSymbol : public Symbol { 254 public: 255 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, 256 RefState refState, bool isTlv) 257 : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef), 258 tlv(isTlv) { 259 if (file && refState > RefState::Unreferenced) 260 file->numReferencedSymbols++; 261 } 262 263 uint64_t getVA() const override; 264 bool isWeakDef() const override { return weakDef; } 265 266 // Symbols from weak libraries/frameworks are also weakly-referenced. 267 bool isWeakRef() const override { 268 return refState == RefState::Weak || 269 (file && getFile()->umbrella->forceWeakImport); 270 } 271 bool isReferenced() const { return refState != RefState::Unreferenced; } 272 bool isTlv() const override { return tlv; } 273 bool isDynamicLookup() const { return file == nullptr; } 274 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 275 276 DylibFile *getFile() const { 277 assert(!isDynamicLookup()); 278 return cast<DylibFile>(file); 279 } 280 281 static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 282 283 RefState getRefState() const { return refState; } 284 285 void reference(RefState newState) { 286 assert(newState > RefState::Unreferenced); 287 if (refState == RefState::Unreferenced && file) 288 getFile()->numReferencedSymbols++; 289 refState = std::max(refState, newState); 290 } 291 292 void unreference() { 293 // dynamic_lookup symbols have no file. 294 if (refState > RefState::Unreferenced && file) { 295 assert(getFile()->numReferencedSymbols > 0); 296 getFile()->numReferencedSymbols--; 297 } 298 } 299 300 private: 301 RefState refState : 2; 302 const bool weakDef : 1; 303 const bool tlv : 1; 304 }; 305 306 class LazyArchive : public Symbol { 307 public: 308 LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 309 : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {} 310 311 ArchiveFile *getFile() const { return cast<ArchiveFile>(file); } 312 void fetchArchiveMember(); 313 314 static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } 315 316 private: 317 const llvm::object::Archive::Symbol sym; 318 }; 319 320 // A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and 321 // --end-lib. 322 class LazyObject : public Symbol { 323 public: 324 LazyObject(InputFile &file, StringRef name) 325 : Symbol(LazyObjectKind, name, &file) { 326 isUsedInRegularObj = false; 327 } 328 329 static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } 330 }; 331 332 // Represents N_INDR symbols. Note that if we are given valid, linkable inputs, 333 // then all AliasSymbol instances will be converted into one of the other Symbol 334 // types after `createAliases()` runs. 335 class AliasSymbol final : public Symbol { 336 public: 337 AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName, 338 bool isPrivateExtern) 339 : Symbol(AliasKind, name, file), privateExtern(isPrivateExtern), 340 aliasedName(aliasedName) {} 341 342 StringRef getAliasedName() const { return aliasedName; } 343 344 static bool classof(const Symbol *s) { return s->kind() == AliasKind; } 345 346 const bool privateExtern; 347 348 private: 349 StringRef aliasedName; 350 }; 351 352 union SymbolUnion { 353 alignas(Defined) char a[sizeof(Defined)]; 354 alignas(Undefined) char b[sizeof(Undefined)]; 355 alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 356 alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 357 alignas(LazyArchive) char e[sizeof(LazyArchive)]; 358 alignas(LazyObject) char f[sizeof(LazyObject)]; 359 alignas(AliasSymbol) char g[sizeof(AliasSymbol)]; 360 }; 361 362 template <typename T, typename... ArgT> 363 T *replaceSymbol(Symbol *s, ArgT &&...arg) { 364 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 365 static_assert(alignof(T) <= alignof(SymbolUnion), 366 "SymbolUnion not aligned enough"); 367 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 368 "Not a Symbol"); 369 370 bool isUsedInRegularObj = s->isUsedInRegularObj; 371 bool used = s->used; 372 T *sym = new (s) T(std::forward<ArgT>(arg)...); 373 sym->isUsedInRegularObj |= isUsedInRegularObj; 374 sym->used |= used; 375 return sym; 376 } 377 378 // Can a symbol's address only be resolved at runtime? 379 inline bool needsBinding(const Symbol *sym) { 380 if (isa<DylibSymbol>(sym)) 381 return true; 382 if (const auto *defined = dyn_cast<Defined>(sym)) 383 return defined->isExternalWeakDef() || defined->interposable; 384 return false; 385 } 386 } // namespace macho 387 388 std::string toString(const macho::Symbol &); 389 std::string toMachOString(const llvm::object::Archive::Symbol &); 390 391 } // namespace lld 392 393 #endif 394