1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_SYMBOLS_H 10 #define LLD_MACHO_SYMBOLS_H 11 12 #include "Config.h" 13 #include "InputFiles.h" 14 #include "Target.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Strings.h" 17 #include "llvm/Object/Archive.h" 18 #include "llvm/Support/MathExtras.h" 19 20 namespace lld { 21 namespace macho { 22 23 class MachHeaderSection; 24 25 struct StringRefZ { 26 StringRefZ(const char *s) : data(s), size(-1) {} 27 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 28 29 const char *data; 30 const uint32_t size; 31 }; 32 33 class Symbol { 34 public: 35 enum Kind { 36 DefinedKind, 37 UndefinedKind, 38 CommonKind, 39 DylibKind, 40 LazyArchiveKind, 41 LazyObjectKind, 42 }; 43 44 virtual ~Symbol() {} 45 46 Kind kind() const { return symbolKind; } 47 48 StringRef getName() const { 49 if (nameSize == (uint32_t)-1) 50 nameSize = strlen(nameData); 51 return {nameData, nameSize}; 52 } 53 54 bool isLive() const { return used; } 55 bool isLazy() const { 56 return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; 57 } 58 59 virtual uint64_t getVA() const { return 0; } 60 61 virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } 62 63 // Only undefined or dylib symbols can be weak references. A weak reference 64 // need not be satisfied at runtime, e.g. due to the symbol not being 65 // available on a given target platform. 66 virtual bool isWeakRef() const { return false; } 67 68 virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } 69 70 // Whether this symbol is in the GOT or TLVPointer sections. 71 bool isInGot() const { return gotIndex != UINT32_MAX; } 72 73 // Whether this symbol is in the StubsSection. 74 bool isInStubs() const { return stubsIndex != UINT32_MAX; } 75 76 uint64_t getStubVA() const; 77 uint64_t getGotVA() const; 78 uint64_t getTlvVA() const; 79 uint64_t resolveBranchVA() const { 80 assert(isa<Defined>(this) || isa<DylibSymbol>(this)); 81 return isInStubs() ? getStubVA() : getVA(); 82 } 83 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } 84 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } 85 86 // The index of this symbol in the GOT or the TLVPointer section, depending 87 // on whether it is a thread-local. A given symbol cannot be referenced by 88 // both these sections at once. 89 uint32_t gotIndex = UINT32_MAX; 90 uint32_t lazyBindOffset = UINT32_MAX; 91 uint32_t stubsHelperIndex = UINT32_MAX; 92 uint32_t stubsIndex = UINT32_MAX; 93 uint32_t symtabIndex = UINT32_MAX; 94 95 InputFile *getFile() const { return file; } 96 97 protected: 98 Symbol(Kind k, StringRefZ name, InputFile *file) 99 : symbolKind(k), nameData(name.data), file(file), nameSize(name.size), 100 isUsedInRegularObj(!file || isa<ObjFile>(file)), 101 used(!config->deadStrip) {} 102 103 Kind symbolKind; 104 const char *nameData; 105 InputFile *file; 106 mutable uint32_t nameSize; 107 108 public: 109 // True if this symbol was referenced by a regular (non-bitcode) object. 110 bool isUsedInRegularObj : 1; 111 112 // True if this symbol is used from a live section. 113 bool used : 1; 114 }; 115 116 class Defined : public Symbol { 117 public: 118 Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, 119 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, 120 bool includeInSymtab, bool isThumb, bool isReferencedDynamically, 121 bool noDeadStrip, bool canOverrideWeakDef = false, 122 bool isWeakDefCanBeHidden = false, bool interposable = false); 123 124 bool isWeakDef() const override { return weakDef; } 125 bool isExternalWeakDef() const { 126 return isWeakDef() && isExternal() && !privateExtern; 127 } 128 bool isTlv() const override; 129 130 bool isExternal() const { return external; } 131 bool isAbsolute() const { return isec == nullptr; } 132 133 uint64_t getVA() const override; 134 135 std::string getSourceLocation(); 136 137 // Ensure this symbol's pointers to InputSections point to their canonical 138 // copies. 139 void canonicalize(); 140 141 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 142 143 // Place the bitfields first so that they can get placed in the tail padding 144 // of the parent class, on platforms which support it. 145 bool overridesWeakDef : 1; 146 // Whether this symbol should appear in the output binary's export trie. 147 bool privateExtern : 1; 148 // Whether this symbol should appear in the output symbol table. 149 bool includeInSymtab : 1; 150 // Whether this symbol was folded into a different symbol during ICF. 151 bool wasIdenticalCodeFolded : 1; 152 // Only relevant when compiling for Thumb-supporting arm32 archs. 153 bool thumb : 1; 154 // Symbols marked referencedDynamically won't be removed from the output's 155 // symbol table by tools like strip. In theory, this could be set on arbitrary 156 // symbols in input object files. In practice, it's used solely for the 157 // synthetic __mh_execute_header symbol. 158 // This is information for the static linker, and it's also written to the 159 // output file's symbol table for tools running later (such as `strip`). 160 bool referencedDynamically : 1; 161 // Set on symbols that should not be removed by dead code stripping. 162 // Set for example on `__attribute__((used))` globals, or on some Objective-C 163 // metadata. This is information only for the static linker and not written 164 // to the output. 165 bool noDeadStrip : 1; 166 // Whether references to this symbol can be interposed at runtime to point to 167 // a different symbol definition (with the same name). For example, if both 168 // dylib A and B define an interposable symbol _foo, and we load A before B at 169 // runtime, then all references to _foo within dylib B will point to the 170 // definition in dylib A. 171 // 172 // Only extern symbols may be interposable. 173 bool interposable : 1; 174 175 bool weakDefCanBeHidden : 1; 176 177 private: 178 const bool weakDef : 1; 179 const bool external : 1; 180 181 public: 182 InputSection *isec; 183 // Contains the offset from the containing subsection. Note that this is 184 // different from nlist::n_value, which is the absolute address of the symbol. 185 uint64_t value; 186 // size is only calculated for regular (non-bitcode) symbols. 187 uint64_t size; 188 // This can be a subsection of either __compact_unwind or __eh_frame. 189 ConcatInputSection *unwindEntry = nullptr; 190 }; 191 192 // This enum does double-duty: as a symbol property, it indicates whether & how 193 // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 194 // of referenced symbols contained within the file. If there are both weak 195 // and strong references to the same file, we will count the file as 196 // strongly-referenced. 197 enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 198 199 class Undefined : public Symbol { 200 public: 201 Undefined(StringRefZ name, InputFile *file, RefState refState) 202 : Symbol(UndefinedKind, name, file), refState(refState) { 203 assert(refState != RefState::Unreferenced); 204 } 205 206 bool isWeakRef() const override { return refState == RefState::Weak; } 207 208 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 209 210 RefState refState : 2; 211 }; 212 213 // On Unix, it is traditionally allowed to write variable definitions without 214 // initialization expressions (such as "int foo;") to header files. These are 215 // called tentative definitions. 216 // 217 // Using tentative definitions is usually considered a bad practice; you should 218 // write only declarations (such as "extern int foo;") to header files. 219 // Nevertheless, the linker and the compiler have to do something to support 220 // bad code by allowing duplicate definitions for this particular case. 221 // 222 // The compiler creates common symbols when it sees tentative definitions. 223 // (You can suppress this behavior and let the compiler create a regular 224 // defined symbol by passing -fno-common. -fno-common is the default in clang 225 // as of LLVM 11.0.) When linking the final binary, if there are remaining 226 // common symbols after name resolution is complete, the linker converts them 227 // to regular defined symbols in a __common section. 228 class CommonSymbol : public Symbol { 229 public: 230 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, 231 bool isPrivateExtern) 232 : Symbol(CommonKind, name, file), size(size), 233 align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 234 privateExtern(isPrivateExtern) { 235 // TODO: cap maximum alignment 236 } 237 238 static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 239 240 const uint64_t size; 241 const uint32_t align; 242 const bool privateExtern; 243 }; 244 245 class DylibSymbol : public Symbol { 246 public: 247 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, 248 RefState refState, bool isTlv) 249 : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef), 250 tlv(isTlv) { 251 if (file && refState > RefState::Unreferenced) 252 file->numReferencedSymbols++; 253 } 254 255 uint64_t getVA() const override; 256 bool isWeakDef() const override { return weakDef; } 257 258 // Symbols from weak libraries/frameworks are also weakly-referenced. 259 bool isWeakRef() const override { 260 return refState == RefState::Weak || 261 (file && getFile()->umbrella->forceWeakImport); 262 } 263 bool isReferenced() const { return refState != RefState::Unreferenced; } 264 bool isTlv() const override { return tlv; } 265 bool isDynamicLookup() const { return file == nullptr; } 266 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 267 268 DylibFile *getFile() const { 269 assert(!isDynamicLookup()); 270 return cast<DylibFile>(file); 271 } 272 273 static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 274 275 RefState getRefState() const { return refState; } 276 277 void reference(RefState newState) { 278 assert(newState > RefState::Unreferenced); 279 if (refState == RefState::Unreferenced && file) 280 getFile()->numReferencedSymbols++; 281 refState = std::max(refState, newState); 282 } 283 284 void unreference() { 285 // dynamic_lookup symbols have no file. 286 if (refState > RefState::Unreferenced && file) { 287 assert(getFile()->numReferencedSymbols > 0); 288 getFile()->numReferencedSymbols--; 289 } 290 } 291 292 private: 293 RefState refState : 2; 294 const bool weakDef : 1; 295 const bool tlv : 1; 296 }; 297 298 class LazyArchive : public Symbol { 299 public: 300 LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 301 : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {} 302 303 ArchiveFile *getFile() const { return cast<ArchiveFile>(file); } 304 void fetchArchiveMember(); 305 306 static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } 307 308 private: 309 const llvm::object::Archive::Symbol sym; 310 }; 311 312 // A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and 313 // --end-lib. 314 class LazyObject : public Symbol { 315 public: 316 LazyObject(InputFile &file, StringRef name) 317 : Symbol(LazyObjectKind, name, &file) { 318 isUsedInRegularObj = false; 319 } 320 321 static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } 322 }; 323 324 union SymbolUnion { 325 alignas(Defined) char a[sizeof(Defined)]; 326 alignas(Undefined) char b[sizeof(Undefined)]; 327 alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 328 alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 329 alignas(LazyArchive) char e[sizeof(LazyArchive)]; 330 alignas(LazyObject) char f[sizeof(LazyObject)]; 331 }; 332 333 template <typename T, typename... ArgT> 334 T *replaceSymbol(Symbol *s, ArgT &&...arg) { 335 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 336 static_assert(alignof(T) <= alignof(SymbolUnion), 337 "SymbolUnion not aligned enough"); 338 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 339 "Not a Symbol"); 340 341 bool isUsedInRegularObj = s->isUsedInRegularObj; 342 bool used = s->used; 343 T *sym = new (s) T(std::forward<ArgT>(arg)...); 344 sym->isUsedInRegularObj |= isUsedInRegularObj; 345 sym->used |= used; 346 return sym; 347 } 348 349 } // namespace macho 350 351 std::string toString(const macho::Symbol &); 352 std::string toMachOString(const llvm::object::Archive::Symbol &); 353 354 } // namespace lld 355 356 #endif 357