1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_SYMBOLS_H 10 #define LLD_MACHO_SYMBOLS_H 11 12 #include "Config.h" 13 #include "InputFiles.h" 14 #include "Target.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Strings.h" 17 #include "llvm/Object/Archive.h" 18 #include "llvm/Support/MathExtras.h" 19 20 namespace lld { 21 namespace macho { 22 23 class MachHeaderSection; 24 25 struct StringRefZ { 26 StringRefZ(const char *s) : data(s), size(-1) {} 27 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 28 29 const char *data; 30 const uint32_t size; 31 }; 32 33 class Symbol { 34 public: 35 enum Kind { 36 DefinedKind, 37 UndefinedKind, 38 CommonKind, 39 DylibKind, 40 LazyArchiveKind, 41 LazyObjectKind, 42 }; 43 44 virtual ~Symbol() {} 45 46 Kind kind() const { return symbolKind; } 47 48 StringRef getName() const { 49 if (nameSize == (uint32_t)-1) 50 nameSize = strlen(nameData); 51 return {nameData, nameSize}; 52 } 53 54 bool isLive() const { return used; } 55 bool isLazy() const { 56 return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind; 57 } 58 59 virtual uint64_t getVA() const { return 0; } 60 61 virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } 62 63 // Only undefined or dylib symbols can be weak references. A weak reference 64 // need not be satisfied at runtime, e.g. due to the symbol not being 65 // available on a given target platform. 66 virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } 67 68 virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } 69 70 // Whether this symbol is in the GOT or TLVPointer sections. 71 bool isInGot() const { return gotIndex != UINT32_MAX; } 72 73 // Whether this symbol is in the StubsSection. 74 bool isInStubs() const { return stubsIndex != UINT32_MAX; } 75 76 uint64_t getStubVA() const; 77 uint64_t getGotVA() const; 78 uint64_t getTlvVA() const; 79 uint64_t resolveBranchVA() const { 80 assert(isa<Defined>(this) || isa<DylibSymbol>(this)); 81 return isInStubs() ? getStubVA() : getVA(); 82 } 83 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } 84 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } 85 86 // The index of this symbol in the GOT or the TLVPointer section, depending 87 // on whether it is a thread-local. A given symbol cannot be referenced by 88 // both these sections at once. 89 uint32_t gotIndex = UINT32_MAX; 90 91 uint32_t stubsIndex = UINT32_MAX; 92 93 uint32_t symtabIndex = UINT32_MAX; 94 95 InputFile *getFile() const { return file; } 96 97 protected: 98 Symbol(Kind k, StringRefZ name, InputFile *file) 99 : symbolKind(k), nameData(name.data), file(file), nameSize(name.size), 100 isUsedInRegularObj(!file || isa<ObjFile>(file)), 101 used(!config->deadStrip) {} 102 103 Kind symbolKind; 104 const char *nameData; 105 InputFile *file; 106 mutable uint32_t nameSize; 107 108 public: 109 // True if this symbol was referenced by a regular (non-bitcode) object. 110 bool isUsedInRegularObj : 1; 111 112 // True if an undefined or dylib symbol is used from a live section. 113 bool used : 1; 114 }; 115 116 class Defined : public Symbol { 117 public: 118 Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, 119 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, 120 bool isThumb, bool isReferencedDynamically, bool noDeadStrip, 121 bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false); 122 123 bool isWeakDef() const override { return weakDef; } 124 bool isExternalWeakDef() const { 125 return isWeakDef() && isExternal() && !privateExtern; 126 } 127 bool isTlv() const override; 128 129 bool isExternal() const { return external; } 130 bool isAbsolute() const { return isec == nullptr; } 131 132 uint64_t getVA() const override; 133 134 // Ensure this symbol's pointers to InputSections point to their canonical 135 // copies. 136 void canonicalize(); 137 138 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 139 140 // Place the bitfields first so that they can get placed in the tail padding 141 // of the parent class, on platforms which support it. 142 bool overridesWeakDef : 1; 143 // Whether this symbol should appear in the output binary's export trie. 144 bool privateExtern : 1; 145 // Whether this symbol should appear in the output symbol table. 146 bool includeInSymtab : 1; 147 // Only relevant when compiling for Thumb-supporting arm32 archs. 148 bool thumb : 1; 149 // Symbols marked referencedDynamically won't be removed from the output's 150 // symbol table by tools like strip. In theory, this could be set on arbitrary 151 // symbols in input object files. In practice, it's used solely for the 152 // synthetic __mh_execute_header symbol. 153 // This is information for the static linker, and it's also written to the 154 // output file's symbol table for tools running later (such as `strip`). 155 bool referencedDynamically : 1; 156 // Set on symbols that should not be removed by dead code stripping. 157 // Set for example on `__attribute__((used))` globals, or on some Objective-C 158 // metadata. This is information only for the static linker and not written 159 // to the output. 160 bool noDeadStrip : 1; 161 162 bool weakDefCanBeHidden : 1; 163 164 private: 165 const bool weakDef : 1; 166 const bool external : 1; 167 168 public: 169 InputSection *isec; 170 // Contains the offset from the containing subsection. Note that this is 171 // different from nlist::n_value, which is the absolute address of the symbol. 172 uint64_t value; 173 // size is only calculated for regular (non-bitcode) symbols. 174 uint64_t size; 175 ConcatInputSection *unwindEntry = nullptr; 176 }; 177 178 // This enum does double-duty: as a symbol property, it indicates whether & how 179 // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 180 // of referenced symbols contained within the file. If there are both weak 181 // and strong references to the same file, we will count the file as 182 // strongly-referenced. 183 enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 184 185 class Undefined : public Symbol { 186 public: 187 Undefined(StringRefZ name, InputFile *file, RefState refState) 188 : Symbol(UndefinedKind, name, file), refState(refState) { 189 assert(refState != RefState::Unreferenced); 190 } 191 192 bool isWeakRef() const override { return refState == RefState::Weak; } 193 194 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 195 196 RefState refState : 2; 197 }; 198 199 // On Unix, it is traditionally allowed to write variable definitions without 200 // initialization expressions (such as "int foo;") to header files. These are 201 // called tentative definitions. 202 // 203 // Using tentative definitions is usually considered a bad practice; you should 204 // write only declarations (such as "extern int foo;") to header files. 205 // Nevertheless, the linker and the compiler have to do something to support 206 // bad code by allowing duplicate definitions for this particular case. 207 // 208 // The compiler creates common symbols when it sees tentative definitions. 209 // (You can suppress this behavior and let the compiler create a regular 210 // defined symbol by passing -fno-common. -fno-common is the default in clang 211 // as of LLVM 11.0.) When linking the final binary, if there are remaining 212 // common symbols after name resolution is complete, the linker converts them 213 // to regular defined symbols in a __common section. 214 class CommonSymbol : public Symbol { 215 public: 216 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, 217 bool isPrivateExtern) 218 : Symbol(CommonKind, name, file), size(size), 219 align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 220 privateExtern(isPrivateExtern) { 221 // TODO: cap maximum alignment 222 } 223 224 static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 225 226 const uint64_t size; 227 const uint32_t align; 228 const bool privateExtern; 229 }; 230 231 class DylibSymbol : public Symbol { 232 public: 233 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, 234 RefState refState, bool isTlv) 235 : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef), 236 tlv(isTlv) { 237 if (file && refState > RefState::Unreferenced) 238 file->numReferencedSymbols++; 239 } 240 241 uint64_t getVA() const override; 242 bool isWeakDef() const override { return weakDef; } 243 244 // Symbols from weak libraries/frameworks are also weakly-referenced. 245 bool isWeakRef() const override { 246 return refState == RefState::Weak || 247 (file && getFile()->umbrella->forceWeakImport); 248 } 249 bool isReferenced() const { return refState != RefState::Unreferenced; } 250 bool isTlv() const override { return tlv; } 251 bool isDynamicLookup() const { return file == nullptr; } 252 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 253 254 DylibFile *getFile() const { 255 assert(!isDynamicLookup()); 256 return cast<DylibFile>(file); 257 } 258 259 static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 260 261 uint32_t stubsHelperIndex = UINT32_MAX; 262 uint32_t lazyBindOffset = UINT32_MAX; 263 264 RefState getRefState() const { return refState; } 265 266 void reference(RefState newState) { 267 assert(newState > RefState::Unreferenced); 268 if (refState == RefState::Unreferenced && file) 269 getFile()->numReferencedSymbols++; 270 refState = std::max(refState, newState); 271 } 272 273 void unreference() { 274 // dynamic_lookup symbols have no file. 275 if (refState > RefState::Unreferenced && file) { 276 assert(getFile()->numReferencedSymbols > 0); 277 getFile()->numReferencedSymbols--; 278 } 279 } 280 281 private: 282 RefState refState : 2; 283 const bool weakDef : 1; 284 const bool tlv : 1; 285 }; 286 287 class LazyArchive : public Symbol { 288 public: 289 LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 290 : Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {} 291 292 ArchiveFile *getFile() const { return cast<ArchiveFile>(file); } 293 void fetchArchiveMember(); 294 295 static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; } 296 297 private: 298 const llvm::object::Archive::Symbol sym; 299 }; 300 301 // A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and 302 // --end-lib. 303 class LazyObject : public Symbol { 304 public: 305 LazyObject(InputFile &file, StringRef name) 306 : Symbol(LazyObjectKind, name, &file) { 307 isUsedInRegularObj = false; 308 } 309 310 static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } 311 }; 312 313 union SymbolUnion { 314 alignas(Defined) char a[sizeof(Defined)]; 315 alignas(Undefined) char b[sizeof(Undefined)]; 316 alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 317 alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 318 alignas(LazyArchive) char e[sizeof(LazyArchive)]; 319 alignas(LazyObject) char f[sizeof(LazyObject)]; 320 }; 321 322 template <typename T, typename... ArgT> 323 T *replaceSymbol(Symbol *s, ArgT &&...arg) { 324 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 325 static_assert(alignof(T) <= alignof(SymbolUnion), 326 "SymbolUnion not aligned enough"); 327 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 328 "Not a Symbol"); 329 330 bool isUsedInRegularObj = s->isUsedInRegularObj; 331 bool used = s->used; 332 T *sym = new (s) T(std::forward<ArgT>(arg)...); 333 sym->isUsedInRegularObj |= isUsedInRegularObj; 334 sym->used |= used; 335 return sym; 336 } 337 338 } // namespace macho 339 340 std::string toString(const macho::Symbol &); 341 std::string toMachOString(const llvm::object::Archive::Symbol &); 342 343 } // namespace lld 344 345 #endif 346