1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_SYMBOLS_H 10 #define LLD_MACHO_SYMBOLS_H 11 12 #include "Config.h" 13 #include "InputFiles.h" 14 #include "Target.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Strings.h" 17 #include "llvm/Object/Archive.h" 18 #include "llvm/Support/MathExtras.h" 19 20 namespace lld { 21 namespace macho { 22 23 class MachHeaderSection; 24 25 struct StringRefZ { 26 StringRefZ(const char *s) : data(s), size(-1) {} 27 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 28 29 const char *data; 30 const uint32_t size; 31 }; 32 33 class Symbol { 34 public: 35 enum Kind { 36 DefinedKind, 37 UndefinedKind, 38 CommonKind, 39 DylibKind, 40 LazyKind, 41 }; 42 43 virtual ~Symbol() {} 44 45 Kind kind() const { return symbolKind; } 46 47 StringRef getName() const { 48 if (nameSize == (uint32_t)-1) 49 nameSize = strlen(nameData); 50 return {nameData, nameSize}; 51 } 52 53 bool isLive() const { return used; } 54 55 virtual uint64_t getVA() const { return 0; } 56 57 virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } 58 59 // Only undefined or dylib symbols can be weak references. A weak reference 60 // need not be satisfied at runtime, e.g. due to the symbol not being 61 // available on a given target platform. 62 virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } 63 64 virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } 65 66 // Whether this symbol is in the GOT or TLVPointer sections. 67 bool isInGot() const { return gotIndex != UINT32_MAX; } 68 69 // Whether this symbol is in the StubsSection. 70 bool isInStubs() const { return stubsIndex != UINT32_MAX; } 71 72 uint64_t getStubVA() const; 73 uint64_t getGotVA() const; 74 uint64_t getTlvVA() const; 75 uint64_t resolveBranchVA() const { 76 assert(isa<Defined>(this) || isa<DylibSymbol>(this)); 77 return isInStubs() ? getStubVA() : getVA(); 78 } 79 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } 80 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } 81 82 // The index of this symbol in the GOT or the TLVPointer section, depending 83 // on whether it is a thread-local. A given symbol cannot be referenced by 84 // both these sections at once. 85 uint32_t gotIndex = UINT32_MAX; 86 87 uint32_t stubsIndex = UINT32_MAX; 88 89 uint32_t symtabIndex = UINT32_MAX; 90 91 InputFile *getFile() const { return file; } 92 93 protected: 94 Symbol(Kind k, StringRefZ name, InputFile *file) 95 : symbolKind(k), nameData(name.data), file(file), nameSize(name.size), 96 isUsedInRegularObj(!file || isa<ObjFile>(file)), 97 used(!config->deadStrip) {} 98 99 Kind symbolKind; 100 const char *nameData; 101 InputFile *file; 102 mutable uint32_t nameSize; 103 104 public: 105 // True if this symbol was referenced by a regular (non-bitcode) object. 106 bool isUsedInRegularObj : 1; 107 108 // True if an undefined or dylib symbol is used from a live section. 109 bool used : 1; 110 }; 111 112 class Defined : public Symbol { 113 public: 114 Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, 115 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, 116 bool isThumb, bool isReferencedDynamically, bool noDeadStrip, 117 bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false); 118 119 bool isWeakDef() const override { return weakDef; } 120 bool isExternalWeakDef() const { 121 return isWeakDef() && isExternal() && !privateExtern; 122 } 123 bool isTlv() const override; 124 125 bool isExternal() const { return external; } 126 bool isAbsolute() const { return isec == nullptr; } 127 128 uint64_t getVA() const override; 129 130 // Ensure this symbol's pointers to InputSections point to their canonical 131 // copies. 132 void canonicalize(); 133 134 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 135 136 // Place the bitfields first so that they can get placed in the tail padding 137 // of the parent class, on platforms which support it. 138 bool overridesWeakDef : 1; 139 // Whether this symbol should appear in the output binary's export trie. 140 bool privateExtern : 1; 141 // Whether this symbol should appear in the output symbol table. 142 bool includeInSymtab : 1; 143 // Only relevant when compiling for Thumb-supporting arm32 archs. 144 bool thumb : 1; 145 // Symbols marked referencedDynamically won't be removed from the output's 146 // symbol table by tools like strip. In theory, this could be set on arbitrary 147 // symbols in input object files. In practice, it's used solely for the 148 // synthetic __mh_execute_header symbol. 149 // This is information for the static linker, and it's also written to the 150 // output file's symbol table for tools running later (such as `strip`). 151 bool referencedDynamically : 1; 152 // Set on symbols that should not be removed by dead code stripping. 153 // Set for example on `__attribute__((used))` globals, or on some Objective-C 154 // metadata. This is information only for the static linker and not written 155 // to the output. 156 bool noDeadStrip : 1; 157 158 bool weakDefCanBeHidden : 1; 159 160 private: 161 const bool weakDef : 1; 162 const bool external : 1; 163 164 public: 165 InputSection *isec; 166 // Contains the offset from the containing subsection. Note that this is 167 // different from nlist::n_value, which is the absolute address of the symbol. 168 uint64_t value; 169 // size is only calculated for regular (non-bitcode) symbols. 170 uint64_t size; 171 ConcatInputSection *unwindEntry = nullptr; 172 }; 173 174 // This enum does double-duty: as a symbol property, it indicates whether & how 175 // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 176 // of referenced symbols contained within the file. If there are both weak 177 // and strong references to the same file, we will count the file as 178 // strongly-referenced. 179 enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 180 181 class Undefined : public Symbol { 182 public: 183 Undefined(StringRefZ name, InputFile *file, RefState refState) 184 : Symbol(UndefinedKind, name, file), refState(refState) { 185 assert(refState != RefState::Unreferenced); 186 } 187 188 bool isWeakRef() const override { return refState == RefState::Weak; } 189 190 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 191 192 RefState refState : 2; 193 }; 194 195 // On Unix, it is traditionally allowed to write variable definitions without 196 // initialization expressions (such as "int foo;") to header files. These are 197 // called tentative definitions. 198 // 199 // Using tentative definitions is usually considered a bad practice; you should 200 // write only declarations (such as "extern int foo;") to header files. 201 // Nevertheless, the linker and the compiler have to do something to support 202 // bad code by allowing duplicate definitions for this particular case. 203 // 204 // The compiler creates common symbols when it sees tentative definitions. 205 // (You can suppress this behavior and let the compiler create a regular 206 // defined symbol by passing -fno-common. -fno-common is the default in clang 207 // as of LLVM 11.0.) When linking the final binary, if there are remaining 208 // common symbols after name resolution is complete, the linker converts them 209 // to regular defined symbols in a __common section. 210 class CommonSymbol : public Symbol { 211 public: 212 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, 213 bool isPrivateExtern) 214 : Symbol(CommonKind, name, file), size(size), 215 align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 216 privateExtern(isPrivateExtern) { 217 // TODO: cap maximum alignment 218 } 219 220 static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 221 222 const uint64_t size; 223 const uint32_t align; 224 const bool privateExtern; 225 }; 226 227 class DylibSymbol : public Symbol { 228 public: 229 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, 230 RefState refState, bool isTlv) 231 : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef), 232 tlv(isTlv) { 233 if (file && refState > RefState::Unreferenced) 234 file->numReferencedSymbols++; 235 } 236 237 uint64_t getVA() const override; 238 bool isWeakDef() const override { return weakDef; } 239 240 // Symbols from weak libraries/frameworks are also weakly-referenced. 241 bool isWeakRef() const override { 242 return refState == RefState::Weak || 243 (file && getFile()->umbrella->forceWeakImport); 244 } 245 bool isReferenced() const { return refState != RefState::Unreferenced; } 246 bool isTlv() const override { return tlv; } 247 bool isDynamicLookup() const { return file == nullptr; } 248 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 249 250 DylibFile *getFile() const { 251 assert(!isDynamicLookup()); 252 return cast<DylibFile>(file); 253 } 254 255 static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 256 257 uint32_t stubsHelperIndex = UINT32_MAX; 258 uint32_t lazyBindOffset = UINT32_MAX; 259 260 RefState getRefState() const { return refState; } 261 262 void reference(RefState newState) { 263 assert(newState > RefState::Unreferenced); 264 if (refState == RefState::Unreferenced && file) 265 getFile()->numReferencedSymbols++; 266 refState = std::max(refState, newState); 267 } 268 269 void unreference() { 270 // dynamic_lookup symbols have no file. 271 if (refState > RefState::Unreferenced && file) { 272 assert(getFile()->numReferencedSymbols > 0); 273 getFile()->numReferencedSymbols--; 274 } 275 } 276 277 private: 278 RefState refState : 2; 279 const bool weakDef : 1; 280 const bool tlv : 1; 281 }; 282 283 class LazySymbol : public Symbol { 284 public: 285 LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 286 : Symbol(LazyKind, sym.getName(), file), sym(sym) {} 287 288 ArchiveFile *getFile() const { return cast<ArchiveFile>(file); } 289 void fetchArchiveMember(); 290 291 static bool classof(const Symbol *s) { return s->kind() == LazyKind; } 292 293 private: 294 const llvm::object::Archive::Symbol sym; 295 }; 296 297 union SymbolUnion { 298 alignas(Defined) char a[sizeof(Defined)]; 299 alignas(Undefined) char b[sizeof(Undefined)]; 300 alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 301 alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 302 alignas(LazySymbol) char e[sizeof(LazySymbol)]; 303 }; 304 305 template <typename T, typename... ArgT> 306 T *replaceSymbol(Symbol *s, ArgT &&...arg) { 307 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 308 static_assert(alignof(T) <= alignof(SymbolUnion), 309 "SymbolUnion not aligned enough"); 310 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 311 "Not a Symbol"); 312 313 bool isUsedInRegularObj = s->isUsedInRegularObj; 314 bool used = s->used; 315 T *sym = new (s) T(std::forward<ArgT>(arg)...); 316 sym->isUsedInRegularObj |= isUsedInRegularObj; 317 sym->used |= used; 318 return sym; 319 } 320 321 } // namespace macho 322 323 std::string toString(const macho::Symbol &); 324 std::string toMachOString(const llvm::object::Archive::Symbol &); 325 326 } // namespace lld 327 328 #endif 329