1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_MACHO_SYMBOLS_H 10 #define LLD_MACHO_SYMBOLS_H 11 12 #include "InputFiles.h" 13 #include "InputSection.h" 14 #include "Target.h" 15 #include "lld/Common/ErrorHandler.h" 16 #include "lld/Common/Strings.h" 17 #include "llvm/Object/Archive.h" 18 #include "llvm/Support/MathExtras.h" 19 20 namespace lld { 21 namespace macho { 22 23 class InputSection; 24 class MachHeaderSection; 25 26 struct StringRefZ { 27 StringRefZ(const char *s) : data(s), size(-1) {} 28 StringRefZ(StringRef s) : data(s.data()), size(s.size()) {} 29 30 const char *data; 31 const uint32_t size; 32 }; 33 34 class Symbol { 35 public: 36 enum Kind { 37 DefinedKind, 38 UndefinedKind, 39 CommonKind, 40 DylibKind, 41 LazyKind, 42 }; 43 44 virtual ~Symbol() {} 45 46 Kind kind() const { return symbolKind; } 47 48 StringRef getName() const { 49 if (nameSize == (uint32_t)-1) 50 nameSize = strlen(nameData); 51 return {nameData, nameSize}; 52 } 53 54 bool isLive() const; 55 56 virtual uint64_t getVA() const { return 0; } 57 58 virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } 59 60 // Only undefined or dylib symbols can be weak references. A weak reference 61 // need not be satisfied at runtime, e.g. due to the symbol not being 62 // available on a given target platform. 63 virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } 64 65 virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } 66 67 // Whether this symbol is in the GOT or TLVPointer sections. 68 bool isInGot() const { return gotIndex != UINT32_MAX; } 69 70 // Whether this symbol is in the StubsSection. 71 bool isInStubs() const { return stubsIndex != UINT32_MAX; } 72 73 uint64_t getStubVA() const; 74 uint64_t getGotVA() const; 75 uint64_t getTlvVA() const; 76 uint64_t resolveBranchVA() const { 77 assert(isa<Defined>(this) || isa<DylibSymbol>(this)); 78 return isInStubs() ? getStubVA() : getVA(); 79 } 80 uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); } 81 uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); } 82 83 // The index of this symbol in the GOT or the TLVPointer section, depending 84 // on whether it is a thread-local. A given symbol cannot be referenced by 85 // both these sections at once. 86 uint32_t gotIndex = UINT32_MAX; 87 88 uint32_t stubsIndex = UINT32_MAX; 89 90 uint32_t symtabIndex = UINT32_MAX; 91 92 InputFile *getFile() const { return file; } 93 94 protected: 95 Symbol(Kind k, StringRefZ name, InputFile *file) 96 : symbolKind(k), nameData(name.data), nameSize(name.size), file(file), 97 isUsedInRegularObj(!file || isa<ObjFile>(file)), 98 used(!config->deadStrip) {} 99 100 Kind symbolKind; 101 const char *nameData; 102 mutable uint32_t nameSize; 103 InputFile *file; 104 105 public: 106 // True if this symbol was referenced by a regular (non-bitcode) object. 107 bool isUsedInRegularObj : 1; 108 109 // True if an undefined or dylib symbol is used from a live section. 110 bool used : 1; 111 }; 112 113 class Defined : public Symbol { 114 public: 115 Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value, 116 uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern, 117 bool isThumb, bool isReferencedDynamically, bool noDeadStrip) 118 : Symbol(DefinedKind, name, file), isec(isec), value(value), size(size), 119 overridesWeakDef(false), privateExtern(isPrivateExtern), 120 includeInSymtab(true), thumb(isThumb), 121 referencedDynamically(isReferencedDynamically), 122 noDeadStrip(noDeadStrip), weakDef(isWeakDef), external(isExternal) { 123 if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec)) 124 concatIsec->numRefs++; 125 } 126 127 bool isWeakDef() const override { return weakDef; } 128 bool isExternalWeakDef() const { 129 return isWeakDef() && isExternal() && !privateExtern; 130 } 131 bool isTlv() const override { 132 return !isAbsolute() && isThreadLocalVariables(isec->getFlags()); 133 } 134 135 bool isExternal() const { return external; } 136 bool isAbsolute() const { return isec == nullptr; } 137 138 uint64_t getVA() const override; 139 140 static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } 141 142 InputSection *isec; 143 // Contains the offset from the containing subsection. Note that this is 144 // different from nlist::n_value, which is the absolute address of the symbol. 145 uint64_t value; 146 // size is only calculated for regular (non-bitcode) symbols. 147 uint64_t size; 148 149 bool overridesWeakDef : 1; 150 // Whether this symbol should appear in the output binary's export trie. 151 bool privateExtern : 1; 152 // Whether this symbol should appear in the output symbol table. 153 bool includeInSymtab : 1; 154 // Only relevant when compiling for Thumb-supporting arm32 archs. 155 bool thumb : 1; 156 // Symbols marked referencedDynamically won't be removed from the output's 157 // symbol table by tools like strip. In theory, this could be set on arbitrary 158 // symbols in input object files. In practice, it's used solely for the 159 // synthetic __mh_execute_header symbol. 160 // This is information for the static linker, and it's also written to the 161 // output file's symbol table for tools running later (such as `strip`). 162 bool referencedDynamically : 1; 163 // Set on symbols that should not be removed by dead code stripping. 164 // Set for example on `__attribute__((used))` globals, or on some Objective-C 165 // metadata. This is information only for the static linker and not written 166 // to the output. 167 bool noDeadStrip : 1; 168 169 private: 170 const bool weakDef : 1; 171 const bool external : 1; 172 }; 173 174 // This enum does double-duty: as a symbol property, it indicates whether & how 175 // a dylib symbol is referenced. As a DylibFile property, it indicates the kind 176 // of referenced symbols contained within the file. If there are both weak 177 // and strong references to the same file, we will count the file as 178 // strongly-referenced. 179 enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; 180 181 class Undefined : public Symbol { 182 public: 183 Undefined(StringRefZ name, InputFile *file, RefState refState) 184 : Symbol(UndefinedKind, name, file), refState(refState) { 185 assert(refState != RefState::Unreferenced); 186 } 187 188 bool isWeakRef() const override { return refState == RefState::Weak; } 189 190 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 191 192 RefState refState : 2; 193 }; 194 195 // On Unix, it is traditionally allowed to write variable definitions without 196 // initialization expressions (such as "int foo;") to header files. These are 197 // called tentative definitions. 198 // 199 // Using tentative definitions is usually considered a bad practice; you should 200 // write only declarations (such as "extern int foo;") to header files. 201 // Nevertheless, the linker and the compiler have to do something to support 202 // bad code by allowing duplicate definitions for this particular case. 203 // 204 // The compiler creates common symbols when it sees tentative definitions. 205 // (You can suppress this behavior and let the compiler create a regular 206 // defined symbol by passing -fno-common. -fno-common is the default in clang 207 // as of LLVM 11.0.) When linking the final binary, if there are remaining 208 // common symbols after name resolution is complete, the linker converts them 209 // to regular defined symbols in a __common section. 210 class CommonSymbol : public Symbol { 211 public: 212 CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align, 213 bool isPrivateExtern) 214 : Symbol(CommonKind, name, file), size(size), 215 align(align != 1 ? align : llvm::PowerOf2Ceil(size)), 216 privateExtern(isPrivateExtern) { 217 // TODO: cap maximum alignment 218 } 219 220 static bool classof(const Symbol *s) { return s->kind() == CommonKind; } 221 222 const uint64_t size; 223 const uint32_t align; 224 const bool privateExtern; 225 }; 226 227 class DylibSymbol : public Symbol { 228 public: 229 DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, 230 RefState refState, bool isTlv) 231 : Symbol(DylibKind, name, file), refState(refState), weakDef(isWeakDef), 232 tlv(isTlv) { 233 if (file && refState > RefState::Unreferenced) 234 file->numReferencedSymbols++; 235 } 236 237 uint64_t getVA() const override; 238 bool isWeakDef() const override { return weakDef; } 239 bool isWeakRef() const override { return refState == RefState::Weak; } 240 bool isReferenced() const { return refState != RefState::Unreferenced; } 241 bool isTlv() const override { return tlv; } 242 bool isDynamicLookup() const { return file == nullptr; } 243 bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } 244 245 DylibFile *getFile() const { 246 assert(!isDynamicLookup()); 247 return cast<DylibFile>(file); 248 } 249 250 static bool classof(const Symbol *s) { return s->kind() == DylibKind; } 251 252 uint32_t stubsHelperIndex = UINT32_MAX; 253 uint32_t lazyBindOffset = UINT32_MAX; 254 255 RefState getRefState() const { return refState; } 256 257 void reference(RefState newState) { 258 assert(newState > RefState::Unreferenced); 259 if (refState == RefState::Unreferenced && file) 260 getFile()->numReferencedSymbols++; 261 refState = std::max(refState, newState); 262 } 263 264 void unreference() { 265 // dynamic_lookup symbols have no file. 266 if (refState > RefState::Unreferenced && file) { 267 assert(getFile()->numReferencedSymbols > 0); 268 getFile()->numReferencedSymbols--; 269 } 270 } 271 272 private: 273 RefState refState : 2; 274 const bool weakDef : 1; 275 const bool tlv : 1; 276 }; 277 278 class LazySymbol : public Symbol { 279 public: 280 LazySymbol(ArchiveFile *file, const llvm::object::Archive::Symbol &sym) 281 : Symbol(LazyKind, sym.getName(), file), sym(sym) {} 282 283 ArchiveFile *getFile() const { return cast<ArchiveFile>(file); } 284 void fetchArchiveMember(); 285 286 static bool classof(const Symbol *s) { return s->kind() == LazyKind; } 287 288 private: 289 const llvm::object::Archive::Symbol sym; 290 }; 291 292 union SymbolUnion { 293 alignas(Defined) char a[sizeof(Defined)]; 294 alignas(Undefined) char b[sizeof(Undefined)]; 295 alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; 296 alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; 297 alignas(LazySymbol) char e[sizeof(LazySymbol)]; 298 }; 299 300 template <typename T, typename... ArgT> 301 T *replaceSymbol(Symbol *s, ArgT &&...arg) { 302 static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small"); 303 static_assert(alignof(T) <= alignof(SymbolUnion), 304 "SymbolUnion not aligned enough"); 305 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 306 "Not a Symbol"); 307 308 bool isUsedInRegularObj = s->isUsedInRegularObj; 309 bool used = s->used; 310 T *sym = new (s) T(std::forward<ArgT>(arg)...); 311 sym->isUsedInRegularObj |= isUsedInRegularObj; 312 sym->used |= used; 313 return sym; 314 } 315 316 } // namespace macho 317 318 std::string toString(const macho::Symbol &); 319 std::string toMachOString(const llvm::object::Archive::Symbol &); 320 321 } // namespace lld 322 323 #endif 324