1 //===- Symbols.h ------------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_COFF_SYMBOLS_H 10 #define LLD_COFF_SYMBOLS_H 11 12 #include "Chunks.h" 13 #include "Config.h" 14 #include "lld/Common/LLVM.h" 15 #include "lld/Common/Memory.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/Object/Archive.h" 18 #include "llvm/Object/COFF.h" 19 #include <atomic> 20 #include <memory> 21 #include <vector> 22 23 namespace lld { 24 25 std::string toString(coff::Symbol &b); 26 27 // There are two different ways to convert an Archive::Symbol to a string: 28 // One for Microsoft name mangling and one for Itanium name mangling. 29 // Call the functions toCOFFString and toELFString, not just toString. 30 std::string toCOFFString(const coff::Archive::Symbol &b); 31 32 namespace coff { 33 34 using llvm::object::Archive; 35 using llvm::object::COFFSymbolRef; 36 using llvm::object::coff_import_header; 37 using llvm::object::coff_symbol_generic; 38 39 class ArchiveFile; 40 class InputFile; 41 class ObjFile; 42 class SymbolTable; 43 44 // The base class for real symbol classes. 45 class Symbol { 46 public: 47 enum Kind { 48 // The order of these is significant. We start with the regular defined 49 // symbols as those are the most prevalent and the zero tag is the cheapest 50 // to set. Among the defined kinds, the lower the kind is preferred over 51 // the higher kind when testing whether one symbol should take precedence 52 // over another. 53 DefinedRegularKind = 0, 54 DefinedCommonKind, 55 DefinedLocalImportKind, 56 DefinedImportThunkKind, 57 DefinedImportDataKind, 58 DefinedAbsoluteKind, 59 DefinedSyntheticKind, 60 61 UndefinedKind, 62 LazyKind, 63 64 LastDefinedCOFFKind = DefinedCommonKind, 65 LastDefinedKind = DefinedSyntheticKind, 66 }; 67 68 Kind kind() const { return static_cast<Kind>(symbolKind); } 69 70 // Returns the symbol name. 71 StringRef getName(); 72 73 void replaceKeepingName(Symbol *other, size_t size); 74 75 // Returns the file from which this symbol was created. 76 InputFile *getFile(); 77 78 // Indicates that this symbol will be included in the final image. Only valid 79 // after calling markLive. 80 bool isLive() const; 81 82 protected: 83 friend SymbolTable; 84 explicit Symbol(Kind k, StringRef n = "") 85 : symbolKind(k), isExternal(true), isCOMDAT(false), 86 writtenToSymtab(false), pendingArchiveLoad(false), isGCRoot(false), 87 isRuntimePseudoReloc(false), nameSize(n.size()), 88 nameData(n.empty() ? nullptr : n.data()) {} 89 90 const unsigned symbolKind : 8; 91 unsigned isExternal : 1; 92 93 public: 94 // This bit is used by the \c DefinedRegular subclass. 95 unsigned isCOMDAT : 1; 96 97 // This bit is used by Writer::createSymbolAndStringTable() to prevent 98 // symbols from being written to the symbol table more than once. 99 unsigned writtenToSymtab : 1; 100 101 // True if this symbol was referenced by a regular (non-bitcode) object. 102 unsigned isUsedInRegularObj : 1; 103 104 // True if we've seen both a lazy and an undefined symbol with this symbol 105 // name, which means that we have enqueued an archive member load and should 106 // not load any more archive members to resolve the same symbol. 107 unsigned pendingArchiveLoad : 1; 108 109 /// True if we've already added this symbol to the list of GC roots. 110 unsigned isGCRoot : 1; 111 112 unsigned isRuntimePseudoReloc : 1; 113 114 protected: 115 // Symbol name length. Assume symbol lengths fit in a 32-bit integer. 116 uint32_t nameSize; 117 118 const char *nameData; 119 }; 120 121 // The base class for any defined symbols, including absolute symbols, 122 // etc. 123 class Defined : public Symbol { 124 public: 125 Defined(Kind k, StringRef n) : Symbol(k, n) {} 126 127 static bool classof(const Symbol *s) { return s->kind() <= LastDefinedKind; } 128 129 // Returns the RVA (relative virtual address) of this symbol. The 130 // writer sets and uses RVAs. 131 uint64_t getRVA(); 132 133 // Returns the chunk containing this symbol. Absolute symbols and __ImageBase 134 // do not have chunks, so this may return null. 135 Chunk *getChunk(); 136 }; 137 138 // Symbols defined via a COFF object file or bitcode file. For COFF files, this 139 // stores a coff_symbol_generic*, and names of internal symbols are lazily 140 // loaded through that. For bitcode files, Sym is nullptr and the name is stored 141 // as a decomposed StringRef. 142 class DefinedCOFF : public Defined { 143 friend Symbol; 144 145 public: 146 DefinedCOFF(Kind k, InputFile *f, StringRef n, const coff_symbol_generic *s) 147 : Defined(k, n), file(f), sym(s) {} 148 149 static bool classof(const Symbol *s) { 150 return s->kind() <= LastDefinedCOFFKind; 151 } 152 153 InputFile *getFile() { return file; } 154 155 COFFSymbolRef getCOFFSymbol(); 156 157 InputFile *file; 158 159 protected: 160 const coff_symbol_generic *sym; 161 }; 162 163 // Regular defined symbols read from object file symbol tables. 164 class DefinedRegular : public DefinedCOFF { 165 public: 166 DefinedRegular(InputFile *f, StringRef n, bool isCOMDAT, 167 bool isExternal = false, 168 const coff_symbol_generic *s = nullptr, 169 SectionChunk *c = nullptr) 170 : DefinedCOFF(DefinedRegularKind, f, n, s), data(c ? &c->repl : nullptr) { 171 this->isExternal = isExternal; 172 this->isCOMDAT = isCOMDAT; 173 } 174 175 static bool classof(const Symbol *s) { 176 return s->kind() == DefinedRegularKind; 177 } 178 179 uint64_t getRVA() const { return (*data)->getRVA() + sym->Value; } 180 SectionChunk *getChunk() const { return *data; } 181 uint32_t getValue() const { return sym->Value; } 182 183 SectionChunk **data; 184 }; 185 186 class DefinedCommon : public DefinedCOFF { 187 public: 188 DefinedCommon(InputFile *f, StringRef n, uint64_t size, 189 const coff_symbol_generic *s = nullptr, 190 CommonChunk *c = nullptr) 191 : DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) { 192 this->isExternal = true; 193 } 194 195 static bool classof(const Symbol *s) { 196 return s->kind() == DefinedCommonKind; 197 } 198 199 uint64_t getRVA() { return data->getRVA(); } 200 CommonChunk *getChunk() { return data; } 201 202 private: 203 friend SymbolTable; 204 uint64_t getSize() const { return size; } 205 CommonChunk *data; 206 uint64_t size; 207 }; 208 209 // Absolute symbols. 210 class DefinedAbsolute : public Defined { 211 public: 212 DefinedAbsolute(StringRef n, COFFSymbolRef s) 213 : Defined(DefinedAbsoluteKind, n), va(s.getValue()) { 214 isExternal = s.isExternal(); 215 } 216 217 DefinedAbsolute(StringRef n, uint64_t v) 218 : Defined(DefinedAbsoluteKind, n), va(v) {} 219 220 static bool classof(const Symbol *s) { 221 return s->kind() == DefinedAbsoluteKind; 222 } 223 224 uint64_t getRVA() { return va - config->imageBase; } 225 void setVA(uint64_t v) { va = v; } 226 227 // Section index relocations against absolute symbols resolve to 228 // this 16 bit number, and it is the largest valid section index 229 // plus one. This variable keeps it. 230 static uint16_t numOutputSections; 231 232 private: 233 uint64_t va; 234 }; 235 236 // This symbol is used for linker-synthesized symbols like __ImageBase and 237 // __safe_se_handler_table. 238 class DefinedSynthetic : public Defined { 239 public: 240 explicit DefinedSynthetic(StringRef name, Chunk *c) 241 : Defined(DefinedSyntheticKind, name), c(c) {} 242 243 static bool classof(const Symbol *s) { 244 return s->kind() == DefinedSyntheticKind; 245 } 246 247 // A null chunk indicates that this is __ImageBase. Otherwise, this is some 248 // other synthesized chunk, like SEHTableChunk. 249 uint32_t getRVA() { return c ? c->getRVA() : 0; } 250 Chunk *getChunk() { return c; } 251 252 private: 253 Chunk *c; 254 }; 255 256 // This class represents a symbol defined in an archive file. It is 257 // created from an archive file header, and it knows how to load an 258 // object file from an archive to replace itself with a defined 259 // symbol. If the resolver finds both Undefined and Lazy for 260 // the same name, it will ask the Lazy to load a file. 261 class Lazy : public Symbol { 262 public: 263 Lazy(ArchiveFile *f, const Archive::Symbol s) 264 : Symbol(LazyKind, s.getName()), file(f), sym(s) {} 265 266 static bool classof(const Symbol *s) { return s->kind() == LazyKind; } 267 268 MemoryBufferRef getMemberBuffer(); 269 270 ArchiveFile *file; 271 272 private: 273 friend SymbolTable; 274 275 private: 276 const Archive::Symbol sym; 277 }; 278 279 // Undefined symbols. 280 class Undefined : public Symbol { 281 public: 282 explicit Undefined(StringRef n) : Symbol(UndefinedKind, n) {} 283 284 static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } 285 286 // An undefined symbol can have a fallback symbol which gives an 287 // undefined symbol a second chance if it would remain undefined. 288 // If it remains undefined, it'll be replaced with whatever the 289 // Alias pointer points to. 290 Symbol *weakAlias = nullptr; 291 292 // If this symbol is external weak, try to resolve it to a defined 293 // symbol by searching the chain of fallback symbols. Returns the symbol if 294 // successful, otherwise returns null. 295 Defined *getWeakAlias(); 296 }; 297 298 // Windows-specific classes. 299 300 // This class represents a symbol imported from a DLL. This has two 301 // names for internal use and external use. The former is used for 302 // name resolution, and the latter is used for the import descriptor 303 // table in an output. The former has "__imp_" prefix. 304 class DefinedImportData : public Defined { 305 public: 306 DefinedImportData(StringRef n, ImportFile *f) 307 : Defined(DefinedImportDataKind, n), file(f) { 308 } 309 310 static bool classof(const Symbol *s) { 311 return s->kind() == DefinedImportDataKind; 312 } 313 314 uint64_t getRVA() { return file->location->getRVA(); } 315 Chunk *getChunk() { return file->location; } 316 void setLocation(Chunk *addressTable) { file->location = addressTable; } 317 318 StringRef getDLLName() { return file->dllName; } 319 StringRef getExternalName() { return file->externalName; } 320 uint16_t getOrdinal() { return file->hdr->OrdinalHint; } 321 322 ImportFile *file; 323 }; 324 325 // This class represents a symbol for a jump table entry which jumps 326 // to a function in a DLL. Linker are supposed to create such symbols 327 // without "__imp_" prefix for all function symbols exported from 328 // DLLs, so that you can call DLL functions as regular functions with 329 // a regular name. A function pointer is given as a DefinedImportData. 330 class DefinedImportThunk : public Defined { 331 public: 332 DefinedImportThunk(StringRef name, DefinedImportData *s, uint16_t machine); 333 334 static bool classof(const Symbol *s) { 335 return s->kind() == DefinedImportThunkKind; 336 } 337 338 uint64_t getRVA() { return data->getRVA(); } 339 Chunk *getChunk() { return data; } 340 341 DefinedImportData *wrappedSym; 342 343 private: 344 Chunk *data; 345 }; 346 347 // If you have a symbol "foo" in your object file, a symbol name 348 // "__imp_foo" becomes automatically available as a pointer to "foo". 349 // This class is for such automatically-created symbols. 350 // Yes, this is an odd feature. We didn't intend to implement that. 351 // This is here just for compatibility with MSVC. 352 class DefinedLocalImport : public Defined { 353 public: 354 DefinedLocalImport(StringRef n, Defined *s) 355 : Defined(DefinedLocalImportKind, n), data(make<LocalImportChunk>(s)) {} 356 357 static bool classof(const Symbol *s) { 358 return s->kind() == DefinedLocalImportKind; 359 } 360 361 uint64_t getRVA() { return data->getRVA(); } 362 Chunk *getChunk() { return data; } 363 364 private: 365 LocalImportChunk *data; 366 }; 367 368 inline uint64_t Defined::getRVA() { 369 switch (kind()) { 370 case DefinedAbsoluteKind: 371 return cast<DefinedAbsolute>(this)->getRVA(); 372 case DefinedSyntheticKind: 373 return cast<DefinedSynthetic>(this)->getRVA(); 374 case DefinedImportDataKind: 375 return cast<DefinedImportData>(this)->getRVA(); 376 case DefinedImportThunkKind: 377 return cast<DefinedImportThunk>(this)->getRVA(); 378 case DefinedLocalImportKind: 379 return cast<DefinedLocalImport>(this)->getRVA(); 380 case DefinedCommonKind: 381 return cast<DefinedCommon>(this)->getRVA(); 382 case DefinedRegularKind: 383 return cast<DefinedRegular>(this)->getRVA(); 384 case LazyKind: 385 case UndefinedKind: 386 llvm_unreachable("Cannot get the address for an undefined symbol."); 387 } 388 llvm_unreachable("unknown symbol kind"); 389 } 390 391 inline Chunk *Defined::getChunk() { 392 switch (kind()) { 393 case DefinedRegularKind: 394 return cast<DefinedRegular>(this)->getChunk(); 395 case DefinedAbsoluteKind: 396 return nullptr; 397 case DefinedSyntheticKind: 398 return cast<DefinedSynthetic>(this)->getChunk(); 399 case DefinedImportDataKind: 400 return cast<DefinedImportData>(this)->getChunk(); 401 case DefinedImportThunkKind: 402 return cast<DefinedImportThunk>(this)->getChunk(); 403 case DefinedLocalImportKind: 404 return cast<DefinedLocalImport>(this)->getChunk(); 405 case DefinedCommonKind: 406 return cast<DefinedCommon>(this)->getChunk(); 407 case LazyKind: 408 case UndefinedKind: 409 llvm_unreachable("Cannot get the chunk of an undefined symbol."); 410 } 411 llvm_unreachable("unknown symbol kind"); 412 } 413 414 // A buffer class that is large enough to hold any Symbol-derived 415 // object. We allocate memory using this class and instantiate a symbol 416 // using the placement new. 417 union SymbolUnion { 418 alignas(DefinedRegular) char a[sizeof(DefinedRegular)]; 419 alignas(DefinedCommon) char b[sizeof(DefinedCommon)]; 420 alignas(DefinedAbsolute) char c[sizeof(DefinedAbsolute)]; 421 alignas(DefinedSynthetic) char d[sizeof(DefinedSynthetic)]; 422 alignas(Lazy) char e[sizeof(Lazy)]; 423 alignas(Undefined) char f[sizeof(Undefined)]; 424 alignas(DefinedImportData) char g[sizeof(DefinedImportData)]; 425 alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)]; 426 alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)]; 427 }; 428 429 template <typename T, typename... ArgT> 430 void replaceSymbol(Symbol *s, ArgT &&... arg) { 431 static_assert(std::is_trivially_destructible<T>(), 432 "Symbol types must be trivially destructible"); 433 static_assert(sizeof(T) <= sizeof(SymbolUnion), "Symbol too small"); 434 static_assert(alignof(T) <= alignof(SymbolUnion), 435 "SymbolUnion not aligned enough"); 436 assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr && 437 "Not a Symbol"); 438 new (s) T(std::forward<ArgT>(arg)...); 439 } 440 } // namespace coff 441 442 } // namespace lld 443 444 #endif 445