1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/PDB/Native/InputFile.h" 10 11 #include "llvm/BinaryFormat/Magic.h" 12 #include "llvm/DebugInfo/CodeView/CodeView.h" 13 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" 14 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" 15 #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 16 #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 17 #include "llvm/DebugInfo/PDB/Native/FormatUtil.h" 18 #include "llvm/DebugInfo/PDB/Native/LinePrinter.h" 19 #include "llvm/DebugInfo/PDB/Native/NativeSession.h" 20 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 21 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" 22 #include "llvm/DebugInfo/PDB/Native/RawError.h" 23 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 24 #include "llvm/DebugInfo/PDB/PDB.h" 25 #include "llvm/Object/COFF.h" 26 #include "llvm/Support/FileSystem.h" 27 #include "llvm/Support/FormatVariadic.h" 28 29 using namespace llvm; 30 using namespace llvm::codeview; 31 using namespace llvm::object; 32 using namespace llvm::pdb; 33 34 InputFile::InputFile() = default; 35 InputFile::~InputFile() = default; 36 37 Expected<ModuleDebugStreamRef> 38 llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName, 39 uint32_t Index) { 40 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream(); 41 if (!DbiOrErr) 42 return DbiOrErr.takeError(); 43 DbiStream &Dbi = *DbiOrErr; 44 const auto &Modules = Dbi.modules(); 45 if (Index >= Modules.getModuleCount()) 46 return make_error<RawError>(raw_error_code::index_out_of_bounds, 47 "Invalid module index"); 48 49 auto Modi = Modules.getModuleDescriptor(Index); 50 51 ModuleName = Modi.getModuleName(); 52 53 uint16_t ModiStream = Modi.getModuleStreamIndex(); 54 if (ModiStream == kInvalidStreamIndex) 55 return make_error<RawError>(raw_error_code::no_stream, 56 "Module stream not present"); 57 58 auto ModStreamData = File.createIndexedStream(ModiStream); 59 60 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); 61 if (auto EC = ModS.reload()) 62 return make_error<RawError>(raw_error_code::corrupt_file, 63 "Invalid module stream"); 64 65 return std::move(ModS); 66 } 67 68 Expected<ModuleDebugStreamRef> llvm::pdb::getModuleDebugStream(PDBFile &File, 69 uint32_t Index) { 70 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream(); 71 if (!DbiOrErr) 72 return DbiOrErr.takeError(); 73 DbiStream &Dbi = *DbiOrErr; 74 const auto &Modules = Dbi.modules(); 75 auto Modi = Modules.getModuleDescriptor(Index); 76 77 uint16_t ModiStream = Modi.getModuleStreamIndex(); 78 if (ModiStream == kInvalidStreamIndex) 79 return make_error<RawError>(raw_error_code::no_stream, 80 "Module stream not present"); 81 82 auto ModStreamData = File.createIndexedStream(ModiStream); 83 84 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); 85 if (Error Err = ModS.reload()) 86 return make_error<RawError>(raw_error_code::corrupt_file, 87 "Invalid module stream"); 88 89 return std::move(ModS); 90 } 91 92 static inline bool isCodeViewDebugSubsection(object::SectionRef Section, 93 StringRef Name, 94 BinaryStreamReader &Reader) { 95 if (Expected<StringRef> NameOrErr = Section.getName()) { 96 if (*NameOrErr != Name) 97 return false; 98 } else { 99 consumeError(NameOrErr.takeError()); 100 return false; 101 } 102 103 Expected<StringRef> ContentsOrErr = Section.getContents(); 104 if (!ContentsOrErr) { 105 consumeError(ContentsOrErr.takeError()); 106 return false; 107 } 108 109 Reader = BinaryStreamReader(*ContentsOrErr, support::little); 110 uint32_t Magic; 111 if (Reader.bytesRemaining() < sizeof(uint32_t)) 112 return false; 113 cantFail(Reader.readInteger(Magic)); 114 if (Magic != COFF::DEBUG_SECTION_MAGIC) 115 return false; 116 return true; 117 } 118 119 static inline bool isDebugSSection(object::SectionRef Section, 120 DebugSubsectionArray &Subsections) { 121 BinaryStreamReader Reader; 122 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader)) 123 return false; 124 125 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining())); 126 return true; 127 } 128 129 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) { 130 BinaryStreamReader Reader; 131 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) && 132 !isCodeViewDebugSubsection(Section, ".debug$P", Reader)) 133 return false; 134 cantFail(Reader.readArray(Types, Reader.bytesRemaining())); 135 return true; 136 } 137 138 static std::string formatChecksumKind(FileChecksumKind Kind) { 139 switch (Kind) { 140 RETURN_CASE(FileChecksumKind, None, "None"); 141 RETURN_CASE(FileChecksumKind, MD5, "MD5"); 142 RETURN_CASE(FileChecksumKind, SHA1, "SHA-1"); 143 RETURN_CASE(FileChecksumKind, SHA256, "SHA-256"); 144 } 145 return formatUnknownEnum(Kind); 146 } 147 148 template <typename... Args> 149 static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) { 150 if (Append) 151 Printer.format(std::forward<Args>(args)...); 152 else 153 Printer.formatLine(std::forward<Args>(args)...); 154 } 155 156 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) { 157 if (!File) 158 return; 159 160 if (File->isPdb()) 161 initializeForPdb(GroupIndex); 162 else { 163 Name = ".debug$S"; 164 uint32_t I = 0; 165 for (const auto &S : File->obj().sections()) { 166 DebugSubsectionArray SS; 167 if (!isDebugSSection(S, SS)) 168 continue; 169 170 if (!SC.hasChecksums() || !SC.hasStrings()) 171 SC.initialize(SS); 172 173 if (I == GroupIndex) 174 Subsections = SS; 175 176 if (SC.hasChecksums() && SC.hasStrings()) 177 break; 178 } 179 rebuildChecksumMap(); 180 } 181 } 182 183 StringRef SymbolGroup::name() const { return Name; } 184 185 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) { 186 Subsections = SS; 187 } 188 189 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); } 190 191 void SymbolGroup::initializeForPdb(uint32_t Modi) { 192 assert(File && File->isPdb()); 193 194 // PDB always uses the same string table, but each module has its own 195 // checksums. So we only set the strings if they're not already set. 196 if (!SC.hasStrings()) { 197 auto StringTable = File->pdb().getStringTable(); 198 if (StringTable) 199 SC.setStrings(StringTable->getStringTable()); 200 else 201 consumeError(StringTable.takeError()); 202 } 203 204 SC.resetChecksums(); 205 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi); 206 if (!MDS) { 207 consumeError(MDS.takeError()); 208 return; 209 } 210 211 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS)); 212 Subsections = DebugStream->getSubsectionsArray(); 213 SC.initialize(Subsections); 214 rebuildChecksumMap(); 215 } 216 217 void SymbolGroup::rebuildChecksumMap() { 218 if (!SC.hasChecksums()) 219 return; 220 221 for (const auto &Entry : SC.checksums()) { 222 auto S = SC.strings().getString(Entry.FileNameOffset); 223 if (!S) 224 continue; 225 ChecksumsByFile[*S] = Entry; 226 } 227 } 228 229 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const { 230 assert(File && File->isPdb() && DebugStream); 231 return *DebugStream; 232 } 233 234 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const { 235 return SC.strings().getString(Offset); 236 } 237 238 Expected<StringRef> SymbolGroup::getNameFromChecksums(uint32_t Offset) const { 239 StringRef Name; 240 if (!SC.hasChecksums()) { 241 return std::move(Name); 242 } 243 244 auto Iter = SC.checksums().getArray().at(Offset); 245 if (Iter == SC.checksums().getArray().end()) { 246 return std::move(Name); 247 } 248 249 uint32_t FO = Iter->FileNameOffset; 250 auto ExpectedFile = getNameFromStringTable(FO); 251 if (!ExpectedFile) { 252 return std::move(Name); 253 } 254 255 return *ExpectedFile; 256 } 257 258 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File, 259 bool Append) const { 260 auto FC = ChecksumsByFile.find(File); 261 if (FC == ChecksumsByFile.end()) { 262 formatInternal(Printer, Append, "- (no checksum) {0}", File); 263 return; 264 } 265 266 formatInternal(Printer, Append, "- ({0}: {1}) {2}", 267 formatChecksumKind(FC->getValue().Kind), 268 toHex(FC->getValue().Checksum), File); 269 } 270 271 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer, 272 uint32_t Offset, 273 bool Append) const { 274 if (!SC.hasChecksums()) { 275 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 276 return; 277 } 278 279 auto Iter = SC.checksums().getArray().at(Offset); 280 if (Iter == SC.checksums().getArray().end()) { 281 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 282 return; 283 } 284 285 uint32_t FO = Iter->FileNameOffset; 286 auto ExpectedFile = getNameFromStringTable(FO); 287 if (!ExpectedFile) { 288 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 289 consumeError(ExpectedFile.takeError()); 290 return; 291 } 292 if (Iter->Kind == FileChecksumKind::None) { 293 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile); 294 } else { 295 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile, 296 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum)); 297 } 298 } 299 300 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) { 301 InputFile IF; 302 if (!llvm::sys::fs::exists(Path)) 303 return make_error<StringError>(formatv("File {0} not found", Path), 304 inconvertibleErrorCode()); 305 306 file_magic Magic; 307 if (auto EC = identify_magic(Path, Magic)) 308 return make_error<StringError>( 309 formatv("Unable to identify file type for file {0}", Path), EC); 310 311 if (Magic == file_magic::coff_object) { 312 Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path); 313 if (!BinaryOrErr) 314 return BinaryOrErr.takeError(); 315 316 IF.CoffObject = std::move(*BinaryOrErr); 317 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary()); 318 return std::move(IF); 319 } 320 321 if (Magic == file_magic::pdb) { 322 std::unique_ptr<IPDBSession> Session; 323 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session)) 324 return std::move(Err); 325 326 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release())); 327 IF.PdbOrObj = &IF.PdbSession->getPDBFile(); 328 329 return std::move(IF); 330 } 331 332 if (!AllowUnknownFile) 333 return make_error<StringError>( 334 formatv("File {0} is not a supported file type", Path), 335 inconvertibleErrorCode()); 336 337 auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false, 338 /*RequiresNullTerminator=*/false); 339 if (!Result) 340 return make_error<StringError>( 341 formatv("File {0} could not be opened", Path), Result.getError()); 342 343 IF.UnknownFile = std::move(*Result); 344 IF.PdbOrObj = IF.UnknownFile.get(); 345 return std::move(IF); 346 } 347 348 PDBFile &InputFile::pdb() { 349 assert(isPdb()); 350 return *PdbOrObj.get<PDBFile *>(); 351 } 352 353 const PDBFile &InputFile::pdb() const { 354 assert(isPdb()); 355 return *PdbOrObj.get<PDBFile *>(); 356 } 357 358 object::COFFObjectFile &InputFile::obj() { 359 assert(isObj()); 360 return *PdbOrObj.get<object::COFFObjectFile *>(); 361 } 362 363 const object::COFFObjectFile &InputFile::obj() const { 364 assert(isObj()); 365 return *PdbOrObj.get<object::COFFObjectFile *>(); 366 } 367 368 MemoryBuffer &InputFile::unknown() { 369 assert(isUnknown()); 370 return *PdbOrObj.get<MemoryBuffer *>(); 371 } 372 373 const MemoryBuffer &InputFile::unknown() const { 374 assert(isUnknown()); 375 return *PdbOrObj.get<MemoryBuffer *>(); 376 } 377 378 StringRef InputFile::getFilePath() const { 379 if (isPdb()) 380 return pdb().getFilePath(); 381 if (isObj()) 382 return obj().getFileName(); 383 assert(isUnknown()); 384 return unknown().getBufferIdentifier(); 385 } 386 387 bool InputFile::hasTypes() const { 388 if (isPdb()) 389 return pdb().hasPDBTpiStream(); 390 391 for (const auto &Section : obj().sections()) { 392 CVTypeArray Types; 393 if (isDebugTSection(Section, Types)) 394 return true; 395 } 396 return false; 397 } 398 399 bool InputFile::hasIds() const { 400 if (isObj()) 401 return false; 402 return pdb().hasPDBIpiStream(); 403 } 404 405 bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); } 406 407 bool InputFile::isObj() const { 408 return PdbOrObj.is<object::COFFObjectFile *>(); 409 } 410 411 bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); } 412 413 codeview::LazyRandomTypeCollection & 414 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { 415 if (Types && Kind == kTypes) 416 return *Types; 417 if (Ids && Kind == kIds) 418 return *Ids; 419 420 if (Kind == kIds) { 421 assert(isPdb() && pdb().hasPDBIpiStream()); 422 } 423 424 // If the collection was already initialized, we should have just returned it 425 // in step 1. 426 if (isPdb()) { 427 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types; 428 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream() 429 : pdb().getPDBTpiStream()); 430 431 auto &Array = Stream.typeArray(); 432 uint32_t Count = Stream.getNumTypeRecords(); 433 auto Offsets = Stream.getTypeIndexOffsets(); 434 Collection = 435 std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets); 436 return *Collection; 437 } 438 439 assert(isObj()); 440 assert(Kind == kTypes); 441 assert(!Types); 442 443 for (const auto &Section : obj().sections()) { 444 CVTypeArray Records; 445 if (!isDebugTSection(Section, Records)) 446 continue; 447 448 Types = std::make_unique<LazyRandomTypeCollection>(Records, 100); 449 return *Types; 450 } 451 452 Types = std::make_unique<LazyRandomTypeCollection>(100); 453 return *Types; 454 } 455 456 codeview::LazyRandomTypeCollection &InputFile::types() { 457 return getOrCreateTypeCollection(kTypes); 458 } 459 460 codeview::LazyRandomTypeCollection &InputFile::ids() { 461 // Object files have only one type stream that contains both types and ids. 462 // Similarly, some PDBs don't contain an IPI stream, and for those both types 463 // and IDs are in the same stream. 464 if (isObj() || !pdb().hasPDBIpiStream()) 465 return types(); 466 467 return getOrCreateTypeCollection(kIds); 468 } 469 470 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() { 471 return make_range<SymbolGroupIterator>(symbol_groups_begin(), 472 symbol_groups_end()); 473 } 474 475 SymbolGroupIterator InputFile::symbol_groups_begin() { 476 return SymbolGroupIterator(*this); 477 } 478 479 SymbolGroupIterator InputFile::symbol_groups_end() { 480 return SymbolGroupIterator(); 481 } 482 483 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {} 484 485 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) { 486 if (File.isObj()) { 487 SectionIter = File.obj().section_begin(); 488 scanToNextDebugS(); 489 } 490 } 491 492 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const { 493 bool E = isEnd(); 494 bool RE = R.isEnd(); 495 if (E || RE) 496 return E == RE; 497 498 if (Value.File != R.Value.File) 499 return false; 500 return Index == R.Index; 501 } 502 503 const SymbolGroup &SymbolGroupIterator::operator*() const { 504 assert(!isEnd()); 505 return Value; 506 } 507 SymbolGroup &SymbolGroupIterator::operator*() { 508 assert(!isEnd()); 509 return Value; 510 } 511 512 SymbolGroupIterator &SymbolGroupIterator::operator++() { 513 assert(Value.File && !isEnd()); 514 ++Index; 515 if (isEnd()) 516 return *this; 517 518 if (Value.File->isPdb()) { 519 Value.updatePdbModi(Index); 520 return *this; 521 } 522 523 scanToNextDebugS(); 524 return *this; 525 } 526 527 void SymbolGroupIterator::scanToNextDebugS() { 528 assert(SectionIter); 529 auto End = Value.File->obj().section_end(); 530 auto &Iter = *SectionIter; 531 assert(!isEnd()); 532 533 while (++Iter != End) { 534 DebugSubsectionArray SS; 535 SectionRef SR = *Iter; 536 if (!isDebugSSection(SR, SS)) 537 continue; 538 539 Value.updateDebugS(SS); 540 return; 541 } 542 } 543 544 bool SymbolGroupIterator::isEnd() const { 545 if (!Value.File) 546 return true; 547 if (Value.File->isPdb()) { 548 DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream()); 549 uint32_t Count = Dbi.modules().getModuleCount(); 550 assert(Index <= Count); 551 return Index == Count; 552 } 553 554 assert(SectionIter); 555 return *SectionIter == Value.File->obj().section_end(); 556 } 557 558 static bool isMyCode(const SymbolGroup &Group) { 559 if (Group.getFile().isObj()) 560 return true; 561 562 StringRef Name = Group.name(); 563 if (Name.startswith("Import:")) 564 return false; 565 if (Name.endswith_insensitive(".dll")) 566 return false; 567 if (Name.equals_insensitive("* linker *")) 568 return false; 569 if (Name.startswith_insensitive("f:\\binaries\\Intermediate\\vctools")) 570 return false; 571 if (Name.startswith_insensitive("f:\\dd\\vctools\\crt")) 572 return false; 573 return true; 574 } 575 576 bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group, 577 const FilterOptions &Filters) { 578 if (Filters.JustMyCode && !isMyCode(Group)) 579 return false; 580 581 // If the arg was not specified on the command line, always dump all modules. 582 if (!Filters.DumpModi) 583 return true; 584 585 // Otherwise, only dump if this is the same module specified. 586 return (Filters.DumpModi == Idx); 587 } 588