1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/PDB/Native/InputFile.h" 10 11 #include "llvm/ADT/StringExtras.h" 12 #include "llvm/BinaryFormat/Magic.h" 13 #include "llvm/DebugInfo/CodeView/CodeView.h" 14 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" 15 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" 16 #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 17 #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 18 #include "llvm/DebugInfo/PDB/Native/FormatUtil.h" 19 #include "llvm/DebugInfo/PDB/Native/LinePrinter.h" 20 #include "llvm/DebugInfo/PDB/Native/NativeSession.h" 21 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 22 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" 23 #include "llvm/DebugInfo/PDB/Native/RawError.h" 24 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 25 #include "llvm/DebugInfo/PDB/PDB.h" 26 #include "llvm/Object/COFF.h" 27 #include "llvm/Support/FileSystem.h" 28 #include "llvm/Support/FormatVariadic.h" 29 30 using namespace llvm; 31 using namespace llvm::codeview; 32 using namespace llvm::object; 33 using namespace llvm::pdb; 34 35 InputFile::InputFile() = default; 36 InputFile::~InputFile() = default; 37 38 Expected<ModuleDebugStreamRef> 39 llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName, 40 uint32_t Index) { 41 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream(); 42 if (!DbiOrErr) 43 return DbiOrErr.takeError(); 44 DbiStream &Dbi = *DbiOrErr; 45 const auto &Modules = Dbi.modules(); 46 if (Index >= Modules.getModuleCount()) 47 return make_error<RawError>(raw_error_code::index_out_of_bounds, 48 "Invalid module index"); 49 50 auto Modi = Modules.getModuleDescriptor(Index); 51 52 ModuleName = Modi.getModuleName(); 53 54 uint16_t ModiStream = Modi.getModuleStreamIndex(); 55 if (ModiStream == kInvalidStreamIndex) 56 return make_error<RawError>(raw_error_code::no_stream, 57 "Module stream not present"); 58 59 auto ModStreamData = File.createIndexedStream(ModiStream); 60 61 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); 62 if (auto EC = ModS.reload()) 63 return make_error<RawError>(raw_error_code::corrupt_file, 64 "Invalid module stream"); 65 66 return std::move(ModS); 67 } 68 69 Expected<ModuleDebugStreamRef> llvm::pdb::getModuleDebugStream(PDBFile &File, 70 uint32_t Index) { 71 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream(); 72 if (!DbiOrErr) 73 return DbiOrErr.takeError(); 74 DbiStream &Dbi = *DbiOrErr; 75 const auto &Modules = Dbi.modules(); 76 auto Modi = Modules.getModuleDescriptor(Index); 77 78 uint16_t ModiStream = Modi.getModuleStreamIndex(); 79 if (ModiStream == kInvalidStreamIndex) 80 return make_error<RawError>(raw_error_code::no_stream, 81 "Module stream not present"); 82 83 auto ModStreamData = File.createIndexedStream(ModiStream); 84 85 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); 86 if (Error Err = ModS.reload()) 87 return make_error<RawError>(raw_error_code::corrupt_file, 88 "Invalid module stream"); 89 90 return std::move(ModS); 91 } 92 93 static inline bool isCodeViewDebugSubsection(object::SectionRef Section, 94 StringRef Name, 95 BinaryStreamReader &Reader) { 96 if (Expected<StringRef> NameOrErr = Section.getName()) { 97 if (*NameOrErr != Name) 98 return false; 99 } else { 100 consumeError(NameOrErr.takeError()); 101 return false; 102 } 103 104 Expected<StringRef> ContentsOrErr = Section.getContents(); 105 if (!ContentsOrErr) { 106 consumeError(ContentsOrErr.takeError()); 107 return false; 108 } 109 110 Reader = BinaryStreamReader(*ContentsOrErr, llvm::endianness::little); 111 uint32_t Magic; 112 if (Reader.bytesRemaining() < sizeof(uint32_t)) 113 return false; 114 cantFail(Reader.readInteger(Magic)); 115 if (Magic != COFF::DEBUG_SECTION_MAGIC) 116 return false; 117 return true; 118 } 119 120 static inline bool isDebugSSection(object::SectionRef Section, 121 DebugSubsectionArray &Subsections) { 122 BinaryStreamReader Reader; 123 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader)) 124 return false; 125 126 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining())); 127 return true; 128 } 129 130 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) { 131 BinaryStreamReader Reader; 132 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) && 133 !isCodeViewDebugSubsection(Section, ".debug$P", Reader)) 134 return false; 135 cantFail(Reader.readArray(Types, Reader.bytesRemaining())); 136 return true; 137 } 138 139 static std::string formatChecksumKind(FileChecksumKind Kind) { 140 switch (Kind) { 141 RETURN_CASE(FileChecksumKind, None, "None"); 142 RETURN_CASE(FileChecksumKind, MD5, "MD5"); 143 RETURN_CASE(FileChecksumKind, SHA1, "SHA-1"); 144 RETURN_CASE(FileChecksumKind, SHA256, "SHA-256"); 145 } 146 return formatUnknownEnum(Kind); 147 } 148 149 template <typename... Args> 150 static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) { 151 if (Append) 152 Printer.format(std::forward<Args>(args)...); 153 else 154 Printer.formatLine(std::forward<Args>(args)...); 155 } 156 157 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) { 158 if (!File) 159 return; 160 161 if (File->isPdb()) 162 initializeForPdb(GroupIndex); 163 else { 164 Name = ".debug$S"; 165 uint32_t I = 0; 166 for (const auto &S : File->obj().sections()) { 167 DebugSubsectionArray SS; 168 if (!isDebugSSection(S, SS)) 169 continue; 170 171 if (!SC.hasChecksums() || !SC.hasStrings()) 172 SC.initialize(SS); 173 174 if (I == GroupIndex) 175 Subsections = SS; 176 177 if (SC.hasChecksums() && SC.hasStrings()) 178 break; 179 } 180 rebuildChecksumMap(); 181 } 182 } 183 184 StringRef SymbolGroup::name() const { return Name; } 185 186 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) { 187 Subsections = SS; 188 } 189 190 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); } 191 192 void SymbolGroup::initializeForPdb(uint32_t Modi) { 193 assert(File && File->isPdb()); 194 195 // PDB always uses the same string table, but each module has its own 196 // checksums. So we only set the strings if they're not already set. 197 if (!SC.hasStrings()) { 198 auto StringTable = File->pdb().getStringTable(); 199 if (StringTable) 200 SC.setStrings(StringTable->getStringTable()); 201 else 202 consumeError(StringTable.takeError()); 203 } 204 205 SC.resetChecksums(); 206 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi); 207 if (!MDS) { 208 consumeError(MDS.takeError()); 209 return; 210 } 211 212 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS)); 213 Subsections = DebugStream->getSubsectionsArray(); 214 SC.initialize(Subsections); 215 rebuildChecksumMap(); 216 } 217 218 void SymbolGroup::rebuildChecksumMap() { 219 if (!SC.hasChecksums()) 220 return; 221 222 for (const auto &Entry : SC.checksums()) { 223 auto S = SC.strings().getString(Entry.FileNameOffset); 224 if (!S) 225 continue; 226 ChecksumsByFile[*S] = Entry; 227 } 228 } 229 230 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const { 231 assert(File && File->isPdb() && DebugStream); 232 return *DebugStream; 233 } 234 235 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const { 236 return SC.strings().getString(Offset); 237 } 238 239 Expected<StringRef> SymbolGroup::getNameFromChecksums(uint32_t Offset) const { 240 StringRef Name; 241 if (!SC.hasChecksums()) { 242 return std::move(Name); 243 } 244 245 auto Iter = SC.checksums().getArray().at(Offset); 246 if (Iter == SC.checksums().getArray().end()) { 247 return std::move(Name); 248 } 249 250 uint32_t FO = Iter->FileNameOffset; 251 auto ExpectedFile = getNameFromStringTable(FO); 252 if (!ExpectedFile) { 253 return std::move(Name); 254 } 255 256 return *ExpectedFile; 257 } 258 259 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File, 260 bool Append) const { 261 auto FC = ChecksumsByFile.find(File); 262 if (FC == ChecksumsByFile.end()) { 263 formatInternal(Printer, Append, "- (no checksum) {0}", File); 264 return; 265 } 266 267 formatInternal(Printer, Append, "- ({0}: {1}) {2}", 268 formatChecksumKind(FC->getValue().Kind), 269 toHex(FC->getValue().Checksum), File); 270 } 271 272 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer, 273 uint32_t Offset, 274 bool Append) const { 275 if (!SC.hasChecksums()) { 276 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 277 return; 278 } 279 280 auto Iter = SC.checksums().getArray().at(Offset); 281 if (Iter == SC.checksums().getArray().end()) { 282 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 283 return; 284 } 285 286 uint32_t FO = Iter->FileNameOffset; 287 auto ExpectedFile = getNameFromStringTable(FO); 288 if (!ExpectedFile) { 289 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 290 consumeError(ExpectedFile.takeError()); 291 return; 292 } 293 if (Iter->Kind == FileChecksumKind::None) { 294 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile); 295 } else { 296 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile, 297 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum)); 298 } 299 } 300 301 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) { 302 InputFile IF; 303 if (!llvm::sys::fs::exists(Path)) 304 return make_error<StringError>(formatv("File {0} not found", Path), 305 inconvertibleErrorCode()); 306 307 file_magic Magic; 308 if (auto EC = identify_magic(Path, Magic)) 309 return make_error<StringError>( 310 formatv("Unable to identify file type for file {0}", Path), EC); 311 312 if (Magic == file_magic::coff_object) { 313 Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path); 314 if (!BinaryOrErr) 315 return BinaryOrErr.takeError(); 316 317 IF.CoffObject = std::move(*BinaryOrErr); 318 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary()); 319 return std::move(IF); 320 } 321 322 if (Magic == file_magic::pdb) { 323 std::unique_ptr<IPDBSession> Session; 324 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session)) 325 return std::move(Err); 326 327 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release())); 328 IF.PdbOrObj = &IF.PdbSession->getPDBFile(); 329 330 return std::move(IF); 331 } 332 333 if (!AllowUnknownFile) 334 return make_error<StringError>( 335 formatv("File {0} is not a supported file type", Path), 336 inconvertibleErrorCode()); 337 338 auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false, 339 /*RequiresNullTerminator=*/false); 340 if (!Result) 341 return make_error<StringError>( 342 formatv("File {0} could not be opened", Path), Result.getError()); 343 344 IF.UnknownFile = std::move(*Result); 345 IF.PdbOrObj = IF.UnknownFile.get(); 346 return std::move(IF); 347 } 348 349 PDBFile &InputFile::pdb() { 350 assert(isPdb()); 351 return *cast<PDBFile *>(PdbOrObj); 352 } 353 354 const PDBFile &InputFile::pdb() const { 355 assert(isPdb()); 356 return *cast<PDBFile *>(PdbOrObj); 357 } 358 359 object::COFFObjectFile &InputFile::obj() { 360 assert(isObj()); 361 return *cast<object::COFFObjectFile *>(PdbOrObj); 362 } 363 364 const object::COFFObjectFile &InputFile::obj() const { 365 assert(isObj()); 366 return *cast<object::COFFObjectFile *>(PdbOrObj); 367 } 368 369 MemoryBuffer &InputFile::unknown() { 370 assert(isUnknown()); 371 return *cast<MemoryBuffer *>(PdbOrObj); 372 } 373 374 const MemoryBuffer &InputFile::unknown() const { 375 assert(isUnknown()); 376 return *cast<MemoryBuffer *>(PdbOrObj); 377 } 378 379 StringRef InputFile::getFilePath() const { 380 if (isPdb()) 381 return pdb().getFilePath(); 382 if (isObj()) 383 return obj().getFileName(); 384 assert(isUnknown()); 385 return unknown().getBufferIdentifier(); 386 } 387 388 bool InputFile::hasTypes() const { 389 if (isPdb()) 390 return pdb().hasPDBTpiStream(); 391 392 for (const auto &Section : obj().sections()) { 393 CVTypeArray Types; 394 if (isDebugTSection(Section, Types)) 395 return true; 396 } 397 return false; 398 } 399 400 bool InputFile::hasIds() const { 401 if (isObj()) 402 return false; 403 return pdb().hasPDBIpiStream(); 404 } 405 406 bool InputFile::isPdb() const { return isa<PDBFile *>(PdbOrObj); } 407 408 bool InputFile::isObj() const { 409 return isa<object::COFFObjectFile *>(PdbOrObj); 410 } 411 412 bool InputFile::isUnknown() const { return isa<MemoryBuffer *>(PdbOrObj); } 413 414 codeview::LazyRandomTypeCollection & 415 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { 416 if (Types && Kind == kTypes) 417 return *Types; 418 if (Ids && Kind == kIds) 419 return *Ids; 420 421 if (Kind == kIds) { 422 assert(isPdb() && pdb().hasPDBIpiStream()); 423 } 424 425 // If the collection was already initialized, we should have just returned it 426 // in step 1. 427 if (isPdb()) { 428 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types; 429 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream() 430 : pdb().getPDBTpiStream()); 431 432 auto &Array = Stream.typeArray(); 433 uint32_t Count = Stream.getNumTypeRecords(); 434 auto Offsets = Stream.getTypeIndexOffsets(); 435 Collection = 436 std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets); 437 return *Collection; 438 } 439 440 assert(isObj()); 441 assert(Kind == kTypes); 442 assert(!Types); 443 444 for (const auto &Section : obj().sections()) { 445 CVTypeArray Records; 446 if (!isDebugTSection(Section, Records)) 447 continue; 448 449 Types = std::make_unique<LazyRandomTypeCollection>(Records, 100); 450 return *Types; 451 } 452 453 Types = std::make_unique<LazyRandomTypeCollection>(100); 454 return *Types; 455 } 456 457 codeview::LazyRandomTypeCollection &InputFile::types() { 458 return getOrCreateTypeCollection(kTypes); 459 } 460 461 codeview::LazyRandomTypeCollection &InputFile::ids() { 462 // Object files have only one type stream that contains both types and ids. 463 // Similarly, some PDBs don't contain an IPI stream, and for those both types 464 // and IDs are in the same stream. 465 if (isObj() || !pdb().hasPDBIpiStream()) 466 return types(); 467 468 return getOrCreateTypeCollection(kIds); 469 } 470 471 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() { 472 return make_range<SymbolGroupIterator>(symbol_groups_begin(), 473 symbol_groups_end()); 474 } 475 476 SymbolGroupIterator InputFile::symbol_groups_begin() { 477 return SymbolGroupIterator(*this); 478 } 479 480 SymbolGroupIterator InputFile::symbol_groups_end() { 481 return SymbolGroupIterator(); 482 } 483 484 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {} 485 486 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) { 487 if (File.isObj()) { 488 SectionIter = File.obj().section_begin(); 489 scanToNextDebugS(); 490 } 491 } 492 493 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const { 494 bool E = isEnd(); 495 bool RE = R.isEnd(); 496 if (E || RE) 497 return E == RE; 498 499 if (Value.File != R.Value.File) 500 return false; 501 return Index == R.Index; 502 } 503 504 const SymbolGroup &SymbolGroupIterator::operator*() const { 505 assert(!isEnd()); 506 return Value; 507 } 508 SymbolGroup &SymbolGroupIterator::operator*() { 509 assert(!isEnd()); 510 return Value; 511 } 512 513 SymbolGroupIterator &SymbolGroupIterator::operator++() { 514 assert(Value.File && !isEnd()); 515 ++Index; 516 if (isEnd()) 517 return *this; 518 519 if (Value.File->isPdb()) { 520 Value.updatePdbModi(Index); 521 return *this; 522 } 523 524 scanToNextDebugS(); 525 return *this; 526 } 527 528 void SymbolGroupIterator::scanToNextDebugS() { 529 assert(SectionIter); 530 auto End = Value.File->obj().section_end(); 531 auto &Iter = *SectionIter; 532 assert(!isEnd()); 533 534 while (++Iter != End) { 535 DebugSubsectionArray SS; 536 SectionRef SR = *Iter; 537 if (!isDebugSSection(SR, SS)) 538 continue; 539 540 Value.updateDebugS(SS); 541 return; 542 } 543 } 544 545 bool SymbolGroupIterator::isEnd() const { 546 if (!Value.File) 547 return true; 548 if (Value.File->isPdb()) { 549 DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream()); 550 uint32_t Count = Dbi.modules().getModuleCount(); 551 assert(Index <= Count); 552 return Index == Count; 553 } 554 555 assert(SectionIter); 556 return *SectionIter == Value.File->obj().section_end(); 557 } 558 559 static bool isMyCode(const SymbolGroup &Group) { 560 if (Group.getFile().isObj()) 561 return true; 562 563 StringRef Name = Group.name(); 564 if (Name.starts_with("Import:")) 565 return false; 566 if (Name.ends_with_insensitive(".dll")) 567 return false; 568 if (Name.equals_insensitive("* linker *")) 569 return false; 570 if (Name.starts_with_insensitive("f:\\binaries\\Intermediate\\vctools")) 571 return false; 572 if (Name.starts_with_insensitive("f:\\dd\\vctools\\crt")) 573 return false; 574 return true; 575 } 576 577 bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group, 578 const FilterOptions &Filters) { 579 if (Filters.JustMyCode && !isMyCode(Group)) 580 return false; 581 582 // If the arg was not specified on the command line, always dump all modules. 583 if (!Filters.DumpModi) 584 return true; 585 586 // Otherwise, only dump if this is the same module specified. 587 return (Filters.DumpModi == Idx); 588 } 589