xref: /freebsd/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp (revision 3d9fd9fcb432750f3716b28f6ccb0104cd9d351a)
1  //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  
9  #include "llvm/DebugInfo/PDB/Native/InputFile.h"
10  
11  #include "llvm/ADT/StringExtras.h"
12  #include "llvm/BinaryFormat/Magic.h"
13  #include "llvm/DebugInfo/CodeView/CodeView.h"
14  #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
15  #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
16  #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
17  #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
18  #include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
19  #include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
20  #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
21  #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
22  #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
23  #include "llvm/DebugInfo/PDB/Native/RawError.h"
24  #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
25  #include "llvm/DebugInfo/PDB/PDB.h"
26  #include "llvm/Object/COFF.h"
27  #include "llvm/Support/FileSystem.h"
28  #include "llvm/Support/FormatVariadic.h"
29  
30  using namespace llvm;
31  using namespace llvm::codeview;
32  using namespace llvm::object;
33  using namespace llvm::pdb;
34  
// The constructor and destructor are defaulted here, out of line, rather than
// in the header.
// NOTE(review): presumably this lets the header get by with forward
// declarations of the owned types - confirm against InputFile.h.
InputFile::InputFile() = default;
InputFile::~InputFile() = default;
37  
38  Expected<ModuleDebugStreamRef>
39  llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName,
40                                  uint32_t Index) {
41    Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
42    if (!DbiOrErr)
43      return DbiOrErr.takeError();
44    DbiStream &Dbi = *DbiOrErr;
45    const auto &Modules = Dbi.modules();
46    if (Index >= Modules.getModuleCount())
47      return make_error<RawError>(raw_error_code::index_out_of_bounds,
48                                  "Invalid module index");
49  
50    auto Modi = Modules.getModuleDescriptor(Index);
51  
52    ModuleName = Modi.getModuleName();
53  
54    uint16_t ModiStream = Modi.getModuleStreamIndex();
55    if (ModiStream == kInvalidStreamIndex)
56      return make_error<RawError>(raw_error_code::no_stream,
57                                  "Module stream not present");
58  
59    auto ModStreamData = File.createIndexedStream(ModiStream);
60  
61    ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
62    if (auto EC = ModS.reload())
63      return make_error<RawError>(raw_error_code::corrupt_file,
64                                  "Invalid module stream");
65  
66    return std::move(ModS);
67  }
68  
69  Expected<ModuleDebugStreamRef> llvm::pdb::getModuleDebugStream(PDBFile &File,
70                                                                 uint32_t Index) {
71    Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
72    if (!DbiOrErr)
73      return DbiOrErr.takeError();
74    DbiStream &Dbi = *DbiOrErr;
75    const auto &Modules = Dbi.modules();
76    auto Modi = Modules.getModuleDescriptor(Index);
77  
78    uint16_t ModiStream = Modi.getModuleStreamIndex();
79    if (ModiStream == kInvalidStreamIndex)
80      return make_error<RawError>(raw_error_code::no_stream,
81                                  "Module stream not present");
82  
83    auto ModStreamData = File.createIndexedStream(ModiStream);
84  
85    ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
86    if (Error Err = ModS.reload())
87      return make_error<RawError>(raw_error_code::corrupt_file,
88                                  "Invalid module stream");
89  
90    return std::move(ModS);
91  }
92  
93  static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
94                                               StringRef Name,
95                                               BinaryStreamReader &Reader) {
96    if (Expected<StringRef> NameOrErr = Section.getName()) {
97      if (*NameOrErr != Name)
98        return false;
99    } else {
100      consumeError(NameOrErr.takeError());
101      return false;
102    }
103  
104    Expected<StringRef> ContentsOrErr = Section.getContents();
105    if (!ContentsOrErr) {
106      consumeError(ContentsOrErr.takeError());
107      return false;
108    }
109  
110    Reader = BinaryStreamReader(*ContentsOrErr, llvm::endianness::little);
111    uint32_t Magic;
112    if (Reader.bytesRemaining() < sizeof(uint32_t))
113      return false;
114    cantFail(Reader.readInteger(Magic));
115    if (Magic != COFF::DEBUG_SECTION_MAGIC)
116      return false;
117    return true;
118  }
119  
120  static inline bool isDebugSSection(object::SectionRef Section,
121                                     DebugSubsectionArray &Subsections) {
122    BinaryStreamReader Reader;
123    if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
124      return false;
125  
126    cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
127    return true;
128  }
129  
130  static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
131    BinaryStreamReader Reader;
132    if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
133        !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
134      return false;
135    cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
136    return true;
137  }
138  
139  static std::string formatChecksumKind(FileChecksumKind Kind) {
140    switch (Kind) {
141      RETURN_CASE(FileChecksumKind, None, "None");
142      RETURN_CASE(FileChecksumKind, MD5, "MD5");
143      RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
144      RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
145    }
146    return formatUnknownEnum(Kind);
147  }
148  
149  template <typename... Args>
150  static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) {
151    if (Append)
152      Printer.format(std::forward<Args>(args)...);
153    else
154      Printer.formatLine(std::forward<Args>(args)...);
155  }
156  
157  SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
158    if (!File)
159      return;
160  
161    if (File->isPdb())
162      initializeForPdb(GroupIndex);
163    else {
164      Name = ".debug$S";
165      uint32_t I = 0;
166      for (const auto &S : File->obj().sections()) {
167        DebugSubsectionArray SS;
168        if (!isDebugSSection(S, SS))
169          continue;
170  
171        if (!SC.hasChecksums() || !SC.hasStrings())
172          SC.initialize(SS);
173  
174        if (I == GroupIndex)
175          Subsections = SS;
176  
177        if (SC.hasChecksums() && SC.hasStrings())
178          break;
179      }
180      rebuildChecksumMap();
181    }
182  }
183  
184  StringRef SymbolGroup::name() const { return Name; }
185  
186  void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
187    Subsections = SS;
188  }
189  
190  void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
191  
192  void SymbolGroup::initializeForPdb(uint32_t Modi) {
193    assert(File && File->isPdb());
194  
195    // PDB always uses the same string table, but each module has its own
196    // checksums.  So we only set the strings if they're not already set.
197    if (!SC.hasStrings()) {
198      auto StringTable = File->pdb().getStringTable();
199      if (StringTable)
200        SC.setStrings(StringTable->getStringTable());
201      else
202        consumeError(StringTable.takeError());
203    }
204  
205    SC.resetChecksums();
206    auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
207    if (!MDS) {
208      consumeError(MDS.takeError());
209      return;
210    }
211  
212    DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
213    Subsections = DebugStream->getSubsectionsArray();
214    SC.initialize(Subsections);
215    rebuildChecksumMap();
216  }
217  
218  void SymbolGroup::rebuildChecksumMap() {
219    if (!SC.hasChecksums())
220      return;
221  
222    for (const auto &Entry : SC.checksums()) {
223      auto S = SC.strings().getString(Entry.FileNameOffset);
224      if (!S)
225        continue;
226      ChecksumsByFile[*S] = Entry;
227    }
228  }
229  
230  const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
231    assert(File && File->isPdb() && DebugStream);
232    return *DebugStream;
233  }
234  
235  Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
236    return SC.strings().getString(Offset);
237  }
238  
239  Expected<StringRef> SymbolGroup::getNameFromChecksums(uint32_t Offset) const {
240    StringRef Name;
241    if (!SC.hasChecksums()) {
242      return std::move(Name);
243    }
244  
245    auto Iter = SC.checksums().getArray().at(Offset);
246    if (Iter == SC.checksums().getArray().end()) {
247      return std::move(Name);
248    }
249  
250    uint32_t FO = Iter->FileNameOffset;
251    auto ExpectedFile = getNameFromStringTable(FO);
252    if (!ExpectedFile) {
253      return std::move(Name);
254    }
255  
256    return *ExpectedFile;
257  }
258  
259  void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
260                                       bool Append) const {
261    auto FC = ChecksumsByFile.find(File);
262    if (FC == ChecksumsByFile.end()) {
263      formatInternal(Printer, Append, "- (no checksum) {0}", File);
264      return;
265    }
266  
267    formatInternal(Printer, Append, "- ({0}: {1}) {2}",
268                   formatChecksumKind(FC->getValue().Kind),
269                   toHex(FC->getValue().Checksum), File);
270  }
271  
272  void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
273                                              uint32_t Offset,
274                                              bool Append) const {
275    if (!SC.hasChecksums()) {
276      formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
277      return;
278    }
279  
280    auto Iter = SC.checksums().getArray().at(Offset);
281    if (Iter == SC.checksums().getArray().end()) {
282      formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
283      return;
284    }
285  
286    uint32_t FO = Iter->FileNameOffset;
287    auto ExpectedFile = getNameFromStringTable(FO);
288    if (!ExpectedFile) {
289      formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
290      consumeError(ExpectedFile.takeError());
291      return;
292    }
293    if (Iter->Kind == FileChecksumKind::None) {
294      formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
295    } else {
296      formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
297                     formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
298    }
299  }
300  
301  Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
302    InputFile IF;
303    if (!llvm::sys::fs::exists(Path))
304      return make_error<StringError>(formatv("File {0} not found", Path),
305                                     inconvertibleErrorCode());
306  
307    file_magic Magic;
308    if (auto EC = identify_magic(Path, Magic))
309      return make_error<StringError>(
310          formatv("Unable to identify file type for file {0}", Path), EC);
311  
312    if (Magic == file_magic::coff_object) {
313      Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
314      if (!BinaryOrErr)
315        return BinaryOrErr.takeError();
316  
317      IF.CoffObject = std::move(*BinaryOrErr);
318      IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
319      return std::move(IF);
320    }
321  
322    if (Magic == file_magic::pdb) {
323      std::unique_ptr<IPDBSession> Session;
324      if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
325        return std::move(Err);
326  
327      IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
328      IF.PdbOrObj = &IF.PdbSession->getPDBFile();
329  
330      return std::move(IF);
331    }
332  
333    if (!AllowUnknownFile)
334      return make_error<StringError>(
335          formatv("File {0} is not a supported file type", Path),
336          inconvertibleErrorCode());
337  
338    auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,
339                                        /*RequiresNullTerminator=*/false);
340    if (!Result)
341      return make_error<StringError>(
342          formatv("File {0} could not be opened", Path), Result.getError());
343  
344    IF.UnknownFile = std::move(*Result);
345    IF.PdbOrObj = IF.UnknownFile.get();
346    return std::move(IF);
347  }
348  
349  PDBFile &InputFile::pdb() {
350    assert(isPdb());
351    return *cast<PDBFile *>(PdbOrObj);
352  }
353  
354  const PDBFile &InputFile::pdb() const {
355    assert(isPdb());
356    return *cast<PDBFile *>(PdbOrObj);
357  }
358  
359  object::COFFObjectFile &InputFile::obj() {
360    assert(isObj());
361    return *cast<object::COFFObjectFile *>(PdbOrObj);
362  }
363  
364  const object::COFFObjectFile &InputFile::obj() const {
365    assert(isObj());
366    return *cast<object::COFFObjectFile *>(PdbOrObj);
367  }
368  
369  MemoryBuffer &InputFile::unknown() {
370    assert(isUnknown());
371    return *cast<MemoryBuffer *>(PdbOrObj);
372  }
373  
374  const MemoryBuffer &InputFile::unknown() const {
375    assert(isUnknown());
376    return *cast<MemoryBuffer *>(PdbOrObj);
377  }
378  
379  StringRef InputFile::getFilePath() const {
380    if (isPdb())
381      return pdb().getFilePath();
382    if (isObj())
383      return obj().getFileName();
384    assert(isUnknown());
385    return unknown().getBufferIdentifier();
386  }
387  
388  bool InputFile::hasTypes() const {
389    if (isPdb())
390      return pdb().hasPDBTpiStream();
391  
392    for (const auto &Section : obj().sections()) {
393      CVTypeArray Types;
394      if (isDebugTSection(Section, Types))
395        return true;
396    }
397    return false;
398  }
399  
400  bool InputFile::hasIds() const {
401    if (isObj())
402      return false;
403    return pdb().hasPDBIpiStream();
404  }
405  
406  bool InputFile::isPdb() const { return isa<PDBFile *>(PdbOrObj); }
407  
408  bool InputFile::isObj() const {
409    return isa<object::COFFObjectFile *>(PdbOrObj);
410  }
411  
412  bool InputFile::isUnknown() const { return isa<MemoryBuffer *>(PdbOrObj); }
413  
414  codeview::LazyRandomTypeCollection &
415  InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
416    if (Types && Kind == kTypes)
417      return *Types;
418    if (Ids && Kind == kIds)
419      return *Ids;
420  
421    if (Kind == kIds) {
422      assert(isPdb() && pdb().hasPDBIpiStream());
423    }
424  
425    // If the collection was already initialized, we should have just returned it
426    // in step 1.
427    if (isPdb()) {
428      TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
429      auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
430                                             : pdb().getPDBTpiStream());
431  
432      auto &Array = Stream.typeArray();
433      uint32_t Count = Stream.getNumTypeRecords();
434      auto Offsets = Stream.getTypeIndexOffsets();
435      Collection =
436          std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
437      return *Collection;
438    }
439  
440    assert(isObj());
441    assert(Kind == kTypes);
442    assert(!Types);
443  
444    for (const auto &Section : obj().sections()) {
445      CVTypeArray Records;
446      if (!isDebugTSection(Section, Records))
447        continue;
448  
449      Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
450      return *Types;
451    }
452  
453    Types = std::make_unique<LazyRandomTypeCollection>(100);
454    return *Types;
455  }
456  
457  codeview::LazyRandomTypeCollection &InputFile::types() {
458    return getOrCreateTypeCollection(kTypes);
459  }
460  
461  codeview::LazyRandomTypeCollection &InputFile::ids() {
462    // Object files have only one type stream that contains both types and ids.
463    // Similarly, some PDBs don't contain an IPI stream, and for those both types
464    // and IDs are in the same stream.
465    if (isObj() || !pdb().hasPDBIpiStream())
466      return types();
467  
468    return getOrCreateTypeCollection(kIds);
469  }
470  
471  iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
472    return make_range<SymbolGroupIterator>(symbol_groups_begin(),
473                                           symbol_groups_end());
474  }
475  
476  SymbolGroupIterator InputFile::symbol_groups_begin() {
477    return SymbolGroupIterator(*this);
478  }
479  
480  SymbolGroupIterator InputFile::symbol_groups_end() {
481    return SymbolGroupIterator();
482  }
483  
484  SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
485  
486  SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
487    if (File.isObj()) {
488      SectionIter = File.obj().section_begin();
489      scanToNextDebugS();
490    }
491  }
492  
493  bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
494    bool E = isEnd();
495    bool RE = R.isEnd();
496    if (E || RE)
497      return E == RE;
498  
499    if (Value.File != R.Value.File)
500      return false;
501    return Index == R.Index;
502  }
503  
504  const SymbolGroup &SymbolGroupIterator::operator*() const {
505    assert(!isEnd());
506    return Value;
507  }
508  SymbolGroup &SymbolGroupIterator::operator*() {
509    assert(!isEnd());
510    return Value;
511  }
512  
513  SymbolGroupIterator &SymbolGroupIterator::operator++() {
514    assert(Value.File && !isEnd());
515    ++Index;
516    if (isEnd())
517      return *this;
518  
519    if (Value.File->isPdb()) {
520      Value.updatePdbModi(Index);
521      return *this;
522    }
523  
524    scanToNextDebugS();
525    return *this;
526  }
527  
528  void SymbolGroupIterator::scanToNextDebugS() {
529    assert(SectionIter);
530    auto End = Value.File->obj().section_end();
531    auto &Iter = *SectionIter;
532    assert(!isEnd());
533  
534    while (++Iter != End) {
535      DebugSubsectionArray SS;
536      SectionRef SR = *Iter;
537      if (!isDebugSSection(SR, SS))
538        continue;
539  
540      Value.updateDebugS(SS);
541      return;
542    }
543  }
544  
545  bool SymbolGroupIterator::isEnd() const {
546    if (!Value.File)
547      return true;
548    if (Value.File->isPdb()) {
549      DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
550      uint32_t Count = Dbi.modules().getModuleCount();
551      assert(Index <= Count);
552      return Index == Count;
553    }
554  
555    assert(SectionIter);
556    return *SectionIter == Value.File->obj().section_end();
557  }
558  
559  static bool isMyCode(const SymbolGroup &Group) {
560    if (Group.getFile().isObj())
561      return true;
562  
563    StringRef Name = Group.name();
564    if (Name.starts_with("Import:"))
565      return false;
566    if (Name.ends_with_insensitive(".dll"))
567      return false;
568    if (Name.equals_insensitive("* linker *"))
569      return false;
570    if (Name.starts_with_insensitive("f:\\binaries\\Intermediate\\vctools"))
571      return false;
572    if (Name.starts_with_insensitive("f:\\dd\\vctools\\crt"))
573      return false;
574    return true;
575  }
576  
577  bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group,
578                                        const FilterOptions &Filters) {
579    if (Filters.JustMyCode && !isMyCode(Group))
580      return false;
581  
582    // If the arg was not specified on the command line, always dump all modules.
583    if (!Filters.DumpModi)
584      return true;
585  
586    // Otherwise, only dump if this is the same module specified.
587    return (Filters.DumpModi == Idx);
588  }
589