xref: /freebsd/contrib/llvm-project/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp (revision c364ccf9ce2d1d0fc24247aa771cf52e5dfb532a)
1  //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  
9  #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
10  #include "llvm/ADT/ArrayRef.h"
11  #include "llvm/ADT/STLExtras.h"
12  #include "llvm/DebugInfo/MSF/MSFCommon.h"
13  #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
14  #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
15  #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
16  #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
17  #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h"
18  #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
19  #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
20  #include "llvm/DebugInfo/PDB/Native/RawError.h"
21  #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
22  #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
23  #include "llvm/Support/BinaryStream.h"
24  #include "llvm/Support/BinaryStreamArray.h"
25  #include "llvm/Support/BinaryStreamReader.h"
26  #include "llvm/Support/Endian.h"
27  #include "llvm/Support/Error.h"
28  #include "llvm/Support/Path.h"
29  #include <algorithm>
30  #include <cassert>
31  #include <cstdint>
32  
33  using namespace llvm;
34  using namespace llvm::codeview;
35  using namespace llvm::msf;
36  using namespace llvm::pdb;
37  
38  namespace {
39  typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
40  } // end anonymous namespace
41  
42  PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
43                   BumpPtrAllocator &Allocator)
44      : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
45  
// Defaulted destructor, defined out of line in this translation unit.
PDBFile::~PDBFile() = default;
47  
48  StringRef PDBFile::getFilePath() const { return FilePath; }
49  
50  StringRef PDBFile::getFileDirectory() const {
51    return sys::path::parent_path(FilePath);
52  }
53  
54  uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
55  
56  uint32_t PDBFile::getFreeBlockMapBlock() const {
57    return ContainerLayout.SB->FreeBlockMapBlock;
58  }
59  
60  uint32_t PDBFile::getBlockCount() const {
61    return ContainerLayout.SB->NumBlocks;
62  }
63  
64  uint32_t PDBFile::getNumDirectoryBytes() const {
65    return ContainerLayout.SB->NumDirectoryBytes;
66  }
67  
68  uint32_t PDBFile::getBlockMapIndex() const {
69    return ContainerLayout.SB->BlockMapAddr;
70  }
71  
72  uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
73  
74  uint32_t PDBFile::getNumDirectoryBlocks() const {
75    return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
76                              ContainerLayout.SB->BlockSize);
77  }
78  
79  uint64_t PDBFile::getBlockMapOffset() const {
80    return (uint64_t)ContainerLayout.SB->BlockMapAddr *
81           ContainerLayout.SB->BlockSize;
82  }
83  
84  uint32_t PDBFile::getNumStreams() const {
85    return ContainerLayout.StreamSizes.size();
86  }
87  
88  uint32_t PDBFile::getMaxStreamSize() const {
89    return *std::max_element(ContainerLayout.StreamSizes.begin(),
90                             ContainerLayout.StreamSizes.end());
91  }
92  
93  uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
94    return ContainerLayout.StreamSizes[StreamIndex];
95  }
96  
97  ArrayRef<support::ulittle32_t>
98  PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
99    return ContainerLayout.StreamMap[StreamIndex];
100  }
101  
102  uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
103  
104  Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
105                                                    uint32_t NumBytes) const {
106    uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
107  
108    ArrayRef<uint8_t> Result;
109    if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
110      return std::move(EC);
111    return Result;
112  }
113  
114  Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
115                              ArrayRef<uint8_t> Data) const {
116    return make_error<RawError>(raw_error_code::not_writable,
117                                "PDBFile is immutable");
118  }
119  
120  Error PDBFile::parseFileHeaders() {
121    BinaryStreamReader Reader(*Buffer);
122  
123    // Initialize SB.
124    const msf::SuperBlock *SB = nullptr;
125    if (auto EC = Reader.readObject(SB)) {
126      consumeError(std::move(EC));
127      return make_error<RawError>(raw_error_code::corrupt_file,
128                                  "MSF superblock is missing");
129    }
130  
131    if (auto EC = msf::validateSuperBlock(*SB))
132      return EC;
133  
134    if (Buffer->getLength() % SB->BlockSize != 0)
135      return make_error<RawError>(raw_error_code::corrupt_file,
136                                  "File size is not a multiple of block size");
137    ContainerLayout.SB = SB;
138  
139    // Initialize Free Page Map.
140    ContainerLayout.FreePageMap.resize(SB->NumBlocks);
141    // The Fpm exists either at block 1 or block 2 of the MSF.  However, this
142    // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
143    // thusly an equal number of total blocks in the file.  For a block size
144    // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
145    // maximum file size of 32KiB * 4KiB = 128MiB.  Obviously this won't do, so
146    // the Fpm is split across the file at `getBlockSize()` intervals.  As a
147    // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
148    // for any non-negative integer k is an Fpm block.  In theory, we only really
149    // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
150    // current versions of the MSF format already expect the Fpm to be arranged
151    // at getBlockSize() intervals, so we have to be compatible.
152    // See the function fpmPn() for more information:
153    // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
154    auto FpmStream =
155        MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
156    BinaryStreamReader FpmReader(*FpmStream);
157    ArrayRef<uint8_t> FpmBytes;
158    if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
159      return EC;
160    uint32_t BlocksRemaining = getBlockCount();
161    uint32_t BI = 0;
162    for (auto Byte : FpmBytes) {
163      uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
164      for (uint32_t I = 0; I < BlocksThisByte; ++I) {
165        if (Byte & (1 << I))
166          ContainerLayout.FreePageMap[BI] = true;
167        --BlocksRemaining;
168        ++BI;
169      }
170    }
171  
172    Reader.setOffset(getBlockMapOffset());
173    if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
174                                   getNumDirectoryBlocks()))
175      return EC;
176  
177    return Error::success();
178  }
179  
180  Error PDBFile::parseStreamData() {
181    assert(ContainerLayout.SB);
182    if (DirectoryStream)
183      return Error::success();
184  
185    uint32_t NumStreams = 0;
186  
187    // Normally you can't use a MappedBlockStream without having fully parsed the
188    // PDB file, because it accesses the directory and various other things, which
189    // is exactly what we are attempting to parse.  By specifying a custom
190    // subclass of IPDBStreamData which only accesses the fields that have already
191    // been parsed, we can avoid this and reuse MappedBlockStream.
192    auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
193                                                       Allocator);
194    BinaryStreamReader Reader(*DS);
195    if (auto EC = Reader.readInteger(NumStreams))
196      return EC;
197  
198    if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
199      return EC;
200    for (uint32_t I = 0; I < NumStreams; ++I) {
201      uint32_t StreamSize = getStreamByteSize(I);
202      // FIXME: What does StreamSize ~0U mean?
203      uint64_t NumExpectedStreamBlocks =
204          StreamSize == UINT32_MAX
205              ? 0
206              : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
207  
208      // For convenience, we store the block array contiguously.  This is because
209      // if someone calls setStreamMap(), it is more convenient to be able to call
210      // it with an ArrayRef instead of setting up a StreamRef.  Since the
211      // DirectoryStream is cached in the class and thus lives for the life of the
212      // class, we can be guaranteed that readArray() will return a stable
213      // reference, even if it has to allocate from its internal pool.
214      ArrayRef<support::ulittle32_t> Blocks;
215      if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
216        return EC;
217      for (uint32_t Block : Blocks) {
218        uint64_t BlockEndOffset =
219            (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
220        if (BlockEndOffset > getFileSize())
221          return make_error<RawError>(raw_error_code::corrupt_file,
222                                      "Stream block map is corrupt.");
223      }
224      ContainerLayout.StreamMap.push_back(Blocks);
225    }
226  
227    // We should have read exactly SB->NumDirectoryBytes bytes.
228    assert(Reader.bytesRemaining() == 0);
229    DirectoryStream = std::move(DS);
230    return Error::success();
231  }
232  
233  ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
234    return ContainerLayout.DirectoryBlocks;
235  }
236  
237  std::unique_ptr<MappedBlockStream>
238  PDBFile::createIndexedStream(uint16_t SN) const {
239    if (SN == kInvalidStreamIndex)
240      return nullptr;
241    return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
242                                                  Allocator);
243  }
244  
245  MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
246    MSFStreamLayout Result;
247    auto Blocks = getStreamBlockList(StreamIdx);
248    Result.Blocks.assign(Blocks.begin(), Blocks.end());
249    Result.Length = getStreamByteSize(StreamIdx);
250    return Result;
251  }
252  
253  msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
254    return msf::getFpmStreamLayout(ContainerLayout);
255  }
256  
257  Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
258    if (!Globals) {
259      auto DbiS = getPDBDbiStream();
260      if (!DbiS)
261        return DbiS.takeError();
262  
263      auto GlobalS =
264          safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex());
265      if (!GlobalS)
266        return GlobalS.takeError();
267      auto TempGlobals = std::make_unique<GlobalsStream>(std::move(*GlobalS));
268      if (auto EC = TempGlobals->reload())
269        return std::move(EC);
270      Globals = std::move(TempGlobals);
271    }
272    return *Globals;
273  }
274  
275  Expected<InfoStream &> PDBFile::getPDBInfoStream() {
276    if (!Info) {
277      auto InfoS = safelyCreateIndexedStream(StreamPDB);
278      if (!InfoS)
279        return InfoS.takeError();
280      auto TempInfo = std::make_unique<InfoStream>(std::move(*InfoS));
281      if (auto EC = TempInfo->reload())
282        return std::move(EC);
283      Info = std::move(TempInfo);
284    }
285    return *Info;
286  }
287  
288  Expected<DbiStream &> PDBFile::getPDBDbiStream() {
289    if (!Dbi) {
290      auto DbiS = safelyCreateIndexedStream(StreamDBI);
291      if (!DbiS)
292        return DbiS.takeError();
293      auto TempDbi = std::make_unique<DbiStream>(std::move(*DbiS));
294      if (auto EC = TempDbi->reload(this))
295        return std::move(EC);
296      Dbi = std::move(TempDbi);
297    }
298    return *Dbi;
299  }
300  
301  Expected<TpiStream &> PDBFile::getPDBTpiStream() {
302    if (!Tpi) {
303      auto TpiS = safelyCreateIndexedStream(StreamTPI);
304      if (!TpiS)
305        return TpiS.takeError();
306      auto TempTpi = std::make_unique<TpiStream>(*this, std::move(*TpiS));
307      if (auto EC = TempTpi->reload())
308        return std::move(EC);
309      Tpi = std::move(TempTpi);
310    }
311    return *Tpi;
312  }
313  
314  Expected<TpiStream &> PDBFile::getPDBIpiStream() {
315    if (!Ipi) {
316      if (!hasPDBIpiStream())
317        return make_error<RawError>(raw_error_code::no_stream);
318  
319      auto IpiS = safelyCreateIndexedStream(StreamIPI);
320      if (!IpiS)
321        return IpiS.takeError();
322      auto TempIpi = std::make_unique<TpiStream>(*this, std::move(*IpiS));
323      if (auto EC = TempIpi->reload())
324        return std::move(EC);
325      Ipi = std::move(TempIpi);
326    }
327    return *Ipi;
328  }
329  
330  Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
331    if (!Publics) {
332      auto DbiS = getPDBDbiStream();
333      if (!DbiS)
334        return DbiS.takeError();
335  
336      auto PublicS =
337          safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex());
338      if (!PublicS)
339        return PublicS.takeError();
340      auto TempPublics = std::make_unique<PublicsStream>(std::move(*PublicS));
341      if (auto EC = TempPublics->reload())
342        return std::move(EC);
343      Publics = std::move(TempPublics);
344    }
345    return *Publics;
346  }
347  
348  Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
349    if (!Symbols) {
350      auto DbiS = getPDBDbiStream();
351      if (!DbiS)
352        return DbiS.takeError();
353  
354      uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
355      auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum);
356      if (!SymbolS)
357        return SymbolS.takeError();
358  
359      auto TempSymbols = std::make_unique<SymbolStream>(std::move(*SymbolS));
360      if (auto EC = TempSymbols->reload())
361        return std::move(EC);
362      Symbols = std::move(TempSymbols);
363    }
364    return *Symbols;
365  }
366  
367  Expected<PDBStringTable &> PDBFile::getStringTable() {
368    if (!Strings) {
369      auto NS = safelyCreateNamedStream("/names");
370      if (!NS)
371        return NS.takeError();
372  
373      auto N = std::make_unique<PDBStringTable>();
374      BinaryStreamReader Reader(**NS);
375      if (auto EC = N->reload(Reader))
376        return std::move(EC);
377      assert(Reader.bytesRemaining() == 0);
378      StringTableStream = std::move(*NS);
379      Strings = std::move(N);
380    }
381    return *Strings;
382  }
383  
384  Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() {
385    if (!InjectedSources) {
386      auto IJS = safelyCreateNamedStream("/src/headerblock");
387      if (!IJS)
388        return IJS.takeError();
389  
390      auto Strings = getStringTable();
391      if (!Strings)
392        return Strings.takeError();
393  
394      auto IJ = std::make_unique<InjectedSourceStream>(std::move(*IJS));
395      if (auto EC = IJ->reload(*Strings))
396        return std::move(EC);
397      InjectedSources = std::move(IJ);
398    }
399    return *InjectedSources;
400  }
401  
402  uint32_t PDBFile::getPointerSize() {
403    auto DbiS = getPDBDbiStream();
404    if (!DbiS)
405      return 0;
406    PDB_Machine Machine = DbiS->getMachineType();
407    if (Machine == PDB_Machine::Amd64)
408      return 8;
409    return 4;
410  }
411  
412  bool PDBFile::hasPDBDbiStream() const {
413    return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
414  }
415  
416  bool PDBFile::hasPDBGlobalsStream() {
417    auto DbiS = getPDBDbiStream();
418    if (!DbiS) {
419      consumeError(DbiS.takeError());
420      return false;
421    }
422  
423    return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
424  }
425  
426  bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }
427  
428  bool PDBFile::hasPDBIpiStream() const {
429    if (!hasPDBInfoStream())
430      return false;
431  
432    if (StreamIPI >= getNumStreams())
433      return false;
434  
435    auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
436    return InfoStream.containsIdStream();
437  }
438  
439  bool PDBFile::hasPDBPublicsStream() {
440    auto DbiS = getPDBDbiStream();
441    if (!DbiS) {
442      consumeError(DbiS.takeError());
443      return false;
444    }
445    return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
446  }
447  
448  bool PDBFile::hasPDBSymbolStream() {
449    auto DbiS = getPDBDbiStream();
450    if (!DbiS)
451      return false;
452    return DbiS->getSymRecordStreamIndex() < getNumStreams();
453  }
454  
455  bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
456  
457  bool PDBFile::hasPDBStringTable() {
458    auto IS = getPDBInfoStream();
459    if (!IS)
460      return false;
461    Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
462    if (!ExpectedNSI) {
463      consumeError(ExpectedNSI.takeError());
464      return false;
465    }
466    assert(*ExpectedNSI < getNumStreams());
467    return true;
468  }
469  
470  bool PDBFile::hasPDBInjectedSourceStream() {
471    auto IS = getPDBInfoStream();
472    if (!IS)
473      return false;
474    Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock");
475    if (!ExpectedNSI) {
476      consumeError(ExpectedNSI.takeError());
477      return false;
478    }
479    assert(*ExpectedNSI < getNumStreams());
480    return true;
481  }
482  
483  /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
484  /// stream with that index actually exists.  If it does not, the return value
485  /// will have an MSFError with code msf_error_code::no_stream.  Else, the return
486  /// value will contain the stream returned by createIndexedStream().
487  Expected<std::unique_ptr<MappedBlockStream>>
488  PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const {
489    if (StreamIndex >= getNumStreams())
490      // This rejects kInvalidStreamIndex with an error as well.
491      return make_error<RawError>(raw_error_code::no_stream);
492    return createIndexedStream(StreamIndex);
493  }
494  
495  Expected<std::unique_ptr<MappedBlockStream>>
496  PDBFile::safelyCreateNamedStream(StringRef Name) {
497    auto IS = getPDBInfoStream();
498    if (!IS)
499      return IS.takeError();
500  
501    Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name);
502    if (!ExpectedNSI)
503      return ExpectedNSI.takeError();
504    uint32_t NameStreamIndex = *ExpectedNSI;
505  
506    return safelyCreateIndexedStream(NameStreamIndex);
507  }
508