1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 10 #include "llvm/ADT/ArrayRef.h" 11 #include "llvm/ADT/STLExtras.h" 12 #include "llvm/DebugInfo/MSF/MSFCommon.h" 13 #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 14 #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 15 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" 16 #include "llvm/DebugInfo/PDB/Native/InfoStream.h" 17 #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h" 18 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" 19 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" 20 #include "llvm/DebugInfo/PDB/Native/RawError.h" 21 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" 22 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 23 #include "llvm/Support/BinaryStream.h" 24 #include "llvm/Support/BinaryStreamArray.h" 25 #include "llvm/Support/BinaryStreamReader.h" 26 #include "llvm/Support/Endian.h" 27 #include "llvm/Support/Error.h" 28 #include "llvm/Support/Path.h" 29 #include <algorithm> 30 #include <cassert> 31 #include <cstdint> 32 33 using namespace llvm; 34 using namespace llvm::codeview; 35 using namespace llvm::msf; 36 using namespace llvm::pdb; 37 38 namespace { 39 typedef FixedStreamArray<support::ulittle32_t> ulittle_array; 40 } // end anonymous namespace 41 42 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer, 43 BumpPtrAllocator &Allocator) 44 : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {} 45 46 PDBFile::~PDBFile() = default; 47 48 StringRef PDBFile::getFilePath() const { return FilePath; } 49 50 StringRef PDBFile::getFileDirectory() const { 51 return sys::path::parent_path(FilePath); 52 } 53 54 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; } 55 56 uint32_t PDBFile::getFreeBlockMapBlock() const { 57 return ContainerLayout.SB->FreeBlockMapBlock; 58 } 59 60 uint32_t PDBFile::getBlockCount() const { 61 return ContainerLayout.SB->NumBlocks; 62 } 63 64 uint32_t PDBFile::getNumDirectoryBytes() const { 65 return ContainerLayout.SB->NumDirectoryBytes; 66 } 67 68 uint32_t PDBFile::getBlockMapIndex() const { 69 return ContainerLayout.SB->BlockMapAddr; 70 } 71 72 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; } 73 74 uint32_t PDBFile::getNumDirectoryBlocks() const { 75 return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes, 76 ContainerLayout.SB->BlockSize); 77 } 78 79 uint64_t PDBFile::getBlockMapOffset() const { 80 return (uint64_t)ContainerLayout.SB->BlockMapAddr * 81 ContainerLayout.SB->BlockSize; 82 } 83 84 uint32_t PDBFile::getNumStreams() const { 85 return ContainerLayout.StreamSizes.size(); 86 } 87 88 uint32_t PDBFile::getMaxStreamSize() const { 89 return *std::max_element(ContainerLayout.StreamSizes.begin(), 90 ContainerLayout.StreamSizes.end()); 91 } 92 93 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const { 94 return ContainerLayout.StreamSizes[StreamIndex]; 95 } 96 97 ArrayRef<support::ulittle32_t> 98 PDBFile::getStreamBlockList(uint32_t StreamIndex) const { 99 return ContainerLayout.StreamMap[StreamIndex]; 100 } 101 102 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); } 103 104 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex, 105 uint32_t NumBytes) const { 106 uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize()); 107 108 ArrayRef<uint8_t> Result; 109 if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result)) 110 return std::move(EC); 111 return Result; 112 } 113 114 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset, 115 ArrayRef<uint8_t> Data) const { 116 return make_error<RawError>(raw_error_code::not_writable, 117 "PDBFile is immutable"); 118 } 119 120 Error PDBFile::parseFileHeaders() { 121 BinaryStreamReader Reader(*Buffer); 122 123 // Initialize SB. 124 const msf::SuperBlock *SB = nullptr; 125 if (auto EC = Reader.readObject(SB)) { 126 consumeError(std::move(EC)); 127 return make_error<RawError>(raw_error_code::corrupt_file, 128 "MSF superblock is missing"); 129 } 130 131 if (auto EC = msf::validateSuperBlock(*SB)) 132 return EC; 133 134 if (Buffer->getLength() % SB->BlockSize != 0) 135 return make_error<RawError>(raw_error_code::corrupt_file, 136 "File size is not a multiple of block size"); 137 ContainerLayout.SB = SB; 138 139 // Initialize Free Page Map. 140 ContainerLayout.FreePageMap.resize(SB->NumBlocks); 141 // The Fpm exists either at block 1 or block 2 of the MSF. However, this 142 // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and 143 // thusly an equal number of total blocks in the file. For a block size 144 // of 4KiB (very common), this would yield 32KiB total blocks in file, for a 145 // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so 146 // the Fpm is split across the file at `getBlockSize()` intervals. As a 147 // result, every block whose index is of the form |{1,2} + getBlockSize() * k| 148 // for any non-negative integer k is an Fpm block. In theory, we only really 149 // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but 150 // current versions of the MSF format already expect the Fpm to be arranged 151 // at getBlockSize() intervals, so we have to be compatible. 152 // See the function fpmPn() for more information: 153 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489 154 auto FpmStream = 155 MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator); 156 BinaryStreamReader FpmReader(*FpmStream); 157 ArrayRef<uint8_t> FpmBytes; 158 if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining())) 159 return EC; 160 uint32_t BlocksRemaining = getBlockCount(); 161 uint32_t BI = 0; 162 for (auto Byte : FpmBytes) { 163 uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U); 164 for (uint32_t I = 0; I < BlocksThisByte; ++I) { 165 if (Byte & (1 << I)) 166 ContainerLayout.FreePageMap[BI] = true; 167 --BlocksRemaining; 168 ++BI; 169 } 170 } 171 172 Reader.setOffset(getBlockMapOffset()); 173 if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks, 174 getNumDirectoryBlocks())) 175 return EC; 176 177 return Error::success(); 178 } 179 180 Error PDBFile::parseStreamData() { 181 assert(ContainerLayout.SB); 182 if (DirectoryStream) 183 return Error::success(); 184 185 uint32_t NumStreams = 0; 186 187 // Normally you can't use a MappedBlockStream without having fully parsed the 188 // PDB file, because it accesses the directory and various other things, which 189 // is exactly what we are attempting to parse. By specifying a custom 190 // subclass of IPDBStreamData which only accesses the fields that have already 191 // been parsed, we can avoid this and reuse MappedBlockStream. 192 auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer, 193 Allocator); 194 BinaryStreamReader Reader(*DS); 195 if (auto EC = Reader.readInteger(NumStreams)) 196 return EC; 197 198 if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams)) 199 return EC; 200 for (uint32_t I = 0; I < NumStreams; ++I) { 201 uint32_t StreamSize = getStreamByteSize(I); 202 // FIXME: What does StreamSize ~0U mean? 203 uint64_t NumExpectedStreamBlocks = 204 StreamSize == UINT32_MAX 205 ? 0 206 : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize); 207 208 // For convenience, we store the block array contiguously. This is because 209 // if someone calls setStreamMap(), it is more convenient to be able to call 210 // it with an ArrayRef instead of setting up a StreamRef. Since the 211 // DirectoryStream is cached in the class and thus lives for the life of the 212 // class, we can be guaranteed that readArray() will return a stable 213 // reference, even if it has to allocate from its internal pool. 214 ArrayRef<support::ulittle32_t> Blocks; 215 if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks)) 216 return EC; 217 for (uint32_t Block : Blocks) { 218 uint64_t BlockEndOffset = 219 (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize; 220 if (BlockEndOffset > getFileSize()) 221 return make_error<RawError>(raw_error_code::corrupt_file, 222 "Stream block map is corrupt."); 223 } 224 ContainerLayout.StreamMap.push_back(Blocks); 225 } 226 227 // We should have read exactly SB->NumDirectoryBytes bytes. 228 assert(Reader.bytesRemaining() == 0); 229 DirectoryStream = std::move(DS); 230 return Error::success(); 231 } 232 233 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const { 234 return ContainerLayout.DirectoryBlocks; 235 } 236 237 std::unique_ptr<MappedBlockStream> 238 PDBFile::createIndexedStream(uint16_t SN) const { 239 if (SN == kInvalidStreamIndex) 240 return nullptr; 241 return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN, 242 Allocator); 243 } 244 245 MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const { 246 MSFStreamLayout Result; 247 auto Blocks = getStreamBlockList(StreamIdx); 248 Result.Blocks.assign(Blocks.begin(), Blocks.end()); 249 Result.Length = getStreamByteSize(StreamIdx); 250 return Result; 251 } 252 253 msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const { 254 return msf::getFpmStreamLayout(ContainerLayout); 255 } 256 257 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() { 258 if (!Globals) { 259 auto DbiS = getPDBDbiStream(); 260 if (!DbiS) 261 return DbiS.takeError(); 262 263 auto GlobalS = 264 safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex()); 265 if (!GlobalS) 266 return GlobalS.takeError(); 267 auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS)); 268 if (auto EC = TempGlobals->reload()) 269 return std::move(EC); 270 Globals = std::move(TempGlobals); 271 } 272 return *Globals; 273 } 274 275 Expected<InfoStream &> PDBFile::getPDBInfoStream() { 276 if (!Info) { 277 auto InfoS = safelyCreateIndexedStream(StreamPDB); 278 if (!InfoS) 279 return InfoS.takeError(); 280 auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS)); 281 if (auto EC = TempInfo->reload()) 282 return std::move(EC); 283 Info = std::move(TempInfo); 284 } 285 return *Info; 286 } 287 288 Expected<DbiStream &> PDBFile::getPDBDbiStream() { 289 if (!Dbi) { 290 auto DbiS = safelyCreateIndexedStream(StreamDBI); 291 if (!DbiS) 292 return DbiS.takeError(); 293 auto TempDbi = llvm::make_unique<DbiStream>(std::move(*DbiS)); 294 if (auto EC = TempDbi->reload(this)) 295 return std::move(EC); 296 Dbi = std::move(TempDbi); 297 } 298 return *Dbi; 299 } 300 301 Expected<TpiStream &> PDBFile::getPDBTpiStream() { 302 if (!Tpi) { 303 auto TpiS = safelyCreateIndexedStream(StreamTPI); 304 if (!TpiS) 305 return TpiS.takeError(); 306 auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS)); 307 if (auto EC = TempTpi->reload()) 308 return std::move(EC); 309 Tpi = std::move(TempTpi); 310 } 311 return *Tpi; 312 } 313 314 Expected<TpiStream &> PDBFile::getPDBIpiStream() { 315 if (!Ipi) { 316 if (!hasPDBIpiStream()) 317 return make_error<RawError>(raw_error_code::no_stream); 318 319 auto IpiS = safelyCreateIndexedStream(StreamIPI); 320 if (!IpiS) 321 return IpiS.takeError(); 322 auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS)); 323 if (auto EC = TempIpi->reload()) 324 return std::move(EC); 325 Ipi = std::move(TempIpi); 326 } 327 return *Ipi; 328 } 329 330 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() { 331 if (!Publics) { 332 auto DbiS = getPDBDbiStream(); 333 if (!DbiS) 334 return DbiS.takeError(); 335 336 auto PublicS = 337 safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex()); 338 if (!PublicS) 339 return PublicS.takeError(); 340 auto TempPublics = llvm::make_unique<PublicsStream>(std::move(*PublicS)); 341 if (auto EC = TempPublics->reload()) 342 return std::move(EC); 343 Publics = std::move(TempPublics); 344 } 345 return *Publics; 346 } 347 348 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() { 349 if (!Symbols) { 350 auto DbiS = getPDBDbiStream(); 351 if (!DbiS) 352 return DbiS.takeError(); 353 354 uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex(); 355 auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum); 356 if (!SymbolS) 357 return SymbolS.takeError(); 358 359 auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS)); 360 if (auto EC = TempSymbols->reload()) 361 return std::move(EC); 362 Symbols = std::move(TempSymbols); 363 } 364 return *Symbols; 365 } 366 367 Expected<PDBStringTable &> PDBFile::getStringTable() { 368 if (!Strings) { 369 auto NS = safelyCreateNamedStream("/names"); 370 if (!NS) 371 return NS.takeError(); 372 373 auto N = llvm::make_unique<PDBStringTable>(); 374 BinaryStreamReader Reader(**NS); 375 if (auto EC = N->reload(Reader)) 376 return std::move(EC); 377 assert(Reader.bytesRemaining() == 0); 378 StringTableStream = std::move(*NS); 379 Strings = std::move(N); 380 } 381 return *Strings; 382 } 383 384 Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() { 385 if (!InjectedSources) { 386 auto IJS = safelyCreateNamedStream("/src/headerblock"); 387 if (!IJS) 388 return IJS.takeError(); 389 390 auto Strings = getStringTable(); 391 if (!Strings) 392 return Strings.takeError(); 393 394 auto IJ = llvm::make_unique<InjectedSourceStream>(std::move(*IJS)); 395 if (auto EC = IJ->reload(*Strings)) 396 return std::move(EC); 397 InjectedSources = std::move(IJ); 398 } 399 return *InjectedSources; 400 } 401 402 uint32_t PDBFile::getPointerSize() { 403 auto DbiS = getPDBDbiStream(); 404 if (!DbiS) 405 return 0; 406 PDB_Machine Machine = DbiS->getMachineType(); 407 if (Machine == PDB_Machine::Amd64) 408 return 8; 409 return 4; 410 } 411 412 bool PDBFile::hasPDBDbiStream() const { 413 return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0; 414 } 415 416 bool PDBFile::hasPDBGlobalsStream() { 417 auto DbiS = getPDBDbiStream(); 418 if (!DbiS) { 419 consumeError(DbiS.takeError()); 420 return false; 421 } 422 423 return DbiS->getGlobalSymbolStreamIndex() < getNumStreams(); 424 } 425 426 bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); } 427 428 bool PDBFile::hasPDBIpiStream() const { 429 if (!hasPDBInfoStream()) 430 return false; 431 432 if (StreamIPI >= getNumStreams()) 433 return false; 434 435 auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream()); 436 return InfoStream.containsIdStream(); 437 } 438 439 bool PDBFile::hasPDBPublicsStream() { 440 auto DbiS = getPDBDbiStream(); 441 if (!DbiS) { 442 consumeError(DbiS.takeError()); 443 return false; 444 } 445 return DbiS->getPublicSymbolStreamIndex() < getNumStreams(); 446 } 447 448 bool PDBFile::hasPDBSymbolStream() { 449 auto DbiS = getPDBDbiStream(); 450 if (!DbiS) 451 return false; 452 return DbiS->getSymRecordStreamIndex() < getNumStreams(); 453 } 454 455 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); } 456 457 bool PDBFile::hasPDBStringTable() { 458 auto IS = getPDBInfoStream(); 459 if (!IS) 460 return false; 461 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names"); 462 if (!ExpectedNSI) { 463 consumeError(ExpectedNSI.takeError()); 464 return false; 465 } 466 assert(*ExpectedNSI < getNumStreams()); 467 return true; 468 } 469 470 bool PDBFile::hasPDBInjectedSourceStream() { 471 auto IS = getPDBInfoStream(); 472 if (!IS) 473 return false; 474 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock"); 475 if (!ExpectedNSI) { 476 consumeError(ExpectedNSI.takeError()); 477 return false; 478 } 479 assert(*ExpectedNSI < getNumStreams()); 480 return true; 481 } 482 483 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a 484 /// stream with that index actually exists. If it does not, the return value 485 /// will have an MSFError with code msf_error_code::no_stream. Else, the return 486 /// value will contain the stream returned by createIndexedStream(). 487 Expected<std::unique_ptr<MappedBlockStream>> 488 PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const { 489 if (StreamIndex >= getNumStreams()) 490 // This rejects kInvalidStreamIndex with an error as well. 491 return make_error<RawError>(raw_error_code::no_stream); 492 return createIndexedStream(StreamIndex); 493 } 494 495 Expected<std::unique_ptr<MappedBlockStream>> 496 PDBFile::safelyCreateNamedStream(StringRef Name) { 497 auto IS = getPDBInfoStream(); 498 if (!IS) 499 return IS.takeError(); 500 501 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name); 502 if (!ExpectedNSI) 503 return ExpectedNSI.takeError(); 504 uint32_t NameStreamIndex = *ExpectedNSI; 505 506 return safelyCreateIndexedStream(NameStreamIndex); 507 } 508