1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 10 #include "llvm/ADT/ArrayRef.h" 11 #include "llvm/DebugInfo/MSF/MSFCommon.h" 12 #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 13 #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 14 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" 15 #include "llvm/DebugInfo/PDB/Native/InfoStream.h" 16 #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h" 17 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" 18 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" 19 #include "llvm/DebugInfo/PDB/Native/RawError.h" 20 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" 21 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 22 #include "llvm/Support/BinaryStream.h" 23 #include "llvm/Support/BinaryStreamArray.h" 24 #include "llvm/Support/BinaryStreamReader.h" 25 #include "llvm/Support/Endian.h" 26 #include "llvm/Support/Error.h" 27 #include "llvm/Support/Path.h" 28 #include <algorithm> 29 #include <cassert> 30 #include <cstdint> 31 32 using namespace llvm; 33 using namespace llvm::codeview; 34 using namespace llvm::msf; 35 using namespace llvm::pdb; 36 37 namespace { 38 typedef FixedStreamArray<support::ulittle32_t> ulittle_array; 39 } // end anonymous namespace 40 41 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer, 42 BumpPtrAllocator &Allocator) 43 : FilePath(std::string(Path)), Allocator(Allocator), 44 Buffer(std::move(PdbFileBuffer)) {} 45 46 PDBFile::~PDBFile() = default; 47 48 StringRef PDBFile::getFilePath() const { return FilePath; } 49 50 StringRef PDBFile::getFileDirectory() const { 51 return sys::path::parent_path(FilePath); 52 } 53 54 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; } 55 56 uint32_t PDBFile::getFreeBlockMapBlock() const { 57 return ContainerLayout.SB->FreeBlockMapBlock; 58 } 59 60 uint32_t PDBFile::getBlockCount() const { 61 return ContainerLayout.SB->NumBlocks; 62 } 63 64 uint32_t PDBFile::getNumDirectoryBytes() const { 65 return ContainerLayout.SB->NumDirectoryBytes; 66 } 67 68 uint32_t PDBFile::getBlockMapIndex() const { 69 return ContainerLayout.SB->BlockMapAddr; 70 } 71 72 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; } 73 74 uint32_t PDBFile::getNumDirectoryBlocks() const { 75 return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes, 76 ContainerLayout.SB->BlockSize); 77 } 78 79 uint64_t PDBFile::getBlockMapOffset() const { 80 return (uint64_t)ContainerLayout.SB->BlockMapAddr * 81 ContainerLayout.SB->BlockSize; 82 } 83 84 uint32_t PDBFile::getNumStreams() const { 85 return ContainerLayout.StreamSizes.size(); 86 } 87 88 uint32_t PDBFile::getMaxStreamSize() const { 89 return *llvm::max_element(ContainerLayout.StreamSizes); 90 } 91 92 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const { 93 return ContainerLayout.StreamSizes[StreamIndex]; 94 } 95 96 ArrayRef<support::ulittle32_t> 97 PDBFile::getStreamBlockList(uint32_t StreamIndex) const { 98 return ContainerLayout.StreamMap[StreamIndex]; 99 } 100 101 uint64_t PDBFile::getFileSize() const { return Buffer->getLength(); } 102 103 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex, 104 uint32_t NumBytes) const { 105 uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize()); 106 107 ArrayRef<uint8_t> Result; 108 if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result)) 109 return std::move(EC); 110 return Result; 111 } 112 113 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset, 114 ArrayRef<uint8_t> Data) const { 115 return make_error<RawError>(raw_error_code::not_writable, 116 "PDBFile is immutable"); 117 } 118 119 Error PDBFile::parseFileHeaders() { 120 BinaryStreamReader Reader(*Buffer); 121 122 // Initialize SB. 123 const msf::SuperBlock *SB = nullptr; 124 if (auto EC = Reader.readObject(SB)) { 125 consumeError(std::move(EC)); 126 return make_error<RawError>(raw_error_code::corrupt_file, 127 "MSF superblock is missing"); 128 } 129 130 if (auto EC = msf::validateSuperBlock(*SB)) 131 return EC; 132 133 if (Buffer->getLength() % SB->BlockSize != 0) 134 return make_error<RawError>(raw_error_code::corrupt_file, 135 "File size is not a multiple of block size"); 136 ContainerLayout.SB = SB; 137 138 // Initialize Free Page Map. 139 ContainerLayout.FreePageMap.resize(SB->NumBlocks); 140 // The Fpm exists either at block 1 or block 2 of the MSF. However, this 141 // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and 142 // thusly an equal number of total blocks in the file. For a block size 143 // of 4KiB (very common), this would yield 32KiB total blocks in file, for a 144 // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so 145 // the Fpm is split across the file at `getBlockSize()` intervals. As a 146 // result, every block whose index is of the form |{1,2} + getBlockSize() * k| 147 // for any non-negative integer k is an Fpm block. In theory, we only really 148 // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but 149 // current versions of the MSF format already expect the Fpm to be arranged 150 // at getBlockSize() intervals, so we have to be compatible. 151 // See the function fpmPn() for more information: 152 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489 153 auto FpmStream = 154 MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator); 155 BinaryStreamReader FpmReader(*FpmStream); 156 ArrayRef<uint8_t> FpmBytes; 157 if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining())) 158 return EC; 159 uint32_t BlocksRemaining = getBlockCount(); 160 uint32_t BI = 0; 161 for (auto Byte : FpmBytes) { 162 uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U); 163 for (uint32_t I = 0; I < BlocksThisByte; ++I) { 164 if (Byte & (1 << I)) 165 ContainerLayout.FreePageMap[BI] = true; 166 --BlocksRemaining; 167 ++BI; 168 } 169 } 170 171 Reader.setOffset(getBlockMapOffset()); 172 if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks, 173 getNumDirectoryBlocks())) 174 return EC; 175 176 return Error::success(); 177 } 178 179 Error PDBFile::parseStreamData() { 180 assert(ContainerLayout.SB); 181 if (DirectoryStream) 182 return Error::success(); 183 184 uint32_t NumStreams = 0; 185 186 // Normally you can't use a MappedBlockStream without having fully parsed the 187 // PDB file, because it accesses the directory and various other things, which 188 // is exactly what we are attempting to parse. By specifying a custom 189 // subclass of IPDBStreamData which only accesses the fields that have already 190 // been parsed, we can avoid this and reuse MappedBlockStream. 191 auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer, 192 Allocator); 193 BinaryStreamReader Reader(*DS); 194 if (auto EC = Reader.readInteger(NumStreams)) 195 return EC; 196 197 if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams)) 198 return EC; 199 for (uint32_t I = 0; I < NumStreams; ++I) { 200 uint32_t StreamSize = getStreamByteSize(I); 201 // FIXME: What does StreamSize ~0U mean? 202 uint64_t NumExpectedStreamBlocks = 203 StreamSize == UINT32_MAX 204 ? 0 205 : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize); 206 207 // For convenience, we store the block array contiguously. This is because 208 // if someone calls setStreamMap(), it is more convenient to be able to call 209 // it with an ArrayRef instead of setting up a StreamRef. Since the 210 // DirectoryStream is cached in the class and thus lives for the life of the 211 // class, we can be guaranteed that readArray() will return a stable 212 // reference, even if it has to allocate from its internal pool. 213 ArrayRef<support::ulittle32_t> Blocks; 214 if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks)) 215 return EC; 216 for (uint32_t Block : Blocks) { 217 uint64_t BlockEndOffset = 218 (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize; 219 if (BlockEndOffset > getFileSize()) 220 return make_error<RawError>(raw_error_code::corrupt_file, 221 "Stream block map is corrupt."); 222 } 223 ContainerLayout.StreamMap.push_back(Blocks); 224 } 225 226 // We should have read exactly SB->NumDirectoryBytes bytes. 227 assert(Reader.bytesRemaining() == 0); 228 DirectoryStream = std::move(DS); 229 return Error::success(); 230 } 231 232 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const { 233 return ContainerLayout.DirectoryBlocks; 234 } 235 236 std::unique_ptr<MappedBlockStream> 237 PDBFile::createIndexedStream(uint16_t SN) const { 238 if (SN == kInvalidStreamIndex) 239 return nullptr; 240 return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN, 241 Allocator); 242 } 243 244 MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const { 245 MSFStreamLayout Result; 246 auto Blocks = getStreamBlockList(StreamIdx); 247 Result.Blocks.assign(Blocks.begin(), Blocks.end()); 248 Result.Length = getStreamByteSize(StreamIdx); 249 return Result; 250 } 251 252 msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const { 253 return msf::getFpmStreamLayout(ContainerLayout); 254 } 255 256 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() { 257 if (!Globals) { 258 auto DbiS = getPDBDbiStream(); 259 if (!DbiS) 260 return DbiS.takeError(); 261 262 auto GlobalS = 263 safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex()); 264 if (!GlobalS) 265 return GlobalS.takeError(); 266 auto TempGlobals = std::make_unique<GlobalsStream>(std::move(*GlobalS)); 267 if (auto EC = TempGlobals->reload()) 268 return std::move(EC); 269 Globals = std::move(TempGlobals); 270 } 271 return *Globals; 272 } 273 274 Expected<InfoStream &> PDBFile::getPDBInfoStream() { 275 if (!Info) { 276 auto InfoS = safelyCreateIndexedStream(StreamPDB); 277 if (!InfoS) 278 return InfoS.takeError(); 279 auto TempInfo = std::make_unique<InfoStream>(std::move(*InfoS)); 280 if (auto EC = TempInfo->reload()) 281 return std::move(EC); 282 Info = std::move(TempInfo); 283 } 284 return *Info; 285 } 286 287 Expected<DbiStream &> PDBFile::getPDBDbiStream() { 288 if (!Dbi) { 289 auto DbiS = safelyCreateIndexedStream(StreamDBI); 290 if (!DbiS) 291 return DbiS.takeError(); 292 auto TempDbi = std::make_unique<DbiStream>(std::move(*DbiS)); 293 if (auto EC = TempDbi->reload(this)) 294 return std::move(EC); 295 Dbi = std::move(TempDbi); 296 } 297 return *Dbi; 298 } 299 300 Expected<TpiStream &> PDBFile::getPDBTpiStream() { 301 if (!Tpi) { 302 auto TpiS = safelyCreateIndexedStream(StreamTPI); 303 if (!TpiS) 304 return TpiS.takeError(); 305 auto TempTpi = std::make_unique<TpiStream>(*this, std::move(*TpiS)); 306 if (auto EC = TempTpi->reload()) 307 return std::move(EC); 308 Tpi = std::move(TempTpi); 309 } 310 return *Tpi; 311 } 312 313 Expected<TpiStream &> PDBFile::getPDBIpiStream() { 314 if (!Ipi) { 315 if (!hasPDBIpiStream()) 316 return make_error<RawError>(raw_error_code::no_stream); 317 318 auto IpiS = safelyCreateIndexedStream(StreamIPI); 319 if (!IpiS) 320 return IpiS.takeError(); 321 auto TempIpi = std::make_unique<TpiStream>(*this, std::move(*IpiS)); 322 if (auto EC = TempIpi->reload()) 323 return std::move(EC); 324 Ipi = std::move(TempIpi); 325 } 326 return *Ipi; 327 } 328 329 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() { 330 if (!Publics) { 331 auto DbiS = getPDBDbiStream(); 332 if (!DbiS) 333 return DbiS.takeError(); 334 335 auto PublicS = 336 safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex()); 337 if (!PublicS) 338 return PublicS.takeError(); 339 auto TempPublics = std::make_unique<PublicsStream>(std::move(*PublicS)); 340 if (auto EC = TempPublics->reload()) 341 return std::move(EC); 342 Publics = std::move(TempPublics); 343 } 344 return *Publics; 345 } 346 347 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() { 348 if (!Symbols) { 349 auto DbiS = getPDBDbiStream(); 350 if (!DbiS) 351 return DbiS.takeError(); 352 353 uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex(); 354 auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum); 355 if (!SymbolS) 356 return SymbolS.takeError(); 357 358 auto TempSymbols = std::make_unique<SymbolStream>(std::move(*SymbolS)); 359 if (auto EC = TempSymbols->reload()) 360 return std::move(EC); 361 Symbols = std::move(TempSymbols); 362 } 363 return *Symbols; 364 } 365 366 Expected<PDBStringTable &> PDBFile::getStringTable() { 367 if (!Strings) { 368 auto NS = safelyCreateNamedStream("/names"); 369 if (!NS) 370 return NS.takeError(); 371 372 auto N = std::make_unique<PDBStringTable>(); 373 BinaryStreamReader Reader(**NS); 374 if (auto EC = N->reload(Reader)) 375 return std::move(EC); 376 assert(Reader.bytesRemaining() == 0); 377 StringTableStream = std::move(*NS); 378 Strings = std::move(N); 379 } 380 return *Strings; 381 } 382 383 Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() { 384 if (!InjectedSources) { 385 auto IJS = safelyCreateNamedStream("/src/headerblock"); 386 if (!IJS) 387 return IJS.takeError(); 388 389 auto Strings = getStringTable(); 390 if (!Strings) 391 return Strings.takeError(); 392 393 auto IJ = std::make_unique<InjectedSourceStream>(std::move(*IJS)); 394 if (auto EC = IJ->reload(*Strings)) 395 return std::move(EC); 396 InjectedSources = std::move(IJ); 397 } 398 return *InjectedSources; 399 } 400 401 uint32_t PDBFile::getPointerSize() { 402 auto DbiS = getPDBDbiStream(); 403 if (!DbiS) 404 return 0; 405 PDB_Machine Machine = DbiS->getMachineType(); 406 if (Machine == PDB_Machine::Amd64) 407 return 8; 408 return 4; 409 } 410 411 bool PDBFile::hasPDBDbiStream() const { 412 return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0; 413 } 414 415 bool PDBFile::hasPDBGlobalsStream() { 416 auto DbiS = getPDBDbiStream(); 417 if (!DbiS) { 418 consumeError(DbiS.takeError()); 419 return false; 420 } 421 422 return DbiS->getGlobalSymbolStreamIndex() < getNumStreams(); 423 } 424 425 bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); } 426 427 bool PDBFile::hasPDBIpiStream() const { 428 if (!hasPDBInfoStream()) 429 return false; 430 431 if (StreamIPI >= getNumStreams()) 432 return false; 433 434 auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream()); 435 return InfoStream.containsIdStream(); 436 } 437 438 bool PDBFile::hasPDBPublicsStream() { 439 auto DbiS = getPDBDbiStream(); 440 if (!DbiS) { 441 consumeError(DbiS.takeError()); 442 return false; 443 } 444 return DbiS->getPublicSymbolStreamIndex() < getNumStreams(); 445 } 446 447 bool PDBFile::hasPDBSymbolStream() { 448 auto DbiS = getPDBDbiStream(); 449 if (!DbiS) 450 return false; 451 return DbiS->getSymRecordStreamIndex() < getNumStreams(); 452 } 453 454 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); } 455 456 bool PDBFile::hasPDBStringTable() { 457 auto IS = getPDBInfoStream(); 458 if (!IS) 459 return false; 460 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names"); 461 if (!ExpectedNSI) { 462 consumeError(ExpectedNSI.takeError()); 463 return false; 464 } 465 assert(*ExpectedNSI < getNumStreams()); 466 return true; 467 } 468 469 bool PDBFile::hasPDBInjectedSourceStream() { 470 auto IS = getPDBInfoStream(); 471 if (!IS) 472 return false; 473 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock"); 474 if (!ExpectedNSI) { 475 consumeError(ExpectedNSI.takeError()); 476 return false; 477 } 478 assert(*ExpectedNSI < getNumStreams()); 479 return true; 480 } 481 482 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a 483 /// stream with that index actually exists. If it does not, the return value 484 /// will have an MSFError with code msf_error_code::no_stream. Else, the return 485 /// value will contain the stream returned by createIndexedStream(). 486 Expected<std::unique_ptr<MappedBlockStream>> 487 PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const { 488 if (StreamIndex >= getNumStreams()) 489 // This rejects kInvalidStreamIndex with an error as well. 490 return make_error<RawError>(raw_error_code::no_stream); 491 return createIndexedStream(StreamIndex); 492 } 493 494 Expected<std::unique_ptr<MappedBlockStream>> 495 PDBFile::safelyCreateNamedStream(StringRef Name) { 496 auto IS = getPDBInfoStream(); 497 if (!IS) 498 return IS.takeError(); 499 500 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name); 501 if (!ExpectedNSI) 502 return ExpectedNSI.takeError(); 503 uint32_t NameStreamIndex = *ExpectedNSI; 504 505 return safelyCreateIndexedStream(NameStreamIndex); 506 } 507