1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 10 #include "llvm/ADT/ArrayRef.h" 11 #include "llvm/ADT/STLExtras.h" 12 #include "llvm/DebugInfo/MSF/MSFCommon.h" 13 #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 14 #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 15 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" 16 #include "llvm/DebugInfo/PDB/Native/InfoStream.h" 17 #include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h" 18 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" 19 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" 20 #include "llvm/DebugInfo/PDB/Native/RawError.h" 21 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" 22 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 23 #include "llvm/Support/BinaryStream.h" 24 #include "llvm/Support/BinaryStreamArray.h" 25 #include "llvm/Support/BinaryStreamReader.h" 26 #include "llvm/Support/Endian.h" 27 #include "llvm/Support/Error.h" 28 #include "llvm/Support/Path.h" 29 #include <algorithm> 30 #include <cassert> 31 #include <cstdint> 32 33 using namespace llvm; 34 using namespace llvm::codeview; 35 using namespace llvm::msf; 36 using namespace llvm::pdb; 37 38 namespace { 39 typedef FixedStreamArray<support::ulittle32_t> ulittle_array; 40 } // end anonymous namespace 41 42 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer, 43 BumpPtrAllocator &Allocator) 44 : FilePath(std::string(Path)), Allocator(Allocator), 45 Buffer(std::move(PdbFileBuffer)) {} 46 47 PDBFile::~PDBFile() = default; 48 49 StringRef PDBFile::getFilePath() const { return FilePath; } 50 51 StringRef PDBFile::getFileDirectory() const { 52 return sys::path::parent_path(FilePath); 53 } 54 55 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; } 56 57 uint32_t PDBFile::getFreeBlockMapBlock() const { 58 return ContainerLayout.SB->FreeBlockMapBlock; 59 } 60 61 uint32_t PDBFile::getBlockCount() const { 62 return ContainerLayout.SB->NumBlocks; 63 } 64 65 uint32_t PDBFile::getNumDirectoryBytes() const { 66 return ContainerLayout.SB->NumDirectoryBytes; 67 } 68 69 uint32_t PDBFile::getBlockMapIndex() const { 70 return ContainerLayout.SB->BlockMapAddr; 71 } 72 73 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; } 74 75 uint32_t PDBFile::getNumDirectoryBlocks() const { 76 return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes, 77 ContainerLayout.SB->BlockSize); 78 } 79 80 uint64_t PDBFile::getBlockMapOffset() const { 81 return (uint64_t)ContainerLayout.SB->BlockMapAddr * 82 ContainerLayout.SB->BlockSize; 83 } 84 85 uint32_t PDBFile::getNumStreams() const { 86 return ContainerLayout.StreamSizes.size(); 87 } 88 89 uint32_t PDBFile::getMaxStreamSize() const { 90 return *std::max_element(ContainerLayout.StreamSizes.begin(), 91 ContainerLayout.StreamSizes.end()); 92 } 93 94 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const { 95 return ContainerLayout.StreamSizes[StreamIndex]; 96 } 97 98 ArrayRef<support::ulittle32_t> 99 PDBFile::getStreamBlockList(uint32_t StreamIndex) const { 100 return ContainerLayout.StreamMap[StreamIndex]; 101 } 102 103 uint64_t PDBFile::getFileSize() const { return Buffer->getLength(); } 104 105 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex, 106 uint32_t NumBytes) const { 107 uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize()); 108 109 ArrayRef<uint8_t> Result; 110 if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result)) 111 return std::move(EC); 112 return Result; 113 } 114 115 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset, 116 ArrayRef<uint8_t> Data) const { 117 return make_error<RawError>(raw_error_code::not_writable, 118 "PDBFile is immutable"); 119 } 120 121 Error PDBFile::parseFileHeaders() { 122 BinaryStreamReader Reader(*Buffer); 123 124 // Initialize SB. 125 const msf::SuperBlock *SB = nullptr; 126 if (auto EC = Reader.readObject(SB)) { 127 consumeError(std::move(EC)); 128 return make_error<RawError>(raw_error_code::corrupt_file, 129 "MSF superblock is missing"); 130 } 131 132 if (auto EC = msf::validateSuperBlock(*SB)) 133 return EC; 134 135 if (Buffer->getLength() % SB->BlockSize != 0) 136 return make_error<RawError>(raw_error_code::corrupt_file, 137 "File size is not a multiple of block size"); 138 ContainerLayout.SB = SB; 139 140 // Initialize Free Page Map. 141 ContainerLayout.FreePageMap.resize(SB->NumBlocks); 142 // The Fpm exists either at block 1 or block 2 of the MSF. However, this 143 // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and 144 // thusly an equal number of total blocks in the file. For a block size 145 // of 4KiB (very common), this would yield 32KiB total blocks in file, for a 146 // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so 147 // the Fpm is split across the file at `getBlockSize()` intervals. As a 148 // result, every block whose index is of the form |{1,2} + getBlockSize() * k| 149 // for any non-negative integer k is an Fpm block. In theory, we only really 150 // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but 151 // current versions of the MSF format already expect the Fpm to be arranged 152 // at getBlockSize() intervals, so we have to be compatible. 153 // See the function fpmPn() for more information: 154 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489 155 auto FpmStream = 156 MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator); 157 BinaryStreamReader FpmReader(*FpmStream); 158 ArrayRef<uint8_t> FpmBytes; 159 if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining())) 160 return EC; 161 uint32_t BlocksRemaining = getBlockCount(); 162 uint32_t BI = 0; 163 for (auto Byte : FpmBytes) { 164 uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U); 165 for (uint32_t I = 0; I < BlocksThisByte; ++I) { 166 if (Byte & (1 << I)) 167 ContainerLayout.FreePageMap[BI] = true; 168 --BlocksRemaining; 169 ++BI; 170 } 171 } 172 173 Reader.setOffset(getBlockMapOffset()); 174 if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks, 175 getNumDirectoryBlocks())) 176 return EC; 177 178 return Error::success(); 179 } 180 181 Error PDBFile::parseStreamData() { 182 assert(ContainerLayout.SB); 183 if (DirectoryStream) 184 return Error::success(); 185 186 uint32_t NumStreams = 0; 187 188 // Normally you can't use a MappedBlockStream without having fully parsed the 189 // PDB file, because it accesses the directory and various other things, which 190 // is exactly what we are attempting to parse. By specifying a custom 191 // subclass of IPDBStreamData which only accesses the fields that have already 192 // been parsed, we can avoid this and reuse MappedBlockStream. 193 auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer, 194 Allocator); 195 BinaryStreamReader Reader(*DS); 196 if (auto EC = Reader.readInteger(NumStreams)) 197 return EC; 198 199 if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams)) 200 return EC; 201 for (uint32_t I = 0; I < NumStreams; ++I) { 202 uint32_t StreamSize = getStreamByteSize(I); 203 // FIXME: What does StreamSize ~0U mean? 204 uint64_t NumExpectedStreamBlocks = 205 StreamSize == UINT32_MAX 206 ? 0 207 : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize); 208 209 // For convenience, we store the block array contiguously. This is because 210 // if someone calls setStreamMap(), it is more convenient to be able to call 211 // it with an ArrayRef instead of setting up a StreamRef. Since the 212 // DirectoryStream is cached in the class and thus lives for the life of the 213 // class, we can be guaranteed that readArray() will return a stable 214 // reference, even if it has to allocate from its internal pool. 215 ArrayRef<support::ulittle32_t> Blocks; 216 if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks)) 217 return EC; 218 for (uint32_t Block : Blocks) { 219 uint64_t BlockEndOffset = 220 (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize; 221 if (BlockEndOffset > getFileSize()) 222 return make_error<RawError>(raw_error_code::corrupt_file, 223 "Stream block map is corrupt."); 224 } 225 ContainerLayout.StreamMap.push_back(Blocks); 226 } 227 228 // We should have read exactly SB->NumDirectoryBytes bytes. 229 assert(Reader.bytesRemaining() == 0); 230 DirectoryStream = std::move(DS); 231 return Error::success(); 232 } 233 234 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const { 235 return ContainerLayout.DirectoryBlocks; 236 } 237 238 std::unique_ptr<MappedBlockStream> 239 PDBFile::createIndexedStream(uint16_t SN) const { 240 if (SN == kInvalidStreamIndex) 241 return nullptr; 242 return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN, 243 Allocator); 244 } 245 246 MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const { 247 MSFStreamLayout Result; 248 auto Blocks = getStreamBlockList(StreamIdx); 249 Result.Blocks.assign(Blocks.begin(), Blocks.end()); 250 Result.Length = getStreamByteSize(StreamIdx); 251 return Result; 252 } 253 254 msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const { 255 return msf::getFpmStreamLayout(ContainerLayout); 256 } 257 258 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() { 259 if (!Globals) { 260 auto DbiS = getPDBDbiStream(); 261 if (!DbiS) 262 return DbiS.takeError(); 263 264 auto GlobalS = 265 safelyCreateIndexedStream(DbiS->getGlobalSymbolStreamIndex()); 266 if (!GlobalS) 267 return GlobalS.takeError(); 268 auto TempGlobals = std::make_unique<GlobalsStream>(std::move(*GlobalS)); 269 if (auto EC = TempGlobals->reload()) 270 return std::move(EC); 271 Globals = std::move(TempGlobals); 272 } 273 return *Globals; 274 } 275 276 Expected<InfoStream &> PDBFile::getPDBInfoStream() { 277 if (!Info) { 278 auto InfoS = safelyCreateIndexedStream(StreamPDB); 279 if (!InfoS) 280 return InfoS.takeError(); 281 auto TempInfo = std::make_unique<InfoStream>(std::move(*InfoS)); 282 if (auto EC = TempInfo->reload()) 283 return std::move(EC); 284 Info = std::move(TempInfo); 285 } 286 return *Info; 287 } 288 289 Expected<DbiStream &> PDBFile::getPDBDbiStream() { 290 if (!Dbi) { 291 auto DbiS = safelyCreateIndexedStream(StreamDBI); 292 if (!DbiS) 293 return DbiS.takeError(); 294 auto TempDbi = std::make_unique<DbiStream>(std::move(*DbiS)); 295 if (auto EC = TempDbi->reload(this)) 296 return std::move(EC); 297 Dbi = std::move(TempDbi); 298 } 299 return *Dbi; 300 } 301 302 Expected<TpiStream &> PDBFile::getPDBTpiStream() { 303 if (!Tpi) { 304 auto TpiS = safelyCreateIndexedStream(StreamTPI); 305 if (!TpiS) 306 return TpiS.takeError(); 307 auto TempTpi = std::make_unique<TpiStream>(*this, std::move(*TpiS)); 308 if (auto EC = TempTpi->reload()) 309 return std::move(EC); 310 Tpi = std::move(TempTpi); 311 } 312 return *Tpi; 313 } 314 315 Expected<TpiStream &> PDBFile::getPDBIpiStream() { 316 if (!Ipi) { 317 if (!hasPDBIpiStream()) 318 return make_error<RawError>(raw_error_code::no_stream); 319 320 auto IpiS = safelyCreateIndexedStream(StreamIPI); 321 if (!IpiS) 322 return IpiS.takeError(); 323 auto TempIpi = std::make_unique<TpiStream>(*this, std::move(*IpiS)); 324 if (auto EC = TempIpi->reload()) 325 return std::move(EC); 326 Ipi = std::move(TempIpi); 327 } 328 return *Ipi; 329 } 330 331 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() { 332 if (!Publics) { 333 auto DbiS = getPDBDbiStream(); 334 if (!DbiS) 335 return DbiS.takeError(); 336 337 auto PublicS = 338 safelyCreateIndexedStream(DbiS->getPublicSymbolStreamIndex()); 339 if (!PublicS) 340 return PublicS.takeError(); 341 auto TempPublics = std::make_unique<PublicsStream>(std::move(*PublicS)); 342 if (auto EC = TempPublics->reload()) 343 return std::move(EC); 344 Publics = std::move(TempPublics); 345 } 346 return *Publics; 347 } 348 349 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() { 350 if (!Symbols) { 351 auto DbiS = getPDBDbiStream(); 352 if (!DbiS) 353 return DbiS.takeError(); 354 355 uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex(); 356 auto SymbolS = safelyCreateIndexedStream(SymbolStreamNum); 357 if (!SymbolS) 358 return SymbolS.takeError(); 359 360 auto TempSymbols = std::make_unique<SymbolStream>(std::move(*SymbolS)); 361 if (auto EC = TempSymbols->reload()) 362 return std::move(EC); 363 Symbols = std::move(TempSymbols); 364 } 365 return *Symbols; 366 } 367 368 Expected<PDBStringTable &> PDBFile::getStringTable() { 369 if (!Strings) { 370 auto NS = safelyCreateNamedStream("/names"); 371 if (!NS) 372 return NS.takeError(); 373 374 auto N = std::make_unique<PDBStringTable>(); 375 BinaryStreamReader Reader(**NS); 376 if (auto EC = N->reload(Reader)) 377 return std::move(EC); 378 assert(Reader.bytesRemaining() == 0); 379 StringTableStream = std::move(*NS); 380 Strings = std::move(N); 381 } 382 return *Strings; 383 } 384 385 Expected<InjectedSourceStream &> PDBFile::getInjectedSourceStream() { 386 if (!InjectedSources) { 387 auto IJS = safelyCreateNamedStream("/src/headerblock"); 388 if (!IJS) 389 return IJS.takeError(); 390 391 auto Strings = getStringTable(); 392 if (!Strings) 393 return Strings.takeError(); 394 395 auto IJ = std::make_unique<InjectedSourceStream>(std::move(*IJS)); 396 if (auto EC = IJ->reload(*Strings)) 397 return std::move(EC); 398 InjectedSources = std::move(IJ); 399 } 400 return *InjectedSources; 401 } 402 403 uint32_t PDBFile::getPointerSize() { 404 auto DbiS = getPDBDbiStream(); 405 if (!DbiS) 406 return 0; 407 PDB_Machine Machine = DbiS->getMachineType(); 408 if (Machine == PDB_Machine::Amd64) 409 return 8; 410 return 4; 411 } 412 413 bool PDBFile::hasPDBDbiStream() const { 414 return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0; 415 } 416 417 bool PDBFile::hasPDBGlobalsStream() { 418 auto DbiS = getPDBDbiStream(); 419 if (!DbiS) { 420 consumeError(DbiS.takeError()); 421 return false; 422 } 423 424 return DbiS->getGlobalSymbolStreamIndex() < getNumStreams(); 425 } 426 427 bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); } 428 429 bool PDBFile::hasPDBIpiStream() const { 430 if (!hasPDBInfoStream()) 431 return false; 432 433 if (StreamIPI >= getNumStreams()) 434 return false; 435 436 auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream()); 437 return InfoStream.containsIdStream(); 438 } 439 440 bool PDBFile::hasPDBPublicsStream() { 441 auto DbiS = getPDBDbiStream(); 442 if (!DbiS) { 443 consumeError(DbiS.takeError()); 444 return false; 445 } 446 return DbiS->getPublicSymbolStreamIndex() < getNumStreams(); 447 } 448 449 bool PDBFile::hasPDBSymbolStream() { 450 auto DbiS = getPDBDbiStream(); 451 if (!DbiS) 452 return false; 453 return DbiS->getSymRecordStreamIndex() < getNumStreams(); 454 } 455 456 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); } 457 458 bool PDBFile::hasPDBStringTable() { 459 auto IS = getPDBInfoStream(); 460 if (!IS) 461 return false; 462 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names"); 463 if (!ExpectedNSI) { 464 consumeError(ExpectedNSI.takeError()); 465 return false; 466 } 467 assert(*ExpectedNSI < getNumStreams()); 468 return true; 469 } 470 471 bool PDBFile::hasPDBInjectedSourceStream() { 472 auto IS = getPDBInfoStream(); 473 if (!IS) 474 return false; 475 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock"); 476 if (!ExpectedNSI) { 477 consumeError(ExpectedNSI.takeError()); 478 return false; 479 } 480 assert(*ExpectedNSI < getNumStreams()); 481 return true; 482 } 483 484 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a 485 /// stream with that index actually exists. If it does not, the return value 486 /// will have an MSFError with code msf_error_code::no_stream. Else, the return 487 /// value will contain the stream returned by createIndexedStream(). 488 Expected<std::unique_ptr<MappedBlockStream>> 489 PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const { 490 if (StreamIndex >= getNumStreams()) 491 // This rejects kInvalidStreamIndex with an error as well. 492 return make_error<RawError>(raw_error_code::no_stream); 493 return createIndexedStream(StreamIndex); 494 } 495 496 Expected<std::unique_ptr<MappedBlockStream>> 497 PDBFile::safelyCreateNamedStream(StringRef Name) { 498 auto IS = getPDBInfoStream(); 499 if (!IS) 500 return IS.takeError(); 501 502 Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex(Name); 503 if (!ExpectedNSI) 504 return ExpectedNSI.takeError(); 505 uint32_t NameStreamIndex = *ExpectedNSI; 506 507 return safelyCreateIndexedStream(NameStreamIndex); 508 } 509