1 //===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Bitcode/BitcodeAnalyzer.h" 10 #include "llvm/Bitcode/BitcodeReader.h" 11 #include "llvm/Bitcode/LLVMBitCodes.h" 12 #include "llvm/Bitstream/BitCodes.h" 13 #include "llvm/Bitstream/BitstreamReader.h" 14 #include "llvm/Support/Format.h" 15 #include "llvm/Support/SHA1.h" 16 17 using namespace llvm; 18 19 static Error reportError(StringRef Message) { 20 return createStringError(std::errc::illegal_byte_sequence, Message.data()); 21 } 22 23 /// Return a symbolic block name if known, otherwise return null. 24 static Optional<const char *> GetBlockName(unsigned BlockID, 25 const BitstreamBlockInfo &BlockInfo, 26 CurStreamTypeType CurStreamType) { 27 // Standard blocks for all bitcode files. 28 if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) { 29 if (BlockID == bitc::BLOCKINFO_BLOCK_ID) 30 return "BLOCKINFO_BLOCK"; 31 return None; 32 } 33 34 // Check to see if we have a blockinfo record for this block, with a name. 35 if (const BitstreamBlockInfo::BlockInfo *Info = 36 BlockInfo.getBlockInfo(BlockID)) { 37 if (!Info->Name.empty()) 38 return Info->Name.c_str(); 39 } 40 41 if (CurStreamType != LLVMIRBitstream) 42 return None; 43 44 switch (BlockID) { 45 default: 46 return None; 47 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: 48 return "OPERAND_BUNDLE_TAGS_BLOCK"; 49 case bitc::MODULE_BLOCK_ID: 50 return "MODULE_BLOCK"; 51 case bitc::PARAMATTR_BLOCK_ID: 52 return "PARAMATTR_BLOCK"; 53 case bitc::PARAMATTR_GROUP_BLOCK_ID: 54 return "PARAMATTR_GROUP_BLOCK_ID"; 55 case bitc::TYPE_BLOCK_ID_NEW: 56 return "TYPE_BLOCK_ID"; 57 case bitc::CONSTANTS_BLOCK_ID: 58 return "CONSTANTS_BLOCK"; 59 case bitc::FUNCTION_BLOCK_ID: 60 return "FUNCTION_BLOCK"; 61 case bitc::IDENTIFICATION_BLOCK_ID: 62 return "IDENTIFICATION_BLOCK_ID"; 63 case bitc::VALUE_SYMTAB_BLOCK_ID: 64 return "VALUE_SYMTAB"; 65 case bitc::METADATA_BLOCK_ID: 66 return "METADATA_BLOCK"; 67 case bitc::METADATA_KIND_BLOCK_ID: 68 return "METADATA_KIND_BLOCK"; 69 case bitc::METADATA_ATTACHMENT_ID: 70 return "METADATA_ATTACHMENT_BLOCK"; 71 case bitc::USELIST_BLOCK_ID: 72 return "USELIST_BLOCK_ID"; 73 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: 74 return "GLOBALVAL_SUMMARY_BLOCK"; 75 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: 76 return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK"; 77 case bitc::MODULE_STRTAB_BLOCK_ID: 78 return "MODULE_STRTAB_BLOCK"; 79 case bitc::STRTAB_BLOCK_ID: 80 return "STRTAB_BLOCK"; 81 case bitc::SYMTAB_BLOCK_ID: 82 return "SYMTAB_BLOCK"; 83 } 84 } 85 86 /// Return a symbolic code name if known, otherwise return null. 87 static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID, 88 const BitstreamBlockInfo &BlockInfo, 89 CurStreamTypeType CurStreamType) { 90 // Standard blocks for all bitcode files. 91 if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) { 92 if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { 93 switch (CodeID) { 94 default: 95 return None; 96 case bitc::BLOCKINFO_CODE_SETBID: 97 return "SETBID"; 98 case bitc::BLOCKINFO_CODE_BLOCKNAME: 99 return "BLOCKNAME"; 100 case bitc::BLOCKINFO_CODE_SETRECORDNAME: 101 return "SETRECORDNAME"; 102 } 103 } 104 return None; 105 } 106 107 // Check to see if we have a blockinfo record for this record, with a name. 108 if (const BitstreamBlockInfo::BlockInfo *Info = 109 BlockInfo.getBlockInfo(BlockID)) { 110 for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i) 111 if (Info->RecordNames[i].first == CodeID) 112 return Info->RecordNames[i].second.c_str(); 113 } 114 115 if (CurStreamType != LLVMIRBitstream) 116 return None; 117 118 #define STRINGIFY_CODE(PREFIX, CODE) \ 119 case bitc::PREFIX##_##CODE: \ 120 return #CODE; 121 switch (BlockID) { 122 default: 123 return None; 124 case bitc::MODULE_BLOCK_ID: 125 switch (CodeID) { 126 default: 127 return None; 128 STRINGIFY_CODE(MODULE_CODE, VERSION) 129 STRINGIFY_CODE(MODULE_CODE, TRIPLE) 130 STRINGIFY_CODE(MODULE_CODE, DATALAYOUT) 131 STRINGIFY_CODE(MODULE_CODE, ASM) 132 STRINGIFY_CODE(MODULE_CODE, SECTIONNAME) 133 STRINGIFY_CODE(MODULE_CODE, DEPLIB) // FIXME: Remove in 4.0 134 STRINGIFY_CODE(MODULE_CODE, GLOBALVAR) 135 STRINGIFY_CODE(MODULE_CODE, FUNCTION) 136 STRINGIFY_CODE(MODULE_CODE, ALIAS) 137 STRINGIFY_CODE(MODULE_CODE, GCNAME) 138 STRINGIFY_CODE(MODULE_CODE, VSTOFFSET) 139 STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED) 140 STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME) 141 STRINGIFY_CODE(MODULE_CODE, HASH) 142 } 143 case bitc::IDENTIFICATION_BLOCK_ID: 144 switch (CodeID) { 145 default: 146 return None; 147 STRINGIFY_CODE(IDENTIFICATION_CODE, STRING) 148 STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH) 149 } 150 case bitc::PARAMATTR_BLOCK_ID: 151 switch (CodeID) { 152 default: 153 return None; 154 // FIXME: Should these be different? 155 case bitc::PARAMATTR_CODE_ENTRY_OLD: 156 return "ENTRY"; 157 case bitc::PARAMATTR_CODE_ENTRY: 158 return "ENTRY"; 159 } 160 case bitc::PARAMATTR_GROUP_BLOCK_ID: 161 switch (CodeID) { 162 default: 163 return None; 164 case bitc::PARAMATTR_GRP_CODE_ENTRY: 165 return "ENTRY"; 166 } 167 case bitc::TYPE_BLOCK_ID_NEW: 168 switch (CodeID) { 169 default: 170 return None; 171 STRINGIFY_CODE(TYPE_CODE, NUMENTRY) 172 STRINGIFY_CODE(TYPE_CODE, VOID) 173 STRINGIFY_CODE(TYPE_CODE, FLOAT) 174 STRINGIFY_CODE(TYPE_CODE, DOUBLE) 175 STRINGIFY_CODE(TYPE_CODE, LABEL) 176 STRINGIFY_CODE(TYPE_CODE, OPAQUE) 177 STRINGIFY_CODE(TYPE_CODE, INTEGER) 178 STRINGIFY_CODE(TYPE_CODE, POINTER) 179 STRINGIFY_CODE(TYPE_CODE, ARRAY) 180 STRINGIFY_CODE(TYPE_CODE, VECTOR) 181 STRINGIFY_CODE(TYPE_CODE, X86_FP80) 182 STRINGIFY_CODE(TYPE_CODE, FP128) 183 STRINGIFY_CODE(TYPE_CODE, PPC_FP128) 184 STRINGIFY_CODE(TYPE_CODE, METADATA) 185 STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON) 186 STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME) 187 STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED) 188 STRINGIFY_CODE(TYPE_CODE, FUNCTION) 189 } 190 191 case bitc::CONSTANTS_BLOCK_ID: 192 switch (CodeID) { 193 default: 194 return None; 195 STRINGIFY_CODE(CST_CODE, SETTYPE) 196 STRINGIFY_CODE(CST_CODE, NULL) 197 STRINGIFY_CODE(CST_CODE, UNDEF) 198 STRINGIFY_CODE(CST_CODE, INTEGER) 199 STRINGIFY_CODE(CST_CODE, WIDE_INTEGER) 200 STRINGIFY_CODE(CST_CODE, FLOAT) 201 STRINGIFY_CODE(CST_CODE, AGGREGATE) 202 STRINGIFY_CODE(CST_CODE, STRING) 203 STRINGIFY_CODE(CST_CODE, CSTRING) 204 STRINGIFY_CODE(CST_CODE, CE_BINOP) 205 STRINGIFY_CODE(CST_CODE, CE_CAST) 206 STRINGIFY_CODE(CST_CODE, CE_GEP) 207 STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP) 208 STRINGIFY_CODE(CST_CODE, CE_SELECT) 209 STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT) 210 STRINGIFY_CODE(CST_CODE, CE_INSERTELT) 211 STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC) 212 STRINGIFY_CODE(CST_CODE, CE_CMP) 213 STRINGIFY_CODE(CST_CODE, INLINEASM) 214 STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX) 215 STRINGIFY_CODE(CST_CODE, CE_UNOP) 216 case bitc::CST_CODE_BLOCKADDRESS: 217 return "CST_CODE_BLOCKADDRESS"; 218 STRINGIFY_CODE(CST_CODE, DATA) 219 } 220 case bitc::FUNCTION_BLOCK_ID: 221 switch (CodeID) { 222 default: 223 return None; 224 STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS) 225 STRINGIFY_CODE(FUNC_CODE, INST_BINOP) 226 STRINGIFY_CODE(FUNC_CODE, INST_CAST) 227 STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD) 228 STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD) 229 STRINGIFY_CODE(FUNC_CODE, INST_SELECT) 230 STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT) 231 STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT) 232 STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC) 233 STRINGIFY_CODE(FUNC_CODE, INST_CMP) 234 STRINGIFY_CODE(FUNC_CODE, INST_RET) 235 STRINGIFY_CODE(FUNC_CODE, INST_BR) 236 STRINGIFY_CODE(FUNC_CODE, INST_SWITCH) 237 STRINGIFY_CODE(FUNC_CODE, INST_INVOKE) 238 STRINGIFY_CODE(FUNC_CODE, INST_UNOP) 239 STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE) 240 STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET) 241 STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET) 242 STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD) 243 STRINGIFY_CODE(FUNC_CODE, INST_PHI) 244 STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA) 245 STRINGIFY_CODE(FUNC_CODE, INST_LOAD) 246 STRINGIFY_CODE(FUNC_CODE, INST_VAARG) 247 STRINGIFY_CODE(FUNC_CODE, INST_STORE) 248 STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL) 249 STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL) 250 STRINGIFY_CODE(FUNC_CODE, INST_CMP2) 251 STRINGIFY_CODE(FUNC_CODE, INST_VSELECT) 252 STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN) 253 STRINGIFY_CODE(FUNC_CODE, INST_CALL) 254 STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC) 255 STRINGIFY_CODE(FUNC_CODE, INST_GEP) 256 STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE) 257 STRINGIFY_CODE(FUNC_CODE, INST_FENCE) 258 STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW) 259 STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC) 260 STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC) 261 STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG) 262 STRINGIFY_CODE(FUNC_CODE, INST_CALLBR) 263 } 264 case bitc::VALUE_SYMTAB_BLOCK_ID: 265 switch (CodeID) { 266 default: 267 return None; 268 STRINGIFY_CODE(VST_CODE, ENTRY) 269 STRINGIFY_CODE(VST_CODE, BBENTRY) 270 STRINGIFY_CODE(VST_CODE, FNENTRY) 271 STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY) 272 } 273 case bitc::MODULE_STRTAB_BLOCK_ID: 274 switch (CodeID) { 275 default: 276 return None; 277 STRINGIFY_CODE(MST_CODE, ENTRY) 278 STRINGIFY_CODE(MST_CODE, HASH) 279 } 280 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: 281 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: 282 switch (CodeID) { 283 default: 284 return None; 285 STRINGIFY_CODE(FS, PERMODULE) 286 STRINGIFY_CODE(FS, PERMODULE_PROFILE) 287 STRINGIFY_CODE(FS, PERMODULE_RELBF) 288 STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS) 289 STRINGIFY_CODE(FS, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS) 290 STRINGIFY_CODE(FS, COMBINED) 291 STRINGIFY_CODE(FS, COMBINED_PROFILE) 292 STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS) 293 STRINGIFY_CODE(FS, ALIAS) 294 STRINGIFY_CODE(FS, COMBINED_ALIAS) 295 STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME) 296 STRINGIFY_CODE(FS, VERSION) 297 STRINGIFY_CODE(FS, FLAGS) 298 STRINGIFY_CODE(FS, TYPE_TESTS) 299 STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_VCALLS) 300 STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_VCALLS) 301 STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_CONST_VCALL) 302 STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_CONST_VCALL) 303 STRINGIFY_CODE(FS, VALUE_GUID) 304 STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS) 305 STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS) 306 STRINGIFY_CODE(FS, TYPE_ID) 307 STRINGIFY_CODE(FS, TYPE_ID_METADATA) 308 } 309 case bitc::METADATA_ATTACHMENT_ID: 310 switch (CodeID) { 311 default: 312 return None; 313 STRINGIFY_CODE(METADATA, ATTACHMENT) 314 } 315 case bitc::METADATA_BLOCK_ID: 316 switch (CodeID) { 317 default: 318 return None; 319 STRINGIFY_CODE(METADATA, STRING_OLD) 320 STRINGIFY_CODE(METADATA, VALUE) 321 STRINGIFY_CODE(METADATA, NODE) 322 STRINGIFY_CODE(METADATA, NAME) 323 STRINGIFY_CODE(METADATA, DISTINCT_NODE) 324 STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK 325 STRINGIFY_CODE(METADATA, LOCATION) 326 STRINGIFY_CODE(METADATA, OLD_NODE) 327 STRINGIFY_CODE(METADATA, OLD_FN_NODE) 328 STRINGIFY_CODE(METADATA, NAMED_NODE) 329 STRINGIFY_CODE(METADATA, GENERIC_DEBUG) 330 STRINGIFY_CODE(METADATA, SUBRANGE) 331 STRINGIFY_CODE(METADATA, ENUMERATOR) 332 STRINGIFY_CODE(METADATA, BASIC_TYPE) 333 STRINGIFY_CODE(METADATA, FILE) 334 STRINGIFY_CODE(METADATA, DERIVED_TYPE) 335 STRINGIFY_CODE(METADATA, COMPOSITE_TYPE) 336 STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE) 337 STRINGIFY_CODE(METADATA, COMPILE_UNIT) 338 STRINGIFY_CODE(METADATA, SUBPROGRAM) 339 STRINGIFY_CODE(METADATA, LEXICAL_BLOCK) 340 STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE) 341 STRINGIFY_CODE(METADATA, NAMESPACE) 342 STRINGIFY_CODE(METADATA, TEMPLATE_TYPE) 343 STRINGIFY_CODE(METADATA, TEMPLATE_VALUE) 344 STRINGIFY_CODE(METADATA, GLOBAL_VAR) 345 STRINGIFY_CODE(METADATA, LOCAL_VAR) 346 STRINGIFY_CODE(METADATA, EXPRESSION) 347 STRINGIFY_CODE(METADATA, OBJC_PROPERTY) 348 STRINGIFY_CODE(METADATA, IMPORTED_ENTITY) 349 STRINGIFY_CODE(METADATA, MODULE) 350 STRINGIFY_CODE(METADATA, MACRO) 351 STRINGIFY_CODE(METADATA, MACRO_FILE) 352 STRINGIFY_CODE(METADATA, STRINGS) 353 STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT) 354 STRINGIFY_CODE(METADATA, GLOBAL_VAR_EXPR) 355 STRINGIFY_CODE(METADATA, INDEX_OFFSET) 356 STRINGIFY_CODE(METADATA, INDEX) 357 } 358 case bitc::METADATA_KIND_BLOCK_ID: 359 switch (CodeID) { 360 default: 361 return None; 362 STRINGIFY_CODE(METADATA, KIND) 363 } 364 case bitc::USELIST_BLOCK_ID: 365 switch (CodeID) { 366 default: 367 return None; 368 case bitc::USELIST_CODE_DEFAULT: 369 return "USELIST_CODE_DEFAULT"; 370 case bitc::USELIST_CODE_BB: 371 return "USELIST_CODE_BB"; 372 } 373 374 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: 375 switch (CodeID) { 376 default: 377 return None; 378 case bitc::OPERAND_BUNDLE_TAG: 379 return "OPERAND_BUNDLE_TAG"; 380 } 381 case bitc::STRTAB_BLOCK_ID: 382 switch (CodeID) { 383 default: 384 return None; 385 case bitc::STRTAB_BLOB: 386 return "BLOB"; 387 } 388 case bitc::SYMTAB_BLOCK_ID: 389 switch (CodeID) { 390 default: 391 return None; 392 case bitc::SYMTAB_BLOB: 393 return "BLOB"; 394 } 395 } 396 #undef STRINGIFY_CODE 397 } 398 399 static void printSize(raw_ostream &OS, double Bits) { 400 OS << format("%.2f/%.2fB/%luW", Bits, Bits / 8, (unsigned long)(Bits / 32)); 401 } 402 static void printSize(raw_ostream &OS, uint64_t Bits) { 403 OS << format("%lub/%.2fB/%luW", (unsigned long)Bits, (double)Bits / 8, 404 (unsigned long)(Bits / 32)); 405 } 406 407 static Expected<CurStreamTypeType> ReadSignature(BitstreamCursor &Stream) { 408 auto tryRead = [&Stream](char &Dest, size_t size) -> Error { 409 if (Expected<SimpleBitstreamCursor::word_t> MaybeWord = Stream.Read(size)) 410 Dest = MaybeWord.get(); 411 else 412 return MaybeWord.takeError(); 413 return Error::success(); 414 }; 415 416 char Signature[6]; 417 if (Error Err = tryRead(Signature[0], 8)) 418 return std::move(Err); 419 if (Error Err = tryRead(Signature[1], 8)) 420 return std::move(Err); 421 422 // Autodetect the file contents, if it is one we know. 423 if (Signature[0] == 'C' && Signature[1] == 'P') { 424 if (Error Err = tryRead(Signature[2], 8)) 425 return std::move(Err); 426 if (Error Err = tryRead(Signature[3], 8)) 427 return std::move(Err); 428 if (Signature[2] == 'C' && Signature[3] == 'H') 429 return ClangSerializedASTBitstream; 430 } else if (Signature[0] == 'D' && Signature[1] == 'I') { 431 if (Error Err = tryRead(Signature[2], 8)) 432 return std::move(Err); 433 if (Error Err = tryRead(Signature[3], 8)) 434 return std::move(Err); 435 if (Signature[2] == 'A' && Signature[3] == 'G') 436 return ClangSerializedDiagnosticsBitstream; 437 } else { 438 if (Error Err = tryRead(Signature[2], 4)) 439 return std::move(Err); 440 if (Error Err = tryRead(Signature[3], 4)) 441 return std::move(Err); 442 if (Error Err = tryRead(Signature[4], 4)) 443 return std::move(Err); 444 if (Error Err = tryRead(Signature[5], 4)) 445 return std::move(Err); 446 if (Signature[0] == 'B' && Signature[1] == 'C' && Signature[2] == 0x0 && 447 Signature[3] == 0xC && Signature[4] == 0xE && Signature[5] == 0xD) 448 return LLVMIRBitstream; 449 } 450 return UnknownBitstream; 451 } 452 453 static Expected<CurStreamTypeType> analyzeHeader(Optional<BCDumpOptions> O, 454 BitstreamCursor &Stream) { 455 ArrayRef<uint8_t> Bytes = Stream.getBitcodeBytes(); 456 const unsigned char *BufPtr = (const unsigned char *)Bytes.data(); 457 const unsigned char *EndBufPtr = BufPtr + Bytes.size(); 458 459 // If we have a wrapper header, parse it and ignore the non-bc file 460 // contents. The magic number is 0x0B17C0DE stored in little endian. 461 if (isBitcodeWrapper(BufPtr, EndBufPtr)) { 462 if (Bytes.size() < BWH_HeaderSize) 463 return reportError("Invalid bitcode wrapper header"); 464 465 if (O) { 466 unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]); 467 unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]); 468 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); 469 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); 470 unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]); 471 472 O->OS << "<BITCODE_WRAPPER_HEADER" 473 << " Magic=" << format_hex(Magic, 10) 474 << " Version=" << format_hex(Version, 10) 475 << " Offset=" << format_hex(Offset, 10) 476 << " Size=" << format_hex(Size, 10) 477 << " CPUType=" << format_hex(CPUType, 10) << "/>\n"; 478 } 479 480 if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true)) 481 return reportError("Invalid bitcode wrapper header"); 482 } 483 484 // Use the cursor modified by skipping the wrapper header. 485 Stream = BitstreamCursor(ArrayRef<uint8_t>(BufPtr, EndBufPtr)); 486 487 return ReadSignature(Stream); 488 } 489 490 static bool canDecodeBlob(unsigned Code, unsigned BlockID) { 491 return BlockID == bitc::METADATA_BLOCK_ID && Code == bitc::METADATA_STRINGS; 492 } 493 494 Error BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent, 495 ArrayRef<uint64_t> Record, 496 StringRef Blob, 497 raw_ostream &OS) { 498 if (Blob.empty()) 499 return reportError("Cannot decode empty blob."); 500 501 if (Record.size() != 2) 502 return reportError( 503 "Decoding metadata strings blob needs two record entries."); 504 505 unsigned NumStrings = Record[0]; 506 unsigned StringsOffset = Record[1]; 507 OS << " num-strings = " << NumStrings << " {\n"; 508 509 StringRef Lengths = Blob.slice(0, StringsOffset); 510 SimpleBitstreamCursor R(Lengths); 511 StringRef Strings = Blob.drop_front(StringsOffset); 512 do { 513 if (R.AtEndOfStream()) 514 return reportError("bad length"); 515 516 Expected<uint32_t> MaybeSize = R.ReadVBR(6); 517 if (!MaybeSize) 518 return MaybeSize.takeError(); 519 uint32_t Size = MaybeSize.get(); 520 if (Strings.size() < Size) 521 return reportError("truncated chars"); 522 523 OS << Indent << " '"; 524 OS.write_escaped(Strings.slice(0, Size), /*hex=*/true); 525 OS << "'\n"; 526 Strings = Strings.drop_front(Size); 527 } while (--NumStrings); 528 529 OS << Indent << " }"; 530 return Error::success(); 531 } 532 533 BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer, 534 Optional<StringRef> BlockInfoBuffer) 535 : Stream(Buffer) { 536 if (BlockInfoBuffer) 537 BlockInfoStream.emplace(*BlockInfoBuffer); 538 } 539 540 Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O, 541 Optional<StringRef> CheckHash) { 542 Expected<CurStreamTypeType> MaybeType = analyzeHeader(O, Stream); 543 if (!MaybeType) 544 return MaybeType.takeError(); 545 else 546 CurStreamType = *MaybeType; 547 548 Stream.setBlockInfo(&BlockInfo); 549 550 // Read block info from BlockInfoStream, if specified. 551 // The block info must be a top-level block. 552 if (BlockInfoStream) { 553 BitstreamCursor BlockInfoCursor(*BlockInfoStream); 554 Expected<CurStreamTypeType> H = analyzeHeader(O, BlockInfoCursor); 555 if (!H) 556 return H.takeError(); 557 558 while (!BlockInfoCursor.AtEndOfStream()) { 559 Expected<unsigned> MaybeCode = BlockInfoCursor.ReadCode(); 560 if (!MaybeCode) 561 return MaybeCode.takeError(); 562 if (MaybeCode.get() != bitc::ENTER_SUBBLOCK) 563 return reportError("Invalid record at top-level in block info file"); 564 565 Expected<unsigned> MaybeBlockID = BlockInfoCursor.ReadSubBlockID(); 566 if (!MaybeBlockID) 567 return MaybeBlockID.takeError(); 568 if (MaybeBlockID.get() == bitc::BLOCKINFO_BLOCK_ID) { 569 Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo = 570 BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true); 571 if (!MaybeNewBlockInfo) 572 return MaybeNewBlockInfo.takeError(); 573 Optional<BitstreamBlockInfo> NewBlockInfo = 574 std::move(MaybeNewBlockInfo.get()); 575 if (!NewBlockInfo) 576 return reportError("Malformed BlockInfoBlock in block info file"); 577 BlockInfo = std::move(*NewBlockInfo); 578 break; 579 } 580 581 if (Error Err = BlockInfoCursor.SkipBlock()) 582 return Err; 583 } 584 } 585 586 // Parse the top-level structure. We only allow blocks at the top-level. 587 while (!Stream.AtEndOfStream()) { 588 Expected<unsigned> MaybeCode = Stream.ReadCode(); 589 if (!MaybeCode) 590 return MaybeCode.takeError(); 591 if (MaybeCode.get() != bitc::ENTER_SUBBLOCK) 592 return reportError("Invalid record at top-level"); 593 594 Expected<unsigned> MaybeBlockID = Stream.ReadSubBlockID(); 595 if (!MaybeBlockID) 596 return MaybeBlockID.takeError(); 597 598 if (Error E = parseBlock(MaybeBlockID.get(), 0, O, CheckHash)) 599 return E; 600 ++NumTopBlocks; 601 } 602 603 return Error::success(); 604 } 605 606 void BitcodeAnalyzer::printStats(BCDumpOptions O, 607 Optional<StringRef> Filename) { 608 uint64_t BufferSizeBits = Stream.getBitcodeBytes().size() * CHAR_BIT; 609 // Print a summary of the read file. 610 O.OS << "Summary "; 611 if (Filename) 612 O.OS << "of " << Filename->data() << ":\n"; 613 O.OS << " Total size: "; 614 printSize(O.OS, BufferSizeBits); 615 O.OS << "\n"; 616 O.OS << " Stream type: "; 617 switch (CurStreamType) { 618 case UnknownBitstream: 619 O.OS << "unknown\n"; 620 break; 621 case LLVMIRBitstream: 622 O.OS << "LLVM IR\n"; 623 break; 624 case ClangSerializedASTBitstream: 625 O.OS << "Clang Serialized AST\n"; 626 break; 627 case ClangSerializedDiagnosticsBitstream: 628 O.OS << "Clang Serialized Diagnostics\n"; 629 break; 630 } 631 O.OS << " # Toplevel Blocks: " << NumTopBlocks << "\n"; 632 O.OS << "\n"; 633 634 // Emit per-block stats. 635 O.OS << "Per-block Summary:\n"; 636 for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(), 637 E = BlockIDStats.end(); 638 I != E; ++I) { 639 O.OS << " Block ID #" << I->first; 640 if (Optional<const char *> BlockName = 641 GetBlockName(I->first, BlockInfo, CurStreamType)) 642 O.OS << " (" << *BlockName << ")"; 643 O.OS << ":\n"; 644 645 const PerBlockIDStats &Stats = I->second; 646 O.OS << " Num Instances: " << Stats.NumInstances << "\n"; 647 O.OS << " Total Size: "; 648 printSize(O.OS, Stats.NumBits); 649 O.OS << "\n"; 650 double pct = (Stats.NumBits * 100.0) / BufferSizeBits; 651 O.OS << " Percent of file: " << format("%2.4f%%", pct) << "\n"; 652 if (Stats.NumInstances > 1) { 653 O.OS << " Average Size: "; 654 printSize(O.OS, Stats.NumBits / (double)Stats.NumInstances); 655 O.OS << "\n"; 656 O.OS << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/" 657 << Stats.NumSubBlocks / (double)Stats.NumInstances << "\n"; 658 O.OS << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/" 659 << Stats.NumAbbrevs / (double)Stats.NumInstances << "\n"; 660 O.OS << " Tot/Avg Records: " << Stats.NumRecords << "/" 661 << Stats.NumRecords / (double)Stats.NumInstances << "\n"; 662 } else { 663 O.OS << " Num SubBlocks: " << Stats.NumSubBlocks << "\n"; 664 O.OS << " Num Abbrevs: " << Stats.NumAbbrevs << "\n"; 665 O.OS << " Num Records: " << Stats.NumRecords << "\n"; 666 } 667 if (Stats.NumRecords) { 668 double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords; 669 O.OS << " Percent Abbrevs: " << format("%2.4f%%", pct) << "\n"; 670 } 671 O.OS << "\n"; 672 673 // Print a histogram of the codes we see. 674 if (O.Histogram && !Stats.CodeFreq.empty()) { 675 std::vector<std::pair<unsigned, unsigned>> FreqPairs; // <freq,code> 676 for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i) 677 if (unsigned Freq = Stats.CodeFreq[i].NumInstances) 678 FreqPairs.push_back(std::make_pair(Freq, i)); 679 llvm::stable_sort(FreqPairs); 680 std::reverse(FreqPairs.begin(), FreqPairs.end()); 681 682 O.OS << "\tRecord Histogram:\n"; 683 O.OS << "\t\t Count # Bits b/Rec % Abv Record Kind\n"; 684 for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) { 685 const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second]; 686 687 O.OS << format("\t\t%7d %9lu", RecStats.NumInstances, 688 (unsigned long)RecStats.TotalBits); 689 690 if (RecStats.NumInstances > 1) 691 O.OS << format(" %9.1f", 692 (double)RecStats.TotalBits / RecStats.NumInstances); 693 else 694 O.OS << " "; 695 696 if (RecStats.NumAbbrev) 697 O.OS << format(" %7.2f", (double)RecStats.NumAbbrev / 698 RecStats.NumInstances * 100); 699 else 700 O.OS << " "; 701 702 O.OS << " "; 703 if (Optional<const char *> CodeName = GetCodeName( 704 FreqPairs[i].second, I->first, BlockInfo, CurStreamType)) 705 O.OS << *CodeName << "\n"; 706 else 707 O.OS << "UnknownCode" << FreqPairs[i].second << "\n"; 708 } 709 O.OS << "\n"; 710 } 711 } 712 } 713 714 Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel, 715 Optional<BCDumpOptions> O, 716 Optional<StringRef> CheckHash) { 717 std::string Indent(IndentLevel * 2, ' '); 718 uint64_t BlockBitStart = Stream.GetCurrentBitNo(); 719 720 // Get the statistics for this BlockID. 721 PerBlockIDStats &BlockStats = BlockIDStats[BlockID]; 722 723 BlockStats.NumInstances++; 724 725 // BLOCKINFO is a special part of the stream. 726 bool DumpRecords = O.hasValue(); 727 if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { 728 if (O) 729 O->OS << Indent << "<BLOCKINFO_BLOCK/>\n"; 730 Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo = 731 Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true); 732 if (!MaybeNewBlockInfo) 733 return MaybeNewBlockInfo.takeError(); 734 Optional<BitstreamBlockInfo> NewBlockInfo = 735 std::move(MaybeNewBlockInfo.get()); 736 if (!NewBlockInfo) 737 return reportError("Malformed BlockInfoBlock"); 738 BlockInfo = std::move(*NewBlockInfo); 739 if (Error Err = Stream.JumpToBit(BlockBitStart)) 740 return Err; 741 // It's not really interesting to dump the contents of the blockinfo 742 // block. 743 DumpRecords = false; 744 } 745 746 unsigned NumWords = 0; 747 if (Error Err = Stream.EnterSubBlock(BlockID, &NumWords)) 748 return Err; 749 750 // Keep it for later, when we see a MODULE_HASH record 751 uint64_t BlockEntryPos = Stream.getCurrentByteNo(); 752 753 Optional<const char *> BlockName = None; 754 if (DumpRecords) { 755 O->OS << Indent << "<"; 756 if ((BlockName = GetBlockName(BlockID, BlockInfo, CurStreamType))) 757 O->OS << *BlockName; 758 else 759 O->OS << "UnknownBlock" << BlockID; 760 761 if (!O->Symbolic && BlockName) 762 O->OS << " BlockID=" << BlockID; 763 764 O->OS << " NumWords=" << NumWords 765 << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n"; 766 } 767 768 SmallVector<uint64_t, 64> Record; 769 770 // Keep the offset to the metadata index if seen. 771 uint64_t MetadataIndexOffset = 0; 772 773 // Read all the records for this block. 774 while (1) { 775 if (Stream.AtEndOfStream()) 776 return reportError("Premature end of bitstream"); 777 778 uint64_t RecordStartBit = Stream.GetCurrentBitNo(); 779 780 Expected<BitstreamEntry> MaybeEntry = 781 Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); 782 if (!MaybeEntry) 783 return MaybeEntry.takeError(); 784 BitstreamEntry Entry = MaybeEntry.get(); 785 786 switch (Entry.Kind) { 787 case BitstreamEntry::Error: 788 return reportError("malformed bitcode file"); 789 case BitstreamEntry::EndBlock: { 790 uint64_t BlockBitEnd = Stream.GetCurrentBitNo(); 791 BlockStats.NumBits += BlockBitEnd - BlockBitStart; 792 if (DumpRecords) { 793 O->OS << Indent << "</"; 794 if (BlockName) 795 O->OS << *BlockName << ">\n"; 796 else 797 O->OS << "UnknownBlock" << BlockID << ">\n"; 798 } 799 return Error::success(); 800 } 801 802 case BitstreamEntry::SubBlock: { 803 uint64_t SubBlockBitStart = Stream.GetCurrentBitNo(); 804 if (Error E = parseBlock(Entry.ID, IndentLevel + 1, O, CheckHash)) 805 return E; 806 ++BlockStats.NumSubBlocks; 807 uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo(); 808 809 // Don't include subblock sizes in the size of this block. 810 BlockBitStart += SubBlockBitEnd - SubBlockBitStart; 811 continue; 812 } 813 case BitstreamEntry::Record: 814 // The interesting case. 815 break; 816 } 817 818 if (Entry.ID == bitc::DEFINE_ABBREV) { 819 if (Error Err = Stream.ReadAbbrevRecord()) 820 return Err; 821 ++BlockStats.NumAbbrevs; 822 continue; 823 } 824 825 Record.clear(); 826 827 ++BlockStats.NumRecords; 828 829 StringRef Blob; 830 uint64_t CurrentRecordPos = Stream.GetCurrentBitNo(); 831 Expected<unsigned> MaybeCode = Stream.readRecord(Entry.ID, Record, &Blob); 832 if (!MaybeCode) 833 return MaybeCode.takeError(); 834 unsigned Code = MaybeCode.get(); 835 836 // Increment the # occurrences of this code. 837 if (BlockStats.CodeFreq.size() <= Code) 838 BlockStats.CodeFreq.resize(Code + 1); 839 BlockStats.CodeFreq[Code].NumInstances++; 840 BlockStats.CodeFreq[Code].TotalBits += 841 Stream.GetCurrentBitNo() - RecordStartBit; 842 if (Entry.ID != bitc::UNABBREV_RECORD) { 843 BlockStats.CodeFreq[Code].NumAbbrev++; 844 ++BlockStats.NumAbbreviatedRecords; 845 } 846 847 if (DumpRecords) { 848 O->OS << Indent << " <"; 849 Optional<const char *> CodeName = 850 GetCodeName(Code, BlockID, BlockInfo, CurStreamType); 851 if (CodeName) 852 O->OS << *CodeName; 853 else 854 O->OS << "UnknownCode" << Code; 855 if (!O->Symbolic && CodeName) 856 O->OS << " codeid=" << Code; 857 const BitCodeAbbrev *Abbv = nullptr; 858 if (Entry.ID != bitc::UNABBREV_RECORD) { 859 Abbv = Stream.getAbbrev(Entry.ID); 860 O->OS << " abbrevid=" << Entry.ID; 861 } 862 863 for (unsigned i = 0, e = Record.size(); i != e; ++i) 864 O->OS << " op" << i << "=" << (int64_t)Record[i]; 865 866 // If we found a metadata index, let's verify that we had an offset 867 // before and validate its forward reference offset was correct! 868 if (BlockID == bitc::METADATA_BLOCK_ID) { 869 if (Code == bitc::METADATA_INDEX_OFFSET) { 870 if (Record.size() != 2) 871 O->OS << "(Invalid record)"; 872 else { 873 auto Offset = Record[0] + (Record[1] << 32); 874 MetadataIndexOffset = Stream.GetCurrentBitNo() + Offset; 875 } 876 } 877 if (Code == bitc::METADATA_INDEX) { 878 O->OS << " (offset "; 879 if (MetadataIndexOffset == RecordStartBit) 880 O->OS << "match)"; 881 else 882 O->OS << "mismatch: " << MetadataIndexOffset << " vs " 883 << RecordStartBit << ")"; 884 } 885 } 886 887 // If we found a module hash, let's verify that it matches! 888 if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH && 889 CheckHash.hasValue()) { 890 if (Record.size() != 5) 891 O->OS << " (invalid)"; 892 else { 893 // Recompute the hash and compare it to the one in the bitcode 894 SHA1 Hasher; 895 StringRef Hash; 896 Hasher.update(*CheckHash); 897 { 898 int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos; 899 auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize); 900 Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize)); 901 Hash = Hasher.result(); 902 } 903 SmallString<20> RecordedHash; 904 RecordedHash.resize(20); 905 int Pos = 0; 906 for (auto &Val : Record) { 907 assert(!(Val >> 32) && "Unexpected high bits set"); 908 RecordedHash[Pos++] = (Val >> 24) & 0xFF; 909 RecordedHash[Pos++] = (Val >> 16) & 0xFF; 910 RecordedHash[Pos++] = (Val >> 8) & 0xFF; 911 RecordedHash[Pos++] = (Val >> 0) & 0xFF; 912 } 913 if (Hash == RecordedHash) 914 O->OS << " (match)"; 915 else 916 O->OS << " (!mismatch!)"; 917 } 918 } 919 920 O->OS << "/>"; 921 922 if (Abbv) { 923 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { 924 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 925 if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array) 926 continue; 927 assert(i + 2 == e && "Array op not second to last"); 928 std::string Str; 929 bool ArrayIsPrintable = true; 930 for (unsigned j = i - 1, je = Record.size(); j != je; ++j) { 931 if (!isPrint(static_cast<unsigned char>(Record[j]))) { 932 ArrayIsPrintable = false; 933 break; 934 } 935 Str += (char)Record[j]; 936 } 937 if (ArrayIsPrintable) 938 O->OS << " record string = '" << Str << "'"; 939 break; 940 } 941 } 942 943 if (Blob.data()) { 944 if (canDecodeBlob(Code, BlockID)) { 945 if (Error E = decodeMetadataStringsBlob(Indent, Record, Blob, O->OS)) 946 return E; 947 } else { 948 O->OS << " blob data = "; 949 if (O->ShowBinaryBlobs) { 950 O->OS << "'"; 951 O->OS.write_escaped(Blob, /*hex=*/true) << "'"; 952 } else { 953 bool BlobIsPrintable = true; 954 for (unsigned i = 0, e = Blob.size(); i != e; ++i) 955 if (!isPrint(static_cast<unsigned char>(Blob[i]))) { 956 BlobIsPrintable = false; 957 break; 958 } 959 960 if (BlobIsPrintable) 961 O->OS << "'" << Blob << "'"; 962 else 963 O->OS << "unprintable, " << Blob.size() << " bytes."; 964 } 965 } 966 } 967 968 O->OS << "\n"; 969 } 970 971 // Make sure that we can skip the current record. 972 if (Error Err = Stream.JumpToBit(CurrentRecordPos)) 973 return Err; 974 if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID)) 975 ; // Do nothing. 976 else 977 return Skipped.takeError(); 978 } 979 } 980 981