1 //===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Bitcode/BitcodeAnalyzer.h" 10 #include "llvm/Bitcode/BitcodeReader.h" 11 #include "llvm/Bitcode/LLVMBitCodes.h" 12 #include "llvm/Bitstream/BitCodes.h" 13 #include "llvm/Bitstream/BitstreamReader.h" 14 #include "llvm/Support/Format.h" 15 #include "llvm/Support/SHA1.h" 16 #include <optional> 17 18 using namespace llvm; 19 20 static Error reportError(StringRef Message) { 21 return createStringError(std::errc::illegal_byte_sequence, Message.data()); 22 } 23 24 /// Return a symbolic block name if known, otherwise return null. 25 static std::optional<const char *> 26 GetBlockName(unsigned BlockID, const BitstreamBlockInfo &BlockInfo, 27 CurStreamTypeType CurStreamType) { 28 // Standard blocks for all bitcode files. 29 if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) { 30 if (BlockID == bitc::BLOCKINFO_BLOCK_ID) 31 return "BLOCKINFO_BLOCK"; 32 return std::nullopt; 33 } 34 35 // Check to see if we have a blockinfo record for this block, with a name. 36 if (const BitstreamBlockInfo::BlockInfo *Info = 37 BlockInfo.getBlockInfo(BlockID)) { 38 if (!Info->Name.empty()) 39 return Info->Name.c_str(); 40 } 41 42 if (CurStreamType != LLVMIRBitstream) 43 return std::nullopt; 44 45 switch (BlockID) { 46 default: 47 return std::nullopt; 48 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: 49 return "OPERAND_BUNDLE_TAGS_BLOCK"; 50 case bitc::MODULE_BLOCK_ID: 51 return "MODULE_BLOCK"; 52 case bitc::PARAMATTR_BLOCK_ID: 53 return "PARAMATTR_BLOCK"; 54 case bitc::PARAMATTR_GROUP_BLOCK_ID: 55 return "PARAMATTR_GROUP_BLOCK_ID"; 56 case bitc::TYPE_BLOCK_ID_NEW: 57 return "TYPE_BLOCK_ID"; 58 case bitc::CONSTANTS_BLOCK_ID: 59 return "CONSTANTS_BLOCK"; 60 case bitc::FUNCTION_BLOCK_ID: 61 return "FUNCTION_BLOCK"; 62 case bitc::IDENTIFICATION_BLOCK_ID: 63 return "IDENTIFICATION_BLOCK_ID"; 64 case bitc::VALUE_SYMTAB_BLOCK_ID: 65 return "VALUE_SYMTAB"; 66 case bitc::METADATA_BLOCK_ID: 67 return "METADATA_BLOCK"; 68 case bitc::METADATA_KIND_BLOCK_ID: 69 return "METADATA_KIND_BLOCK"; 70 case bitc::METADATA_ATTACHMENT_ID: 71 return "METADATA_ATTACHMENT_BLOCK"; 72 case bitc::USELIST_BLOCK_ID: 73 return "USELIST_BLOCK_ID"; 74 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: 75 return "GLOBALVAL_SUMMARY_BLOCK"; 76 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: 77 return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK"; 78 case bitc::MODULE_STRTAB_BLOCK_ID: 79 return "MODULE_STRTAB_BLOCK"; 80 case bitc::STRTAB_BLOCK_ID: 81 return "STRTAB_BLOCK"; 82 case bitc::SYMTAB_BLOCK_ID: 83 return "SYMTAB_BLOCK"; 84 } 85 } 86 87 /// Return a symbolic code name if known, otherwise return null. 88 static std::optional<const char *> 89 GetCodeName(unsigned CodeID, unsigned BlockID, 90 const BitstreamBlockInfo &BlockInfo, 91 CurStreamTypeType CurStreamType) { 92 // Standard blocks for all bitcode files. 93 if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) { 94 if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { 95 switch (CodeID) { 96 default: 97 return std::nullopt; 98 case bitc::BLOCKINFO_CODE_SETBID: 99 return "SETBID"; 100 case bitc::BLOCKINFO_CODE_BLOCKNAME: 101 return "BLOCKNAME"; 102 case bitc::BLOCKINFO_CODE_SETRECORDNAME: 103 return "SETRECORDNAME"; 104 } 105 } 106 return std::nullopt; 107 } 108 109 // Check to see if we have a blockinfo record for this record, with a name. 110 if (const BitstreamBlockInfo::BlockInfo *Info = 111 BlockInfo.getBlockInfo(BlockID)) { 112 for (const std::pair<unsigned, std::string> &RN : Info->RecordNames) 113 if (RN.first == CodeID) 114 return RN.second.c_str(); 115 } 116 117 if (CurStreamType != LLVMIRBitstream) 118 return std::nullopt; 119 120 #define STRINGIFY_CODE(PREFIX, CODE) \ 121 case bitc::PREFIX##_##CODE: \ 122 return #CODE; 123 switch (BlockID) { 124 default: 125 return std::nullopt; 126 case bitc::MODULE_BLOCK_ID: 127 switch (CodeID) { 128 default: 129 return std::nullopt; 130 STRINGIFY_CODE(MODULE_CODE, VERSION) 131 STRINGIFY_CODE(MODULE_CODE, TRIPLE) 132 STRINGIFY_CODE(MODULE_CODE, DATALAYOUT) 133 STRINGIFY_CODE(MODULE_CODE, ASM) 134 STRINGIFY_CODE(MODULE_CODE, SECTIONNAME) 135 STRINGIFY_CODE(MODULE_CODE, DEPLIB) // Deprecated, present in old bitcode 136 STRINGIFY_CODE(MODULE_CODE, GLOBALVAR) 137 STRINGIFY_CODE(MODULE_CODE, FUNCTION) 138 STRINGIFY_CODE(MODULE_CODE, ALIAS) 139 STRINGIFY_CODE(MODULE_CODE, GCNAME) 140 STRINGIFY_CODE(MODULE_CODE, COMDAT) 141 STRINGIFY_CODE(MODULE_CODE, VSTOFFSET) 142 STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED) 143 STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME) 144 STRINGIFY_CODE(MODULE_CODE, HASH) 145 } 146 case bitc::IDENTIFICATION_BLOCK_ID: 147 switch (CodeID) { 148 default: 149 return std::nullopt; 150 STRINGIFY_CODE(IDENTIFICATION_CODE, STRING) 151 STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH) 152 } 153 case bitc::PARAMATTR_BLOCK_ID: 154 switch (CodeID) { 155 default: 156 return std::nullopt; 157 // FIXME: Should these be different? 158 case bitc::PARAMATTR_CODE_ENTRY_OLD: 159 return "ENTRY"; 160 case bitc::PARAMATTR_CODE_ENTRY: 161 return "ENTRY"; 162 } 163 case bitc::PARAMATTR_GROUP_BLOCK_ID: 164 switch (CodeID) { 165 default: 166 return std::nullopt; 167 case bitc::PARAMATTR_GRP_CODE_ENTRY: 168 return "ENTRY"; 169 } 170 case bitc::TYPE_BLOCK_ID_NEW: 171 switch (CodeID) { 172 default: 173 return std::nullopt; 174 STRINGIFY_CODE(TYPE_CODE, NUMENTRY) 175 STRINGIFY_CODE(TYPE_CODE, VOID) 176 STRINGIFY_CODE(TYPE_CODE, FLOAT) 177 STRINGIFY_CODE(TYPE_CODE, DOUBLE) 178 STRINGIFY_CODE(TYPE_CODE, LABEL) 179 STRINGIFY_CODE(TYPE_CODE, OPAQUE) 180 STRINGIFY_CODE(TYPE_CODE, INTEGER) 181 STRINGIFY_CODE(TYPE_CODE, POINTER) 182 STRINGIFY_CODE(TYPE_CODE, HALF) 183 STRINGIFY_CODE(TYPE_CODE, ARRAY) 184 STRINGIFY_CODE(TYPE_CODE, VECTOR) 185 STRINGIFY_CODE(TYPE_CODE, X86_FP80) 186 STRINGIFY_CODE(TYPE_CODE, FP128) 187 STRINGIFY_CODE(TYPE_CODE, PPC_FP128) 188 STRINGIFY_CODE(TYPE_CODE, METADATA) 189 STRINGIFY_CODE(TYPE_CODE, X86_MMX) 190 STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON) 191 STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME) 192 STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED) 193 STRINGIFY_CODE(TYPE_CODE, FUNCTION) 194 STRINGIFY_CODE(TYPE_CODE, TOKEN) 195 STRINGIFY_CODE(TYPE_CODE, BFLOAT) 196 } 197 198 case bitc::CONSTANTS_BLOCK_ID: 199 switch (CodeID) { 200 default: 201 return std::nullopt; 202 STRINGIFY_CODE(CST_CODE, SETTYPE) 203 STRINGIFY_CODE(CST_CODE, NULL) 204 STRINGIFY_CODE(CST_CODE, UNDEF) 205 STRINGIFY_CODE(CST_CODE, INTEGER) 206 STRINGIFY_CODE(CST_CODE, WIDE_INTEGER) 207 STRINGIFY_CODE(CST_CODE, FLOAT) 208 STRINGIFY_CODE(CST_CODE, AGGREGATE) 209 STRINGIFY_CODE(CST_CODE, STRING) 210 STRINGIFY_CODE(CST_CODE, CSTRING) 211 STRINGIFY_CODE(CST_CODE, CE_BINOP) 212 STRINGIFY_CODE(CST_CODE, CE_CAST) 213 STRINGIFY_CODE(CST_CODE, CE_GEP) 214 STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP) 215 STRINGIFY_CODE(CST_CODE, CE_SELECT) 216 STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT) 217 STRINGIFY_CODE(CST_CODE, CE_INSERTELT) 218 STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC) 219 STRINGIFY_CODE(CST_CODE, CE_CMP) 220 STRINGIFY_CODE(CST_CODE, INLINEASM) 221 STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX) 222 STRINGIFY_CODE(CST_CODE, CE_UNOP) 223 STRINGIFY_CODE(CST_CODE, DSO_LOCAL_EQUIVALENT) 224 STRINGIFY_CODE(CST_CODE, NO_CFI_VALUE) 225 STRINGIFY_CODE(CST_CODE, PTRAUTH) 226 case bitc::CST_CODE_BLOCKADDRESS: 227 return "CST_CODE_BLOCKADDRESS"; 228 STRINGIFY_CODE(CST_CODE, DATA) 229 } 230 case bitc::FUNCTION_BLOCK_ID: 231 switch (CodeID) { 232 default: 233 return std::nullopt; 234 STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS) 235 STRINGIFY_CODE(FUNC_CODE, INST_BINOP) 236 STRINGIFY_CODE(FUNC_CODE, INST_CAST) 237 STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD) 238 STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD) 239 STRINGIFY_CODE(FUNC_CODE, INST_SELECT) 240 STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT) 241 STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT) 242 STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC) 243 STRINGIFY_CODE(FUNC_CODE, INST_CMP) 244 STRINGIFY_CODE(FUNC_CODE, INST_RET) 245 STRINGIFY_CODE(FUNC_CODE, INST_BR) 246 STRINGIFY_CODE(FUNC_CODE, INST_SWITCH) 247 STRINGIFY_CODE(FUNC_CODE, INST_INVOKE) 248 STRINGIFY_CODE(FUNC_CODE, INST_UNOP) 249 STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE) 250 STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET) 251 STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET) 252 STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD) 253 STRINGIFY_CODE(FUNC_CODE, INST_PHI) 254 STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA) 255 STRINGIFY_CODE(FUNC_CODE, INST_LOAD) 256 STRINGIFY_CODE(FUNC_CODE, INST_VAARG) 257 STRINGIFY_CODE(FUNC_CODE, INST_STORE) 258 STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL) 259 STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL) 260 STRINGIFY_CODE(FUNC_CODE, INST_CMP2) 261 STRINGIFY_CODE(FUNC_CODE, INST_VSELECT) 262 STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN) 263 STRINGIFY_CODE(FUNC_CODE, INST_CALL) 264 STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC) 265 STRINGIFY_CODE(FUNC_CODE, INST_GEP) 266 STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE) 267 STRINGIFY_CODE(FUNC_CODE, INST_FENCE) 268 STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW) 269 STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC) 270 STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC) 271 STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG) 272 STRINGIFY_CODE(FUNC_CODE, INST_CALLBR) 273 STRINGIFY_CODE(FUNC_CODE, BLOCKADDR_USERS) 274 STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_DECLARE) 275 STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_VALUE) 276 STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_ASSIGN) 277 STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_VALUE_SIMPLE) 278 STRINGIFY_CODE(FUNC_CODE, DEBUG_RECORD_LABEL) 279 } 280 case bitc::VALUE_SYMTAB_BLOCK_ID: 281 switch (CodeID) { 282 default: 283 return std::nullopt; 284 STRINGIFY_CODE(VST_CODE, ENTRY) 285 STRINGIFY_CODE(VST_CODE, BBENTRY) 286 STRINGIFY_CODE(VST_CODE, FNENTRY) 287 STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY) 288 } 289 case bitc::MODULE_STRTAB_BLOCK_ID: 290 switch (CodeID) { 291 default: 292 return std::nullopt; 293 STRINGIFY_CODE(MST_CODE, ENTRY) 294 STRINGIFY_CODE(MST_CODE, HASH) 295 } 296 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: 297 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: 298 switch (CodeID) { 299 default: 300 return std::nullopt; 301 STRINGIFY_CODE(FS, PERMODULE) 302 STRINGIFY_CODE(FS, PERMODULE_PROFILE) 303 STRINGIFY_CODE(FS, PERMODULE_RELBF) 304 STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS) 305 STRINGIFY_CODE(FS, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS) 306 STRINGIFY_CODE(FS, COMBINED) 307 STRINGIFY_CODE(FS, COMBINED_PROFILE) 308 STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS) 309 STRINGIFY_CODE(FS, ALIAS) 310 STRINGIFY_CODE(FS, COMBINED_ALIAS) 311 STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME) 312 STRINGIFY_CODE(FS, VERSION) 313 STRINGIFY_CODE(FS, FLAGS) 314 STRINGIFY_CODE(FS, TYPE_TESTS) 315 STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_VCALLS) 316 STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_VCALLS) 317 STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_CONST_VCALL) 318 STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_CONST_VCALL) 319 STRINGIFY_CODE(FS, VALUE_GUID) 320 STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS) 321 STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS) 322 STRINGIFY_CODE(FS, TYPE_ID) 323 STRINGIFY_CODE(FS, TYPE_ID_METADATA) 324 STRINGIFY_CODE(FS, BLOCK_COUNT) 325 STRINGIFY_CODE(FS, PARAM_ACCESS) 326 STRINGIFY_CODE(FS, PERMODULE_CALLSITE_INFO) 327 STRINGIFY_CODE(FS, PERMODULE_ALLOC_INFO) 328 STRINGIFY_CODE(FS, COMBINED_CALLSITE_INFO) 329 STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO) 330 STRINGIFY_CODE(FS, STACK_IDS) 331 STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS) 332 STRINGIFY_CODE(FS, CONTEXT_RADIX_TREE_ARRAY) 333 STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO_NO_CONTEXT) 334 } 335 case bitc::METADATA_ATTACHMENT_ID: 336 switch (CodeID) { 337 default: 338 return std::nullopt; 339 STRINGIFY_CODE(METADATA, ATTACHMENT) 340 } 341 case bitc::METADATA_BLOCK_ID: 342 switch (CodeID) { 343 default: 344 return std::nullopt; 345 STRINGIFY_CODE(METADATA, STRING_OLD) 346 STRINGIFY_CODE(METADATA, VALUE) 347 STRINGIFY_CODE(METADATA, NODE) 348 STRINGIFY_CODE(METADATA, NAME) 349 STRINGIFY_CODE(METADATA, DISTINCT_NODE) 350 STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK 351 STRINGIFY_CODE(METADATA, LOCATION) 352 STRINGIFY_CODE(METADATA, OLD_NODE) 353 STRINGIFY_CODE(METADATA, OLD_FN_NODE) 354 STRINGIFY_CODE(METADATA, NAMED_NODE) 355 STRINGIFY_CODE(METADATA, GENERIC_DEBUG) 356 STRINGIFY_CODE(METADATA, SUBRANGE) 357 STRINGIFY_CODE(METADATA, ENUMERATOR) 358 STRINGIFY_CODE(METADATA, BASIC_TYPE) 359 STRINGIFY_CODE(METADATA, FILE) 360 STRINGIFY_CODE(METADATA, DERIVED_TYPE) 361 STRINGIFY_CODE(METADATA, COMPOSITE_TYPE) 362 STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE) 363 STRINGIFY_CODE(METADATA, COMPILE_UNIT) 364 STRINGIFY_CODE(METADATA, SUBPROGRAM) 365 STRINGIFY_CODE(METADATA, LEXICAL_BLOCK) 366 STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE) 367 STRINGIFY_CODE(METADATA, NAMESPACE) 368 STRINGIFY_CODE(METADATA, TEMPLATE_TYPE) 369 STRINGIFY_CODE(METADATA, TEMPLATE_VALUE) 370 STRINGIFY_CODE(METADATA, GLOBAL_VAR) 371 STRINGIFY_CODE(METADATA, LOCAL_VAR) 372 STRINGIFY_CODE(METADATA, EXPRESSION) 373 STRINGIFY_CODE(METADATA, OBJC_PROPERTY) 374 STRINGIFY_CODE(METADATA, IMPORTED_ENTITY) 375 STRINGIFY_CODE(METADATA, MODULE) 376 STRINGIFY_CODE(METADATA, MACRO) 377 STRINGIFY_CODE(METADATA, MACRO_FILE) 378 STRINGIFY_CODE(METADATA, STRINGS) 379 STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT) 380 STRINGIFY_CODE(METADATA, GLOBAL_VAR_EXPR) 381 STRINGIFY_CODE(METADATA, INDEX_OFFSET) 382 STRINGIFY_CODE(METADATA, INDEX) 383 STRINGIFY_CODE(METADATA, ARG_LIST) 384 } 385 case bitc::METADATA_KIND_BLOCK_ID: 386 switch (CodeID) { 387 default: 388 return std::nullopt; 389 STRINGIFY_CODE(METADATA, KIND) 390 } 391 case bitc::USELIST_BLOCK_ID: 392 switch (CodeID) { 393 default: 394 return std::nullopt; 395 case bitc::USELIST_CODE_DEFAULT: 396 return "USELIST_CODE_DEFAULT"; 397 case bitc::USELIST_CODE_BB: 398 return "USELIST_CODE_BB"; 399 } 400 401 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: 402 switch (CodeID) { 403 default: 404 return std::nullopt; 405 case bitc::OPERAND_BUNDLE_TAG: 406 return "OPERAND_BUNDLE_TAG"; 407 } 408 case bitc::STRTAB_BLOCK_ID: 409 switch (CodeID) { 410 default: 411 return std::nullopt; 412 case bitc::STRTAB_BLOB: 413 return "BLOB"; 414 } 415 case bitc::SYMTAB_BLOCK_ID: 416 switch (CodeID) { 417 default: 418 return std::nullopt; 419 case bitc::SYMTAB_BLOB: 420 return "BLOB"; 421 } 422 } 423 #undef STRINGIFY_CODE 424 } 425 426 static void printSize(raw_ostream &OS, double Bits) { 427 OS << format("%.2f/%.2fB/%luW", Bits, Bits / 8, (unsigned long)(Bits / 32)); 428 } 429 static void printSize(raw_ostream &OS, uint64_t Bits) { 430 OS << format("%lub/%.2fB/%luW", (unsigned long)Bits, (double)Bits / 8, 431 (unsigned long)(Bits / 32)); 432 } 433 434 static Expected<CurStreamTypeType> ReadSignature(BitstreamCursor &Stream) { 435 auto tryRead = [&Stream](char &Dest, size_t size) -> Error { 436 if (Expected<SimpleBitstreamCursor::word_t> MaybeWord = Stream.Read(size)) 437 Dest = MaybeWord.get(); 438 else 439 return MaybeWord.takeError(); 440 return Error::success(); 441 }; 442 443 char Signature[6]; 444 if (Error Err = tryRead(Signature[0], 8)) 445 return std::move(Err); 446 if (Error Err = tryRead(Signature[1], 8)) 447 return std::move(Err); 448 449 // Autodetect the file contents, if it is one we know. 450 if (Signature[0] == 'C' && Signature[1] == 'P') { 451 if (Error Err = tryRead(Signature[2], 8)) 452 return std::move(Err); 453 if (Error Err = tryRead(Signature[3], 8)) 454 return std::move(Err); 455 if (Signature[2] == 'C' && Signature[3] == 'H') 456 return ClangSerializedASTBitstream; 457 } else if (Signature[0] == 'D' && Signature[1] == 'I') { 458 if (Error Err = tryRead(Signature[2], 8)) 459 return std::move(Err); 460 if (Error Err = tryRead(Signature[3], 8)) 461 return std::move(Err); 462 if (Signature[2] == 'A' && Signature[3] == 'G') 463 return ClangSerializedDiagnosticsBitstream; 464 } else if (Signature[0] == 'R' && Signature[1] == 'M') { 465 if (Error Err = tryRead(Signature[2], 8)) 466 return std::move(Err); 467 if (Error Err = tryRead(Signature[3], 8)) 468 return std::move(Err); 469 if (Signature[2] == 'R' && Signature[3] == 'K') 470 return LLVMBitstreamRemarks; 471 } else { 472 if (Error Err = tryRead(Signature[2], 4)) 473 return std::move(Err); 474 if (Error Err = tryRead(Signature[3], 4)) 475 return std::move(Err); 476 if (Error Err = tryRead(Signature[4], 4)) 477 return std::move(Err); 478 if (Error Err = tryRead(Signature[5], 4)) 479 return std::move(Err); 480 if (Signature[0] == 'B' && Signature[1] == 'C' && Signature[2] == 0x0 && 481 Signature[3] == 0xC && Signature[4] == 0xE && Signature[5] == 0xD) 482 return LLVMIRBitstream; 483 } 484 return UnknownBitstream; 485 } 486 487 static Expected<CurStreamTypeType> analyzeHeader(std::optional<BCDumpOptions> O, 488 BitstreamCursor &Stream) { 489 ArrayRef<uint8_t> Bytes = Stream.getBitcodeBytes(); 490 const unsigned char *BufPtr = (const unsigned char *)Bytes.data(); 491 const unsigned char *EndBufPtr = BufPtr + Bytes.size(); 492 493 // If we have a wrapper header, parse it and ignore the non-bc file 494 // contents. The magic number is 0x0B17C0DE stored in little endian. 495 if (isBitcodeWrapper(BufPtr, EndBufPtr)) { 496 if (Bytes.size() < BWH_HeaderSize) 497 return reportError("Invalid bitcode wrapper header"); 498 499 if (O) { 500 unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]); 501 unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]); 502 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); 503 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); 504 unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]); 505 506 O->OS << "<BITCODE_WRAPPER_HEADER" 507 << " Magic=" << format_hex(Magic, 10) 508 << " Version=" << format_hex(Version, 10) 509 << " Offset=" << format_hex(Offset, 10) 510 << " Size=" << format_hex(Size, 10) 511 << " CPUType=" << format_hex(CPUType, 10) << "/>\n"; 512 } 513 514 if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true)) 515 return reportError("Invalid bitcode wrapper header"); 516 } 517 518 // Use the cursor modified by skipping the wrapper header. 519 Stream = BitstreamCursor(ArrayRef<uint8_t>(BufPtr, EndBufPtr)); 520 521 return ReadSignature(Stream); 522 } 523 524 static bool canDecodeBlob(unsigned Code, unsigned BlockID) { 525 return BlockID == bitc::METADATA_BLOCK_ID && Code == bitc::METADATA_STRINGS; 526 } 527 528 Error BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent, 529 ArrayRef<uint64_t> Record, 530 StringRef Blob, 531 raw_ostream &OS) { 532 if (Blob.empty()) 533 return reportError("Cannot decode empty blob."); 534 535 if (Record.size() != 2) 536 return reportError( 537 "Decoding metadata strings blob needs two record entries."); 538 539 unsigned NumStrings = Record[0]; 540 unsigned StringsOffset = Record[1]; 541 OS << " num-strings = " << NumStrings << " {\n"; 542 543 StringRef Lengths = Blob.slice(0, StringsOffset); 544 SimpleBitstreamCursor R(Lengths); 545 StringRef Strings = Blob.drop_front(StringsOffset); 546 do { 547 if (R.AtEndOfStream()) 548 return reportError("bad length"); 549 550 uint32_t Size; 551 if (Error E = R.ReadVBR(6).moveInto(Size)) 552 return E; 553 if (Strings.size() < Size) 554 return reportError("truncated chars"); 555 556 OS << Indent << " '"; 557 OS.write_escaped(Strings.slice(0, Size), /*hex=*/true); 558 OS << "'\n"; 559 Strings = Strings.drop_front(Size); 560 } while (--NumStrings); 561 562 OS << Indent << " }"; 563 return Error::success(); 564 } 565 566 BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer, 567 std::optional<StringRef> BlockInfoBuffer) 568 : Stream(Buffer) { 569 if (BlockInfoBuffer) 570 BlockInfoStream.emplace(*BlockInfoBuffer); 571 } 572 573 Error BitcodeAnalyzer::analyze(std::optional<BCDumpOptions> O, 574 std::optional<StringRef> CheckHash) { 575 if (Error E = analyzeHeader(O, Stream).moveInto(CurStreamType)) 576 return E; 577 578 Stream.setBlockInfo(&BlockInfo); 579 580 // Read block info from BlockInfoStream, if specified. 581 // The block info must be a top-level block. 582 if (BlockInfoStream) { 583 BitstreamCursor BlockInfoCursor(*BlockInfoStream); 584 if (Error E = analyzeHeader(O, BlockInfoCursor).takeError()) 585 return E; 586 587 while (!BlockInfoCursor.AtEndOfStream()) { 588 Expected<unsigned> MaybeCode = BlockInfoCursor.ReadCode(); 589 if (!MaybeCode) 590 return MaybeCode.takeError(); 591 if (MaybeCode.get() != bitc::ENTER_SUBBLOCK) 592 return reportError("Invalid record at top-level in block info file"); 593 594 Expected<unsigned> MaybeBlockID = BlockInfoCursor.ReadSubBlockID(); 595 if (!MaybeBlockID) 596 return MaybeBlockID.takeError(); 597 if (MaybeBlockID.get() == bitc::BLOCKINFO_BLOCK_ID) { 598 std::optional<BitstreamBlockInfo> NewBlockInfo; 599 if (Error E = 600 BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true) 601 .moveInto(NewBlockInfo)) 602 return E; 603 if (!NewBlockInfo) 604 return reportError("Malformed BlockInfoBlock in block info file"); 605 BlockInfo = std::move(*NewBlockInfo); 606 break; 607 } 608 609 if (Error Err = BlockInfoCursor.SkipBlock()) 610 return Err; 611 } 612 } 613 614 // Parse the top-level structure. We only allow blocks at the top-level. 615 while (!Stream.AtEndOfStream()) { 616 Expected<unsigned> MaybeCode = Stream.ReadCode(); 617 if (!MaybeCode) 618 return MaybeCode.takeError(); 619 if (MaybeCode.get() != bitc::ENTER_SUBBLOCK) 620 return reportError("Invalid record at top-level"); 621 622 Expected<unsigned> MaybeBlockID = Stream.ReadSubBlockID(); 623 if (!MaybeBlockID) 624 return MaybeBlockID.takeError(); 625 626 if (Error E = parseBlock(MaybeBlockID.get(), 0, O, CheckHash)) 627 return E; 628 ++NumTopBlocks; 629 } 630 631 return Error::success(); 632 } 633 634 void BitcodeAnalyzer::printStats(BCDumpOptions O, 635 std::optional<StringRef> Filename) { 636 uint64_t BufferSizeBits = Stream.getBitcodeBytes().size() * CHAR_BIT; 637 // Print a summary of the read file. 638 O.OS << "Summary "; 639 if (Filename) 640 O.OS << "of " << Filename->data() << ":\n"; 641 O.OS << " Total size: "; 642 printSize(O.OS, BufferSizeBits); 643 O.OS << "\n"; 644 O.OS << " Stream type: "; 645 switch (CurStreamType) { 646 case UnknownBitstream: 647 O.OS << "unknown\n"; 648 break; 649 case LLVMIRBitstream: 650 O.OS << "LLVM IR\n"; 651 break; 652 case ClangSerializedASTBitstream: 653 O.OS << "Clang Serialized AST\n"; 654 break; 655 case ClangSerializedDiagnosticsBitstream: 656 O.OS << "Clang Serialized Diagnostics\n"; 657 break; 658 case LLVMBitstreamRemarks: 659 O.OS << "LLVM Remarks\n"; 660 break; 661 } 662 O.OS << " # Toplevel Blocks: " << NumTopBlocks << "\n"; 663 O.OS << "\n"; 664 665 // Emit per-block stats. 666 O.OS << "Per-block Summary:\n"; 667 for (const auto &Stat : BlockIDStats) { 668 O.OS << " Block ID #" << Stat.first; 669 if (std::optional<const char *> BlockName = 670 GetBlockName(Stat.first, BlockInfo, CurStreamType)) 671 O.OS << " (" << *BlockName << ")"; 672 O.OS << ":\n"; 673 674 const PerBlockIDStats &Stats = Stat.second; 675 O.OS << " Num Instances: " << Stats.NumInstances << "\n"; 676 O.OS << " Total Size: "; 677 printSize(O.OS, Stats.NumBits); 678 O.OS << "\n"; 679 double pct = (Stats.NumBits * 100.0) / BufferSizeBits; 680 O.OS << " Percent of file: " << format("%2.4f%%", pct) << "\n"; 681 if (Stats.NumInstances > 1) { 682 O.OS << " Average Size: "; 683 printSize(O.OS, Stats.NumBits / (double)Stats.NumInstances); 684 O.OS << "\n"; 685 O.OS << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/" 686 << Stats.NumSubBlocks / (double)Stats.NumInstances << "\n"; 687 O.OS << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/" 688 << Stats.NumAbbrevs / (double)Stats.NumInstances << "\n"; 689 O.OS << " Tot/Avg Records: " << Stats.NumRecords << "/" 690 << Stats.NumRecords / (double)Stats.NumInstances << "\n"; 691 } else { 692 O.OS << " Num SubBlocks: " << Stats.NumSubBlocks << "\n"; 693 O.OS << " Num Abbrevs: " << Stats.NumAbbrevs << "\n"; 694 O.OS << " Num Records: " << Stats.NumRecords << "\n"; 695 } 696 if (Stats.NumRecords) { 697 double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords; 698 O.OS << " Percent Abbrevs: " << format("%2.4f%%", pct) << "\n"; 699 } 700 O.OS << "\n"; 701 702 // Print a histogram of the codes we see. 703 if (O.Histogram && !Stats.CodeFreq.empty()) { 704 std::vector<std::pair<unsigned, unsigned>> FreqPairs; // <freq,code> 705 for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i) 706 if (unsigned Freq = Stats.CodeFreq[i].NumInstances) 707 FreqPairs.push_back(std::make_pair(Freq, i)); 708 llvm::stable_sort(FreqPairs); 709 std::reverse(FreqPairs.begin(), FreqPairs.end()); 710 711 O.OS << "\tRecord Histogram:\n"; 712 O.OS << "\t\t Count # Bits b/Rec % Abv Record Kind\n"; 713 for (const auto &FreqPair : FreqPairs) { 714 const PerRecordStats &RecStats = Stats.CodeFreq[FreqPair.second]; 715 716 O.OS << format("\t\t%7d %9lu", RecStats.NumInstances, 717 (unsigned long)RecStats.TotalBits); 718 719 if (RecStats.NumInstances > 1) 720 O.OS << format(" %9.1f", 721 (double)RecStats.TotalBits / RecStats.NumInstances); 722 else 723 O.OS << " "; 724 725 if (RecStats.NumAbbrev) 726 O.OS << format(" %7.2f", (double)RecStats.NumAbbrev / 727 RecStats.NumInstances * 100); 728 else 729 O.OS << " "; 730 731 O.OS << " "; 732 if (std::optional<const char *> CodeName = GetCodeName( 733 FreqPair.second, Stat.first, BlockInfo, CurStreamType)) 734 O.OS << *CodeName << "\n"; 735 else 736 O.OS << "UnknownCode" << FreqPair.second << "\n"; 737 } 738 O.OS << "\n"; 739 } 740 } 741 } 742 743 Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel, 744 std::optional<BCDumpOptions> O, 745 std::optional<StringRef> CheckHash) { 746 std::string Indent(IndentLevel * 2, ' '); 747 uint64_t BlockBitStart = Stream.GetCurrentBitNo(); 748 749 // Get the statistics for this BlockID. 750 PerBlockIDStats &BlockStats = BlockIDStats[BlockID]; 751 752 BlockStats.NumInstances++; 753 754 // BLOCKINFO is a special part of the stream. 755 bool DumpRecords = O.has_value(); 756 if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { 757 if (O && !O->DumpBlockinfo) 758 O->OS << Indent << "<BLOCKINFO_BLOCK/>\n"; 759 std::optional<BitstreamBlockInfo> NewBlockInfo; 760 if (Error E = Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true) 761 .moveInto(NewBlockInfo)) 762 return E; 763 if (!NewBlockInfo) 764 return reportError("Malformed BlockInfoBlock"); 765 BlockInfo = std::move(*NewBlockInfo); 766 if (Error Err = Stream.JumpToBit(BlockBitStart)) 767 return Err; 768 // It's not really interesting to dump the contents of the blockinfo 769 // block, so only do it if the user explicitly requests it. 770 DumpRecords = O && O->DumpBlockinfo; 771 } 772 773 unsigned NumWords = 0; 774 if (Error Err = Stream.EnterSubBlock(BlockID, &NumWords)) 775 return Err; 776 777 // Keep it for later, when we see a MODULE_HASH record 778 uint64_t BlockEntryPos = Stream.getCurrentByteNo(); 779 780 std::optional<const char *> BlockName; 781 if (DumpRecords) { 782 O->OS << Indent << "<"; 783 if ((BlockName = GetBlockName(BlockID, BlockInfo, CurStreamType))) 784 O->OS << *BlockName; 785 else 786 O->OS << "UnknownBlock" << BlockID; 787 788 if (!O->Symbolic && BlockName) 789 O->OS << " BlockID=" << BlockID; 790 791 O->OS << " NumWords=" << NumWords 792 << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n"; 793 } 794 795 SmallVector<uint64_t, 64> Record; 796 797 // Keep the offset to the metadata index if seen. 798 uint64_t MetadataIndexOffset = 0; 799 800 // Read all the records for this block. 801 while (true) { 802 if (Stream.AtEndOfStream()) 803 return reportError("Premature end of bitstream"); 804 805 uint64_t RecordStartBit = Stream.GetCurrentBitNo(); 806 807 BitstreamEntry Entry; 808 if (Error E = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs) 809 .moveInto(Entry)) 810 return E; 811 812 switch (Entry.Kind) { 813 case BitstreamEntry::Error: 814 return reportError("malformed bitcode file"); 815 case BitstreamEntry::EndBlock: { 816 uint64_t BlockBitEnd = Stream.GetCurrentBitNo(); 817 BlockStats.NumBits += BlockBitEnd - BlockBitStart; 818 if (DumpRecords) { 819 O->OS << Indent << "</"; 820 if (BlockName) 821 O->OS << *BlockName << ">\n"; 822 else 823 O->OS << "UnknownBlock" << BlockID << ">\n"; 824 } 825 return Error::success(); 826 } 827 828 case BitstreamEntry::SubBlock: { 829 uint64_t SubBlockBitStart = Stream.GetCurrentBitNo(); 830 if (Error E = parseBlock(Entry.ID, IndentLevel + 1, O, CheckHash)) 831 return E; 832 ++BlockStats.NumSubBlocks; 833 uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo(); 834 835 // Don't include subblock sizes in the size of this block. 836 BlockBitStart += SubBlockBitEnd - SubBlockBitStart; 837 continue; 838 } 839 case BitstreamEntry::Record: 840 // The interesting case. 841 break; 842 } 843 844 if (Entry.ID == bitc::DEFINE_ABBREV) { 845 if (Error Err = Stream.ReadAbbrevRecord()) 846 return Err; 847 ++BlockStats.NumAbbrevs; 848 continue; 849 } 850 851 Record.clear(); 852 853 ++BlockStats.NumRecords; 854 855 StringRef Blob; 856 uint64_t CurrentRecordPos = Stream.GetCurrentBitNo(); 857 unsigned Code; 858 if (Error E = Stream.readRecord(Entry.ID, Record, &Blob).moveInto(Code)) 859 return E; 860 861 // Increment the # occurrences of this code. 862 if (BlockStats.CodeFreq.size() <= Code) 863 BlockStats.CodeFreq.resize(Code + 1); 864 BlockStats.CodeFreq[Code].NumInstances++; 865 BlockStats.CodeFreq[Code].TotalBits += 866 Stream.GetCurrentBitNo() - RecordStartBit; 867 if (Entry.ID != bitc::UNABBREV_RECORD) { 868 BlockStats.CodeFreq[Code].NumAbbrev++; 869 ++BlockStats.NumAbbreviatedRecords; 870 } 871 872 if (DumpRecords) { 873 O->OS << Indent << " <"; 874 std::optional<const char *> CodeName = 875 GetCodeName(Code, BlockID, BlockInfo, CurStreamType); 876 if (CodeName) 877 O->OS << *CodeName; 878 else 879 O->OS << "UnknownCode" << Code; 880 if (!O->Symbolic && CodeName) 881 O->OS << " codeid=" << Code; 882 const BitCodeAbbrev *Abbv = nullptr; 883 if (Entry.ID != bitc::UNABBREV_RECORD) { 884 Expected<const BitCodeAbbrev *> MaybeAbbv = Stream.getAbbrev(Entry.ID); 885 if (!MaybeAbbv) 886 return MaybeAbbv.takeError(); 887 Abbv = MaybeAbbv.get(); 888 O->OS << " abbrevid=" << Entry.ID; 889 } 890 891 for (unsigned i = 0, e = Record.size(); i != e; ++i) 892 O->OS << " op" << i << "=" << (int64_t)Record[i]; 893 894 // If we found a metadata index, let's verify that we had an offset 895 // before and validate its forward reference offset was correct! 896 if (BlockID == bitc::METADATA_BLOCK_ID) { 897 if (Code == bitc::METADATA_INDEX_OFFSET) { 898 if (Record.size() != 2) 899 O->OS << "(Invalid record)"; 900 else { 901 auto Offset = Record[0] + (Record[1] << 32); 902 MetadataIndexOffset = Stream.GetCurrentBitNo() + Offset; 903 } 904 } 905 if (Code == bitc::METADATA_INDEX) { 906 O->OS << " (offset "; 907 if (MetadataIndexOffset == RecordStartBit) 908 O->OS << "match)"; 909 else 910 O->OS << "mismatch: " << MetadataIndexOffset << " vs " 911 << RecordStartBit << ")"; 912 } 913 } 914 915 // If we found a module hash, let's verify that it matches! 916 if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH && 917 CheckHash) { 918 if (Record.size() != 5) 919 O->OS << " (invalid)"; 920 else { 921 // Recompute the hash and compare it to the one in the bitcode 922 SHA1 Hasher; 923 std::array<uint8_t, 20> Hash; 924 Hasher.update(*CheckHash); 925 { 926 int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos; 927 auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize); 928 Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize)); 929 Hash = Hasher.result(); 930 } 931 std::array<uint8_t, 20> RecordedHash; 932 int Pos = 0; 933 for (auto &Val : Record) { 934 assert(!(Val >> 32) && "Unexpected high bits set"); 935 support::endian::write32be(&RecordedHash[Pos], Val); 936 Pos += 4; 937 } 938 if (Hash == RecordedHash) 939 O->OS << " (match)"; 940 else 941 O->OS << " (!mismatch!)"; 942 } 943 } 944 945 O->OS << "/>"; 946 947 if (Abbv) { 948 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { 949 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 950 if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array) 951 continue; 952 assert(i + 2 == e && "Array op not second to last"); 953 std::string Str; 954 bool ArrayIsPrintable = true; 955 for (unsigned j = i - 1, je = Record.size(); j != je; ++j) { 956 if (!isPrint(static_cast<unsigned char>(Record[j]))) { 957 ArrayIsPrintable = false; 958 break; 959 } 960 Str += (char)Record[j]; 961 } 962 if (ArrayIsPrintable) 963 O->OS << " record string = '" << Str << "'"; 964 break; 965 } 966 } 967 968 if (Blob.data()) { 969 if (canDecodeBlob(Code, BlockID)) { 970 if (Error E = decodeMetadataStringsBlob(Indent, Record, Blob, O->OS)) 971 return E; 972 } else { 973 O->OS << " blob data = "; 974 if (O->ShowBinaryBlobs) { 975 O->OS << "'"; 976 O->OS.write_escaped(Blob, /*hex=*/true) << "'"; 977 } else { 978 bool BlobIsPrintable = true; 979 for (char C : Blob) 980 if (!isPrint(static_cast<unsigned char>(C))) { 981 BlobIsPrintable = false; 982 break; 983 } 984 985 if (BlobIsPrintable) 986 O->OS << "'" << Blob << "'"; 987 else 988 O->OS << "unprintable, " << Blob.size() << " bytes."; 989 } 990 } 991 } 992 993 O->OS << "\n"; 994 } 995 996 // Make sure that we can skip the current record. 997 if (Error Err = Stream.JumpToBit(CurrentRecordPos)) 998 return Err; 999 if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID)) 1000 ; // Do nothing. 1001 else 1002 return Skipped.takeError(); 1003 } 1004 } 1005 1006