xref: /freebsd/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp (revision 0a7e5f1f02aad2ff5fff1c60f44c6975fd07e1d9)
1  //===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  
9  #include "llvm/Bitcode/BitcodeAnalyzer.h"
10  #include "llvm/Bitcode/BitcodeReader.h"
11  #include "llvm/Bitcode/LLVMBitCodes.h"
12  #include "llvm/Bitstream/BitCodes.h"
13  #include "llvm/Bitstream/BitstreamReader.h"
14  #include "llvm/Support/Format.h"
15  #include "llvm/Support/SHA1.h"
16  #include <optional>
17  
18  using namespace llvm;
19  
20  static Error reportError(StringRef Message) {
21    return createStringError(std::errc::illegal_byte_sequence, Message.data());
22  }
23  
24  /// Return a symbolic block name if known, otherwise return null.
25  static std::optional<const char *>
26  GetBlockName(unsigned BlockID, const BitstreamBlockInfo &BlockInfo,
27               CurStreamTypeType CurStreamType) {
28    // Standard blocks for all bitcode files.
29    if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
30      if (BlockID == bitc::BLOCKINFO_BLOCK_ID)
31        return "BLOCKINFO_BLOCK";
32      return std::nullopt;
33    }
34  
35    // Check to see if we have a blockinfo record for this block, with a name.
36    if (const BitstreamBlockInfo::BlockInfo *Info =
37            BlockInfo.getBlockInfo(BlockID)) {
38      if (!Info->Name.empty())
39        return Info->Name.c_str();
40    }
41  
42    if (CurStreamType != LLVMIRBitstream)
43      return std::nullopt;
44  
45    switch (BlockID) {
46    default:
47      return std::nullopt;
48    case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
49      return "OPERAND_BUNDLE_TAGS_BLOCK";
50    case bitc::MODULE_BLOCK_ID:
51      return "MODULE_BLOCK";
52    case bitc::PARAMATTR_BLOCK_ID:
53      return "PARAMATTR_BLOCK";
54    case bitc::PARAMATTR_GROUP_BLOCK_ID:
55      return "PARAMATTR_GROUP_BLOCK_ID";
56    case bitc::TYPE_BLOCK_ID_NEW:
57      return "TYPE_BLOCK_ID";
58    case bitc::CONSTANTS_BLOCK_ID:
59      return "CONSTANTS_BLOCK";
60    case bitc::FUNCTION_BLOCK_ID:
61      return "FUNCTION_BLOCK";
62    case bitc::IDENTIFICATION_BLOCK_ID:
63      return "IDENTIFICATION_BLOCK_ID";
64    case bitc::VALUE_SYMTAB_BLOCK_ID:
65      return "VALUE_SYMTAB";
66    case bitc::METADATA_BLOCK_ID:
67      return "METADATA_BLOCK";
68    case bitc::METADATA_KIND_BLOCK_ID:
69      return "METADATA_KIND_BLOCK";
70    case bitc::METADATA_ATTACHMENT_ID:
71      return "METADATA_ATTACHMENT_BLOCK";
72    case bitc::USELIST_BLOCK_ID:
73      return "USELIST_BLOCK_ID";
74    case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
75      return "GLOBALVAL_SUMMARY_BLOCK";
76    case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
77      return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
78    case bitc::MODULE_STRTAB_BLOCK_ID:
79      return "MODULE_STRTAB_BLOCK";
80    case bitc::STRTAB_BLOCK_ID:
81      return "STRTAB_BLOCK";
82    case bitc::SYMTAB_BLOCK_ID:
83      return "SYMTAB_BLOCK";
84    }
85  }
86  
/// Return a symbolic name for record code \p CodeID inside block \p BlockID,
/// or std::nullopt if no name is known.
///
/// Lookup order:
///  1. Block IDs below bitc::FIRST_APPLICATION_BLOCKID: only the record codes
///     of the BLOCKINFO block are named.
///  2. A record name registered for this block in \p BlockInfo.
///  3. The built-in tables below, consulted only when \p CurStreamType is
///     LLVMIRBitstream.
///
/// A name taken from \p BlockInfo points into the std::string stored there.
static std::optional<const char *>
GetCodeName(unsigned CodeID, unsigned BlockID,
            const BitstreamBlockInfo &BlockInfo,
            CurStreamTypeType CurStreamType) {
  // Standard blocks for all bitcode files.
  if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
    if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
      switch (CodeID) {
      default:
        return std::nullopt;
      case bitc::BLOCKINFO_CODE_SETBID:
        return "SETBID";
      case bitc::BLOCKINFO_CODE_BLOCKNAME:
        return "BLOCKNAME";
      case bitc::BLOCKINFO_CODE_SETRECORDNAME:
        return "SETRECORDNAME";
      }
    }
    return std::nullopt;
  }

  // Check to see if we have a blockinfo record for this record, with a name.
  // The first entry whose code matches wins.
  if (const BitstreamBlockInfo::BlockInfo *Info =
          BlockInfo.getBlockInfo(BlockID)) {
    for (const std::pair<unsigned, std::string> &RN : Info->RecordNames)
      if (RN.first == CodeID)
        return RN.second.c_str();
  }

  // The built-in tables below are only used for LLVM IR bitstreams.
  if (CurStreamType != LLVMIRBitstream)
    return std::nullopt;

  // STRINGIFY_CODE(PREFIX, CODE) expands to
  //   case bitc::PREFIX_CODE: return "CODE";
  // so the returned name omits the PREFIX_ part of the enumerator.
#define STRINGIFY_CODE(PREFIX, CODE)                                           \
  case bitc::PREFIX##_##CODE:                                                  \
    return #CODE;
  // Every inner switch returns on all paths (its default included), so
  // control never falls through from one outer case into the next.
  switch (BlockID) {
  default:
    return std::nullopt;
  case bitc::MODULE_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
      STRINGIFY_CODE(MODULE_CODE, VERSION)
      STRINGIFY_CODE(MODULE_CODE, TRIPLE)
      STRINGIFY_CODE(MODULE_CODE, DATALAYOUT)
      STRINGIFY_CODE(MODULE_CODE, ASM)
      STRINGIFY_CODE(MODULE_CODE, SECTIONNAME)
      STRINGIFY_CODE(MODULE_CODE, DEPLIB) // Deprecated, present in old bitcode
      STRINGIFY_CODE(MODULE_CODE, GLOBALVAR)
      STRINGIFY_CODE(MODULE_CODE, FUNCTION)
      STRINGIFY_CODE(MODULE_CODE, ALIAS)
      STRINGIFY_CODE(MODULE_CODE, GCNAME)
      STRINGIFY_CODE(MODULE_CODE, COMDAT)
      STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
      STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
      STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
      STRINGIFY_CODE(MODULE_CODE, HASH)
    }
  case bitc::IDENTIFICATION_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
      STRINGIFY_CODE(IDENTIFICATION_CODE, STRING)
      STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH)
    }
  case bitc::PARAMATTR_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
    // FIXME: Should these be different?
    case bitc::PARAMATTR_CODE_ENTRY_OLD:
      return "ENTRY";
    case bitc::PARAMATTR_CODE_ENTRY:
      return "ENTRY";
    }
  case bitc::PARAMATTR_GROUP_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
    case bitc::PARAMATTR_GRP_CODE_ENTRY:
      return "ENTRY";
    }
  case bitc::TYPE_BLOCK_ID_NEW:
    switch (CodeID) {
    default:
      return std::nullopt;
      STRINGIFY_CODE(TYPE_CODE, NUMENTRY)
      STRINGIFY_CODE(TYPE_CODE, VOID)
      STRINGIFY_CODE(TYPE_CODE, FLOAT)
      STRINGIFY_CODE(TYPE_CODE, DOUBLE)
      STRINGIFY_CODE(TYPE_CODE, LABEL)
      STRINGIFY_CODE(TYPE_CODE, OPAQUE)
      STRINGIFY_CODE(TYPE_CODE, INTEGER)
      STRINGIFY_CODE(TYPE_CODE, POINTER)
      STRINGIFY_CODE(TYPE_CODE, HALF)
      STRINGIFY_CODE(TYPE_CODE, ARRAY)
      STRINGIFY_CODE(TYPE_CODE, VECTOR)
      STRINGIFY_CODE(TYPE_CODE, X86_FP80)
      STRINGIFY_CODE(TYPE_CODE, FP128)
      STRINGIFY_CODE(TYPE_CODE, PPC_FP128)
      STRINGIFY_CODE(TYPE_CODE, METADATA)
      STRINGIFY_CODE(TYPE_CODE, X86_MMX)
      STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON)
      STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME)
      STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED)
      STRINGIFY_CODE(TYPE_CODE, FUNCTION)
      STRINGIFY_CODE(TYPE_CODE, TOKEN)
      STRINGIFY_CODE(TYPE_CODE, BFLOAT)
    }

  case bitc::CONSTANTS_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
      STRINGIFY_CODE(CST_CODE, SETTYPE)
      STRINGIFY_CODE(CST_CODE, NULL)
      STRINGIFY_CODE(CST_CODE, UNDEF)
      STRINGIFY_CODE(CST_CODE, INTEGER)
      STRINGIFY_CODE(CST_CODE, WIDE_INTEGER)
      STRINGIFY_CODE(CST_CODE, FLOAT)
      STRINGIFY_CODE(CST_CODE, AGGREGATE)
      STRINGIFY_CODE(CST_CODE, STRING)
      STRINGIFY_CODE(CST_CODE, CSTRING)
      STRINGIFY_CODE(CST_CODE, CE_BINOP)
      STRINGIFY_CODE(CST_CODE, CE_CAST)
      STRINGIFY_CODE(CST_CODE, CE_GEP)
      STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP)
      STRINGIFY_CODE(CST_CODE, CE_SELECT)
      STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT)
      STRINGIFY_CODE(CST_CODE, CE_INSERTELT)
      STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC)
      STRINGIFY_CODE(CST_CODE, CE_CMP)
      STRINGIFY_CODE(CST_CODE, INLINEASM)
      STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX)
      STRINGIFY_CODE(CST_CODE, CE_UNOP)
      STRINGIFY_CODE(CST_CODE, DSO_LOCAL_EQUIVALENT)
      STRINGIFY_CODE(CST_CODE, NO_CFI_VALUE)
    // Spelled out by hand: unlike the STRINGIFY_CODE entries, this name keeps
    // its CST_CODE_ prefix.
    case bitc::CST_CODE_BLOCKADDRESS:
      return "CST_CODE_BLOCKADDRESS";
      STRINGIFY_CODE(CST_CODE, DATA)
    }
  case bitc::FUNCTION_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
      STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS)
      STRINGIFY_CODE(FUNC_CODE, INST_BINOP)
      STRINGIFY_CODE(FUNC_CODE, INST_CAST)
      STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD)
      STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD)
      STRINGIFY_CODE(FUNC_CODE, INST_SELECT)
      STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT)
      STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT)
      STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC)
      STRINGIFY_CODE(FUNC_CODE, INST_CMP)
      STRINGIFY_CODE(FUNC_CODE, INST_RET)
      STRINGIFY_CODE(FUNC_CODE, INST_BR)
      STRINGIFY_CODE(FUNC_CODE, INST_SWITCH)
      STRINGIFY_CODE(FUNC_CODE, INST_INVOKE)
      STRINGIFY_CODE(FUNC_CODE, INST_UNOP)
      STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE)
      STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET)
      STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET)
      STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD)
      STRINGIFY_CODE(FUNC_CODE, INST_PHI)
      STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA)
      STRINGIFY_CODE(FUNC_CODE, INST_LOAD)
      STRINGIFY_CODE(FUNC_CODE, INST_VAARG)
      STRINGIFY_CODE(FUNC_CODE, INST_STORE)
      STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL)
      STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL)
      STRINGIFY_CODE(FUNC_CODE, INST_CMP2)
      STRINGIFY_CODE(FUNC_CODE, INST_VSELECT)
      STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN)
      STRINGIFY_CODE(FUNC_CODE, INST_CALL)
      STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC)
      STRINGIFY_CODE(FUNC_CODE, INST_GEP)
      STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE)
      STRINGIFY_CODE(FUNC_CODE, INST_FENCE)
      STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW)
      STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC)
      STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC)
      STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
      STRINGIFY_CODE(FUNC_CODE, INST_CALLBR)
      STRINGIFY_CODE(FUNC_CODE, BLOCKADDR_USERS)
    }
  case bitc::VALUE_SYMTAB_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
      STRINGIFY_CODE(VST_CODE, ENTRY)
      STRINGIFY_CODE(VST_CODE, BBENTRY)
      STRINGIFY_CODE(VST_CODE, FNENTRY)
      STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY)
    }
  case bitc::MODULE_STRTAB_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
      STRINGIFY_CODE(MST_CODE, ENTRY)
      STRINGIFY_CODE(MST_CODE, HASH)
    }
  // Both summary block kinds share one table of record names.
  case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
  case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
      STRINGIFY_CODE(FS, PERMODULE)
      STRINGIFY_CODE(FS, PERMODULE_PROFILE)
      STRINGIFY_CODE(FS, PERMODULE_RELBF)
      STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS)
      STRINGIFY_CODE(FS, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS)
      STRINGIFY_CODE(FS, COMBINED)
      STRINGIFY_CODE(FS, COMBINED_PROFILE)
      STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS)
      STRINGIFY_CODE(FS, ALIAS)
      STRINGIFY_CODE(FS, COMBINED_ALIAS)
      STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME)
      STRINGIFY_CODE(FS, VERSION)
      STRINGIFY_CODE(FS, FLAGS)
      STRINGIFY_CODE(FS, TYPE_TESTS)
      STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_VCALLS)
      STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_VCALLS)
      STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_CONST_VCALL)
      STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_CONST_VCALL)
      STRINGIFY_CODE(FS, VALUE_GUID)
      STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS)
      STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS)
      STRINGIFY_CODE(FS, TYPE_ID)
      STRINGIFY_CODE(FS, TYPE_ID_METADATA)
      STRINGIFY_CODE(FS, BLOCK_COUNT)
      STRINGIFY_CODE(FS, PARAM_ACCESS)
      STRINGIFY_CODE(FS, PERMODULE_CALLSITE_INFO)
      STRINGIFY_CODE(FS, PERMODULE_ALLOC_INFO)
      STRINGIFY_CODE(FS, COMBINED_CALLSITE_INFO)
      STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO)
      STRINGIFY_CODE(FS, STACK_IDS)
    }
  case bitc::METADATA_ATTACHMENT_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
      STRINGIFY_CODE(METADATA, ATTACHMENT)
    }
  case bitc::METADATA_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
      STRINGIFY_CODE(METADATA, STRING_OLD)
      STRINGIFY_CODE(METADATA, VALUE)
      STRINGIFY_CODE(METADATA, NODE)
      STRINGIFY_CODE(METADATA, NAME)
      STRINGIFY_CODE(METADATA, DISTINCT_NODE)
      STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
      STRINGIFY_CODE(METADATA, LOCATION)
      STRINGIFY_CODE(METADATA, OLD_NODE)
      STRINGIFY_CODE(METADATA, OLD_FN_NODE)
      STRINGIFY_CODE(METADATA, NAMED_NODE)
      STRINGIFY_CODE(METADATA, GENERIC_DEBUG)
      STRINGIFY_CODE(METADATA, SUBRANGE)
      STRINGIFY_CODE(METADATA, ENUMERATOR)
      STRINGIFY_CODE(METADATA, BASIC_TYPE)
      STRINGIFY_CODE(METADATA, FILE)
      STRINGIFY_CODE(METADATA, DERIVED_TYPE)
      STRINGIFY_CODE(METADATA, COMPOSITE_TYPE)
      STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE)
      STRINGIFY_CODE(METADATA, COMPILE_UNIT)
      STRINGIFY_CODE(METADATA, SUBPROGRAM)
      STRINGIFY_CODE(METADATA, LEXICAL_BLOCK)
      STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE)
      STRINGIFY_CODE(METADATA, NAMESPACE)
      STRINGIFY_CODE(METADATA, TEMPLATE_TYPE)
      STRINGIFY_CODE(METADATA, TEMPLATE_VALUE)
      STRINGIFY_CODE(METADATA, GLOBAL_VAR)
      STRINGIFY_CODE(METADATA, LOCAL_VAR)
      STRINGIFY_CODE(METADATA, EXPRESSION)
      STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
      STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
      STRINGIFY_CODE(METADATA, MODULE)
      STRINGIFY_CODE(METADATA, MACRO)
      STRINGIFY_CODE(METADATA, MACRO_FILE)
      STRINGIFY_CODE(METADATA, STRINGS)
      STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT)
      STRINGIFY_CODE(METADATA, GLOBAL_VAR_EXPR)
      STRINGIFY_CODE(METADATA, INDEX_OFFSET)
      STRINGIFY_CODE(METADATA, INDEX)
      STRINGIFY_CODE(METADATA, ARG_LIST)
    }
  case bitc::METADATA_KIND_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
      STRINGIFY_CODE(METADATA, KIND)
    }
  case bitc::USELIST_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
    case bitc::USELIST_CODE_DEFAULT:
      return "USELIST_CODE_DEFAULT";
    case bitc::USELIST_CODE_BB:
      return "USELIST_CODE_BB";
    }

  case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
    case bitc::OPERAND_BUNDLE_TAG:
      return "OPERAND_BUNDLE_TAG";
    }
  case bitc::STRTAB_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
    case bitc::STRTAB_BLOB:
      return "BLOB";
    }
  case bitc::SYMTAB_BLOCK_ID:
    switch (CodeID) {
    default:
      return std::nullopt;
    case bitc::SYMTAB_BLOB:
      return "BLOB";
    }
  }
#undef STRINGIFY_CODE
}
416  
417  static void printSize(raw_ostream &OS, double Bits) {
418    OS << format("%.2f/%.2fB/%luW", Bits, Bits / 8, (unsigned long)(Bits / 32));
419  }
420  static void printSize(raw_ostream &OS, uint64_t Bits) {
421    OS << format("%lub/%.2fB/%luW", (unsigned long)Bits, (double)Bits / 8,
422                 (unsigned long)(Bits / 32));
423  }
424  
425  static Expected<CurStreamTypeType> ReadSignature(BitstreamCursor &Stream) {
426    auto tryRead = [&Stream](char &Dest, size_t size) -> Error {
427      if (Expected<SimpleBitstreamCursor::word_t> MaybeWord = Stream.Read(size))
428        Dest = MaybeWord.get();
429      else
430        return MaybeWord.takeError();
431      return Error::success();
432    };
433  
434    char Signature[6];
435    if (Error Err = tryRead(Signature[0], 8))
436      return std::move(Err);
437    if (Error Err = tryRead(Signature[1], 8))
438      return std::move(Err);
439  
440    // Autodetect the file contents, if it is one we know.
441    if (Signature[0] == 'C' && Signature[1] == 'P') {
442      if (Error Err = tryRead(Signature[2], 8))
443        return std::move(Err);
444      if (Error Err = tryRead(Signature[3], 8))
445        return std::move(Err);
446      if (Signature[2] == 'C' && Signature[3] == 'H')
447        return ClangSerializedASTBitstream;
448    } else if (Signature[0] == 'D' && Signature[1] == 'I') {
449      if (Error Err = tryRead(Signature[2], 8))
450        return std::move(Err);
451      if (Error Err = tryRead(Signature[3], 8))
452        return std::move(Err);
453      if (Signature[2] == 'A' && Signature[3] == 'G')
454        return ClangSerializedDiagnosticsBitstream;
455    } else if (Signature[0] == 'R' && Signature[1] == 'M') {
456      if (Error Err = tryRead(Signature[2], 8))
457        return std::move(Err);
458      if (Error Err = tryRead(Signature[3], 8))
459        return std::move(Err);
460      if (Signature[2] == 'R' && Signature[3] == 'K')
461        return LLVMBitstreamRemarks;
462    } else {
463      if (Error Err = tryRead(Signature[2], 4))
464        return std::move(Err);
465      if (Error Err = tryRead(Signature[3], 4))
466        return std::move(Err);
467      if (Error Err = tryRead(Signature[4], 4))
468        return std::move(Err);
469      if (Error Err = tryRead(Signature[5], 4))
470        return std::move(Err);
471      if (Signature[0] == 'B' && Signature[1] == 'C' && Signature[2] == 0x0 &&
472          Signature[3] == 0xC && Signature[4] == 0xE && Signature[5] == 0xD)
473        return LLVMIRBitstream;
474    }
475    return UnknownBitstream;
476  }
477  
478  static Expected<CurStreamTypeType> analyzeHeader(std::optional<BCDumpOptions> O,
479                                                   BitstreamCursor &Stream) {
480    ArrayRef<uint8_t> Bytes = Stream.getBitcodeBytes();
481    const unsigned char *BufPtr = (const unsigned char *)Bytes.data();
482    const unsigned char *EndBufPtr = BufPtr + Bytes.size();
483  
484    // If we have a wrapper header, parse it and ignore the non-bc file
485    // contents. The magic number is 0x0B17C0DE stored in little endian.
486    if (isBitcodeWrapper(BufPtr, EndBufPtr)) {
487      if (Bytes.size() < BWH_HeaderSize)
488        return reportError("Invalid bitcode wrapper header");
489  
490      if (O) {
491        unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]);
492        unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]);
493        unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
494        unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
495        unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]);
496  
497        O->OS << "<BITCODE_WRAPPER_HEADER"
498              << " Magic=" << format_hex(Magic, 10)
499              << " Version=" << format_hex(Version, 10)
500              << " Offset=" << format_hex(Offset, 10)
501              << " Size=" << format_hex(Size, 10)
502              << " CPUType=" << format_hex(CPUType, 10) << "/>\n";
503      }
504  
505      if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true))
506        return reportError("Invalid bitcode wrapper header");
507    }
508  
509    // Use the cursor modified by skipping the wrapper header.
510    Stream = BitstreamCursor(ArrayRef<uint8_t>(BufPtr, EndBufPtr));
511  
512    return ReadSignature(Stream);
513  }
514  
515  static bool canDecodeBlob(unsigned Code, unsigned BlockID) {
516    return BlockID == bitc::METADATA_BLOCK_ID && Code == bitc::METADATA_STRINGS;
517  }
518  
519  Error BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent,
520                                                   ArrayRef<uint64_t> Record,
521                                                   StringRef Blob,
522                                                   raw_ostream &OS) {
523    if (Blob.empty())
524      return reportError("Cannot decode empty blob.");
525  
526    if (Record.size() != 2)
527      return reportError(
528          "Decoding metadata strings blob needs two record entries.");
529  
530    unsigned NumStrings = Record[0];
531    unsigned StringsOffset = Record[1];
532    OS << " num-strings = " << NumStrings << " {\n";
533  
534    StringRef Lengths = Blob.slice(0, StringsOffset);
535    SimpleBitstreamCursor R(Lengths);
536    StringRef Strings = Blob.drop_front(StringsOffset);
537    do {
538      if (R.AtEndOfStream())
539        return reportError("bad length");
540  
541      uint32_t Size;
542      if (Error E = R.ReadVBR(6).moveInto(Size))
543        return E;
544      if (Strings.size() < Size)
545        return reportError("truncated chars");
546  
547      OS << Indent << "    '";
548      OS.write_escaped(Strings.slice(0, Size), /*hex=*/true);
549      OS << "'\n";
550      Strings = Strings.drop_front(Size);
551    } while (--NumStrings);
552  
553    OS << Indent << "  }";
554    return Error::success();
555  }
556  
557  BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer,
558                                   std::optional<StringRef> BlockInfoBuffer)
559      : Stream(Buffer) {
560    if (BlockInfoBuffer)
561      BlockInfoStream.emplace(*BlockInfoBuffer);
562  }
563  
564  Error BitcodeAnalyzer::analyze(std::optional<BCDumpOptions> O,
565                                 std::optional<StringRef> CheckHash) {
566    if (Error E = analyzeHeader(O, Stream).moveInto(CurStreamType))
567      return E;
568  
569    Stream.setBlockInfo(&BlockInfo);
570  
571    // Read block info from BlockInfoStream, if specified.
572    // The block info must be a top-level block.
573    if (BlockInfoStream) {
574      BitstreamCursor BlockInfoCursor(*BlockInfoStream);
575      if (Error E = analyzeHeader(O, BlockInfoCursor).takeError())
576        return E;
577  
578      while (!BlockInfoCursor.AtEndOfStream()) {
579        Expected<unsigned> MaybeCode = BlockInfoCursor.ReadCode();
580        if (!MaybeCode)
581          return MaybeCode.takeError();
582        if (MaybeCode.get() != bitc::ENTER_SUBBLOCK)
583          return reportError("Invalid record at top-level in block info file");
584  
585        Expected<unsigned> MaybeBlockID = BlockInfoCursor.ReadSubBlockID();
586        if (!MaybeBlockID)
587          return MaybeBlockID.takeError();
588        if (MaybeBlockID.get() == bitc::BLOCKINFO_BLOCK_ID) {
589          std::optional<BitstreamBlockInfo> NewBlockInfo;
590          if (Error E =
591                  BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true)
592                      .moveInto(NewBlockInfo))
593            return E;
594          if (!NewBlockInfo)
595            return reportError("Malformed BlockInfoBlock in block info file");
596          BlockInfo = std::move(*NewBlockInfo);
597          break;
598        }
599  
600        if (Error Err = BlockInfoCursor.SkipBlock())
601          return Err;
602      }
603    }
604  
605    // Parse the top-level structure.  We only allow blocks at the top-level.
606    while (!Stream.AtEndOfStream()) {
607      Expected<unsigned> MaybeCode = Stream.ReadCode();
608      if (!MaybeCode)
609        return MaybeCode.takeError();
610      if (MaybeCode.get() != bitc::ENTER_SUBBLOCK)
611        return reportError("Invalid record at top-level");
612  
613      Expected<unsigned> MaybeBlockID = Stream.ReadSubBlockID();
614      if (!MaybeBlockID)
615        return MaybeBlockID.takeError();
616  
617      if (Error E = parseBlock(MaybeBlockID.get(), 0, O, CheckHash))
618        return E;
619      ++NumTopBlocks;
620    }
621  
622    return Error::success();
623  }
624  
625  void BitcodeAnalyzer::printStats(BCDumpOptions O,
626                                   std::optional<StringRef> Filename) {
627    uint64_t BufferSizeBits = Stream.getBitcodeBytes().size() * CHAR_BIT;
628    // Print a summary of the read file.
629    O.OS << "Summary ";
630    if (Filename)
631      O.OS << "of " << Filename->data() << ":\n";
632    O.OS << "         Total size: ";
633    printSize(O.OS, BufferSizeBits);
634    O.OS << "\n";
635    O.OS << "        Stream type: ";
636    switch (CurStreamType) {
637    case UnknownBitstream:
638      O.OS << "unknown\n";
639      break;
640    case LLVMIRBitstream:
641      O.OS << "LLVM IR\n";
642      break;
643    case ClangSerializedASTBitstream:
644      O.OS << "Clang Serialized AST\n";
645      break;
646    case ClangSerializedDiagnosticsBitstream:
647      O.OS << "Clang Serialized Diagnostics\n";
648      break;
649    case LLVMBitstreamRemarks:
650      O.OS << "LLVM Remarks\n";
651      break;
652    }
653    O.OS << "  # Toplevel Blocks: " << NumTopBlocks << "\n";
654    O.OS << "\n";
655  
656    // Emit per-block stats.
657    O.OS << "Per-block Summary:\n";
658    for (const auto &Stat : BlockIDStats) {
659      O.OS << "  Block ID #" << Stat.first;
660      if (std::optional<const char *> BlockName =
661              GetBlockName(Stat.first, BlockInfo, CurStreamType))
662        O.OS << " (" << *BlockName << ")";
663      O.OS << ":\n";
664  
665      const PerBlockIDStats &Stats = Stat.second;
666      O.OS << "      Num Instances: " << Stats.NumInstances << "\n";
667      O.OS << "         Total Size: ";
668      printSize(O.OS, Stats.NumBits);
669      O.OS << "\n";
670      double pct = (Stats.NumBits * 100.0) / BufferSizeBits;
671      O.OS << "    Percent of file: " << format("%2.4f%%", pct) << "\n";
672      if (Stats.NumInstances > 1) {
673        O.OS << "       Average Size: ";
674        printSize(O.OS, Stats.NumBits / (double)Stats.NumInstances);
675        O.OS << "\n";
676        O.OS << "  Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
677             << Stats.NumSubBlocks / (double)Stats.NumInstances << "\n";
678        O.OS << "    Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
679             << Stats.NumAbbrevs / (double)Stats.NumInstances << "\n";
680        O.OS << "    Tot/Avg Records: " << Stats.NumRecords << "/"
681             << Stats.NumRecords / (double)Stats.NumInstances << "\n";
682      } else {
683        O.OS << "      Num SubBlocks: " << Stats.NumSubBlocks << "\n";
684        O.OS << "        Num Abbrevs: " << Stats.NumAbbrevs << "\n";
685        O.OS << "        Num Records: " << Stats.NumRecords << "\n";
686      }
687      if (Stats.NumRecords) {
688        double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords;
689        O.OS << "    Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
690      }
691      O.OS << "\n";
692  
693      // Print a histogram of the codes we see.
694      if (O.Histogram && !Stats.CodeFreq.empty()) {
695        std::vector<std::pair<unsigned, unsigned>> FreqPairs; // <freq,code>
696        for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i)
697          if (unsigned Freq = Stats.CodeFreq[i].NumInstances)
698            FreqPairs.push_back(std::make_pair(Freq, i));
699        llvm::stable_sort(FreqPairs);
700        std::reverse(FreqPairs.begin(), FreqPairs.end());
701  
702        O.OS << "\tRecord Histogram:\n";
703        O.OS << "\t\t  Count    # Bits     b/Rec   % Abv  Record Kind\n";
704        for (const auto &FreqPair : FreqPairs) {
705          const PerRecordStats &RecStats = Stats.CodeFreq[FreqPair.second];
706  
707          O.OS << format("\t\t%7d %9lu", RecStats.NumInstances,
708                         (unsigned long)RecStats.TotalBits);
709  
710          if (RecStats.NumInstances > 1)
711            O.OS << format(" %9.1f",
712                           (double)RecStats.TotalBits / RecStats.NumInstances);
713          else
714            O.OS << "          ";
715  
716          if (RecStats.NumAbbrev)
717            O.OS << format(" %7.2f", (double)RecStats.NumAbbrev /
718                                         RecStats.NumInstances * 100);
719          else
720            O.OS << "        ";
721  
722          O.OS << "  ";
723          if (std::optional<const char *> CodeName = GetCodeName(
724                  FreqPair.second, Stat.first, BlockInfo, CurStreamType))
725            O.OS << *CodeName << "\n";
726          else
727            O.OS << "UnknownCode" << FreqPair.second << "\n";
728        }
729        O.OS << "\n";
730      }
731    }
732  }
733  
734  Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
735                                    std::optional<BCDumpOptions> O,
736                                    std::optional<StringRef> CheckHash) {
737    std::string Indent(IndentLevel * 2, ' ');
738    uint64_t BlockBitStart = Stream.GetCurrentBitNo();
739  
740    // Get the statistics for this BlockID.
741    PerBlockIDStats &BlockStats = BlockIDStats[BlockID];
742  
743    BlockStats.NumInstances++;
744  
745    // BLOCKINFO is a special part of the stream.
746    bool DumpRecords = O.has_value();
747    if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
748      if (O && !O->DumpBlockinfo)
749        O->OS << Indent << "<BLOCKINFO_BLOCK/>\n";
750      std::optional<BitstreamBlockInfo> NewBlockInfo;
751      if (Error E = Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true)
752                        .moveInto(NewBlockInfo))
753        return E;
754      if (!NewBlockInfo)
755        return reportError("Malformed BlockInfoBlock");
756      BlockInfo = std::move(*NewBlockInfo);
757      if (Error Err = Stream.JumpToBit(BlockBitStart))
758        return Err;
759      // It's not really interesting to dump the contents of the blockinfo
760      // block, so only do it if the user explicitly requests it.
761      DumpRecords = O && O->DumpBlockinfo;
762    }
763  
764    unsigned NumWords = 0;
765    if (Error Err = Stream.EnterSubBlock(BlockID, &NumWords))
766      return Err;
767  
768    // Keep it for later, when we see a MODULE_HASH record
769    uint64_t BlockEntryPos = Stream.getCurrentByteNo();
770  
771    std::optional<const char *> BlockName;
772    if (DumpRecords) {
773      O->OS << Indent << "<";
774      if ((BlockName = GetBlockName(BlockID, BlockInfo, CurStreamType)))
775        O->OS << *BlockName;
776      else
777        O->OS << "UnknownBlock" << BlockID;
778  
779      if (!O->Symbolic && BlockName)
780        O->OS << " BlockID=" << BlockID;
781  
782      O->OS << " NumWords=" << NumWords
783            << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n";
784    }
785  
786    SmallVector<uint64_t, 64> Record;
787  
788    // Keep the offset to the metadata index if seen.
789    uint64_t MetadataIndexOffset = 0;
790  
791    // Read all the records for this block.
792    while (true) {
793      if (Stream.AtEndOfStream())
794        return reportError("Premature end of bitstream");
795  
796      uint64_t RecordStartBit = Stream.GetCurrentBitNo();
797  
798      BitstreamEntry Entry;
799      if (Error E = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs)
800                        .moveInto(Entry))
801        return E;
802  
803      switch (Entry.Kind) {
804      case BitstreamEntry::Error:
805        return reportError("malformed bitcode file");
806      case BitstreamEntry::EndBlock: {
807        uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
808        BlockStats.NumBits += BlockBitEnd - BlockBitStart;
809        if (DumpRecords) {
810          O->OS << Indent << "</";
811          if (BlockName)
812            O->OS << *BlockName << ">\n";
813          else
814            O->OS << "UnknownBlock" << BlockID << ">\n";
815        }
816        return Error::success();
817      }
818  
819      case BitstreamEntry::SubBlock: {
820        uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
821        if (Error E = parseBlock(Entry.ID, IndentLevel + 1, O, CheckHash))
822          return E;
823        ++BlockStats.NumSubBlocks;
824        uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
825  
826        // Don't include subblock sizes in the size of this block.
827        BlockBitStart += SubBlockBitEnd - SubBlockBitStart;
828        continue;
829      }
830      case BitstreamEntry::Record:
831        // The interesting case.
832        break;
833      }
834  
835      if (Entry.ID == bitc::DEFINE_ABBREV) {
836        if (Error Err = Stream.ReadAbbrevRecord())
837          return Err;
838        ++BlockStats.NumAbbrevs;
839        continue;
840      }
841  
842      Record.clear();
843  
844      ++BlockStats.NumRecords;
845  
846      StringRef Blob;
847      uint64_t CurrentRecordPos = Stream.GetCurrentBitNo();
848      unsigned Code;
849      if (Error E = Stream.readRecord(Entry.ID, Record, &Blob).moveInto(Code))
850        return E;
851  
852      // Increment the # occurrences of this code.
853      if (BlockStats.CodeFreq.size() <= Code)
854        BlockStats.CodeFreq.resize(Code + 1);
855      BlockStats.CodeFreq[Code].NumInstances++;
856      BlockStats.CodeFreq[Code].TotalBits +=
857          Stream.GetCurrentBitNo() - RecordStartBit;
858      if (Entry.ID != bitc::UNABBREV_RECORD) {
859        BlockStats.CodeFreq[Code].NumAbbrev++;
860        ++BlockStats.NumAbbreviatedRecords;
861      }
862  
863      if (DumpRecords) {
864        O->OS << Indent << "  <";
865        std::optional<const char *> CodeName =
866            GetCodeName(Code, BlockID, BlockInfo, CurStreamType);
867        if (CodeName)
868          O->OS << *CodeName;
869        else
870          O->OS << "UnknownCode" << Code;
871        if (!O->Symbolic && CodeName)
872          O->OS << " codeid=" << Code;
873        const BitCodeAbbrev *Abbv = nullptr;
874        if (Entry.ID != bitc::UNABBREV_RECORD) {
875          Expected<const BitCodeAbbrev *> MaybeAbbv = Stream.getAbbrev(Entry.ID);
876          if (!MaybeAbbv)
877            return MaybeAbbv.takeError();
878          Abbv = MaybeAbbv.get();
879          O->OS << " abbrevid=" << Entry.ID;
880        }
881  
882        for (unsigned i = 0, e = Record.size(); i != e; ++i)
883          O->OS << " op" << i << "=" << (int64_t)Record[i];
884  
885        // If we found a metadata index, let's verify that we had an offset
886        // before and validate its forward reference offset was correct!
887        if (BlockID == bitc::METADATA_BLOCK_ID) {
888          if (Code == bitc::METADATA_INDEX_OFFSET) {
889            if (Record.size() != 2)
890              O->OS << "(Invalid record)";
891            else {
892              auto Offset = Record[0] + (Record[1] << 32);
893              MetadataIndexOffset = Stream.GetCurrentBitNo() + Offset;
894            }
895          }
896          if (Code == bitc::METADATA_INDEX) {
897            O->OS << " (offset ";
898            if (MetadataIndexOffset == RecordStartBit)
899              O->OS << "match)";
900            else
901              O->OS << "mismatch: " << MetadataIndexOffset << " vs "
902                    << RecordStartBit << ")";
903          }
904        }
905  
906        // If we found a module hash, let's verify that it matches!
907        if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH &&
908            CheckHash) {
909          if (Record.size() != 5)
910            O->OS << " (invalid)";
911          else {
912            // Recompute the hash and compare it to the one in the bitcode
913            SHA1 Hasher;
914            std::array<uint8_t, 20> Hash;
915            Hasher.update(*CheckHash);
916            {
917              int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos;
918              auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
919              Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
920              Hash = Hasher.result();
921            }
922            std::array<uint8_t, 20> RecordedHash;
923            int Pos = 0;
924            for (auto &Val : Record) {
925              assert(!(Val >> 32) && "Unexpected high bits set");
926              support::endian::write32be(&RecordedHash[Pos], Val);
927              Pos += 4;
928            }
929            if (Hash == RecordedHash)
930              O->OS << " (match)";
931            else
932              O->OS << " (!mismatch!)";
933          }
934        }
935  
936        O->OS << "/>";
937  
938        if (Abbv) {
939          for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
940            const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
941            if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array)
942              continue;
943            assert(i + 2 == e && "Array op not second to last");
944            std::string Str;
945            bool ArrayIsPrintable = true;
946            for (unsigned j = i - 1, je = Record.size(); j != je; ++j) {
947              if (!isPrint(static_cast<unsigned char>(Record[j]))) {
948                ArrayIsPrintable = false;
949                break;
950              }
951              Str += (char)Record[j];
952            }
953            if (ArrayIsPrintable)
954              O->OS << " record string = '" << Str << "'";
955            break;
956          }
957        }
958  
959        if (Blob.data()) {
960          if (canDecodeBlob(Code, BlockID)) {
961            if (Error E = decodeMetadataStringsBlob(Indent, Record, Blob, O->OS))
962              return E;
963          } else {
964            O->OS << " blob data = ";
965            if (O->ShowBinaryBlobs) {
966              O->OS << "'";
967              O->OS.write_escaped(Blob, /*hex=*/true) << "'";
968            } else {
969              bool BlobIsPrintable = true;
970              for (char C : Blob)
971                if (!isPrint(static_cast<unsigned char>(C))) {
972                  BlobIsPrintable = false;
973                  break;
974                }
975  
976              if (BlobIsPrintable)
977                O->OS << "'" << Blob << "'";
978              else
979                O->OS << "unprintable, " << Blob.size() << " bytes.";
980            }
981          }
982        }
983  
984        O->OS << "\n";
985      }
986  
987      // Make sure that we can skip the current record.
988      if (Error Err = Stream.JumpToBit(CurrentRecordPos))
989        return Err;
990      if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
991        ; // Do nothing.
992      else
993        return Skipped.takeError();
994    }
995  }
996  
997