xref: /freebsd/contrib/llvm-project/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp (revision 23f24377b1a9ab6677f00f2302484d6658d94cab)
1  //===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  
9  #include "llvm/Bitcode/BitcodeAnalyzer.h"
10  #include "llvm/Bitcode/BitcodeReader.h"
11  #include "llvm/Bitcode/LLVMBitCodes.h"
12  #include "llvm/Bitstream/BitCodes.h"
13  #include "llvm/Bitstream/BitstreamReader.h"
14  #include "llvm/Support/Format.h"
15  #include "llvm/Support/SHA1.h"
16  
17  using namespace llvm;
18  
19  static Error reportError(StringRef Message) {
20    return createStringError(std::errc::illegal_byte_sequence, Message.data());
21  }
22  
23  /// Return a symbolic block name if known, otherwise return null.
24  static Optional<const char *> GetBlockName(unsigned BlockID,
25                                             const BitstreamBlockInfo &BlockInfo,
26                                             CurStreamTypeType CurStreamType) {
27    // Standard blocks for all bitcode files.
28    if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
29      if (BlockID == bitc::BLOCKINFO_BLOCK_ID)
30        return "BLOCKINFO_BLOCK";
31      return None;
32    }
33  
34    // Check to see if we have a blockinfo record for this block, with a name.
35    if (const BitstreamBlockInfo::BlockInfo *Info =
36            BlockInfo.getBlockInfo(BlockID)) {
37      if (!Info->Name.empty())
38        return Info->Name.c_str();
39    }
40  
41    if (CurStreamType != LLVMIRBitstream)
42      return None;
43  
44    switch (BlockID) {
45    default:
46      return None;
47    case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
48      return "OPERAND_BUNDLE_TAGS_BLOCK";
49    case bitc::MODULE_BLOCK_ID:
50      return "MODULE_BLOCK";
51    case bitc::PARAMATTR_BLOCK_ID:
52      return "PARAMATTR_BLOCK";
53    case bitc::PARAMATTR_GROUP_BLOCK_ID:
54      return "PARAMATTR_GROUP_BLOCK_ID";
55    case bitc::TYPE_BLOCK_ID_NEW:
56      return "TYPE_BLOCK_ID";
57    case bitc::CONSTANTS_BLOCK_ID:
58      return "CONSTANTS_BLOCK";
59    case bitc::FUNCTION_BLOCK_ID:
60      return "FUNCTION_BLOCK";
61    case bitc::IDENTIFICATION_BLOCK_ID:
62      return "IDENTIFICATION_BLOCK_ID";
63    case bitc::VALUE_SYMTAB_BLOCK_ID:
64      return "VALUE_SYMTAB";
65    case bitc::METADATA_BLOCK_ID:
66      return "METADATA_BLOCK";
67    case bitc::METADATA_KIND_BLOCK_ID:
68      return "METADATA_KIND_BLOCK";
69    case bitc::METADATA_ATTACHMENT_ID:
70      return "METADATA_ATTACHMENT_BLOCK";
71    case bitc::USELIST_BLOCK_ID:
72      return "USELIST_BLOCK_ID";
73    case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
74      return "GLOBALVAL_SUMMARY_BLOCK";
75    case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
76      return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK";
77    case bitc::MODULE_STRTAB_BLOCK_ID:
78      return "MODULE_STRTAB_BLOCK";
79    case bitc::STRTAB_BLOCK_ID:
80      return "STRTAB_BLOCK";
81    case bitc::SYMTAB_BLOCK_ID:
82      return "SYMTAB_BLOCK";
83    }
84  }
85  
86  /// Return a symbolic code name if known, otherwise return null.
87  static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID,
88                                            const BitstreamBlockInfo &BlockInfo,
89                                            CurStreamTypeType CurStreamType) {
90    // Standard blocks for all bitcode files.
91    if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
92      if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
93        switch (CodeID) {
94        default:
95          return None;
96        case bitc::BLOCKINFO_CODE_SETBID:
97          return "SETBID";
98        case bitc::BLOCKINFO_CODE_BLOCKNAME:
99          return "BLOCKNAME";
100        case bitc::BLOCKINFO_CODE_SETRECORDNAME:
101          return "SETRECORDNAME";
102        }
103      }
104      return None;
105    }
106  
107    // Check to see if we have a blockinfo record for this record, with a name.
108    if (const BitstreamBlockInfo::BlockInfo *Info =
109            BlockInfo.getBlockInfo(BlockID)) {
110      for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i)
111        if (Info->RecordNames[i].first == CodeID)
112          return Info->RecordNames[i].second.c_str();
113    }
114  
115    if (CurStreamType != LLVMIRBitstream)
116      return None;
117  
118  #define STRINGIFY_CODE(PREFIX, CODE)                                           \
119    case bitc::PREFIX##_##CODE:                                                  \
120      return #CODE;
121    switch (BlockID) {
122    default:
123      return None;
124    case bitc::MODULE_BLOCK_ID:
125      switch (CodeID) {
126      default:
127        return None;
128        STRINGIFY_CODE(MODULE_CODE, VERSION)
129        STRINGIFY_CODE(MODULE_CODE, TRIPLE)
130        STRINGIFY_CODE(MODULE_CODE, DATALAYOUT)
131        STRINGIFY_CODE(MODULE_CODE, ASM)
132        STRINGIFY_CODE(MODULE_CODE, SECTIONNAME)
133        STRINGIFY_CODE(MODULE_CODE, DEPLIB) // Deprecated, present in old bitcode
134        STRINGIFY_CODE(MODULE_CODE, GLOBALVAR)
135        STRINGIFY_CODE(MODULE_CODE, FUNCTION)
136        STRINGIFY_CODE(MODULE_CODE, ALIAS)
137        STRINGIFY_CODE(MODULE_CODE, GCNAME)
138        STRINGIFY_CODE(MODULE_CODE, COMDAT)
139        STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
140        STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
141        STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
142        STRINGIFY_CODE(MODULE_CODE, HASH)
143      }
144    case bitc::IDENTIFICATION_BLOCK_ID:
145      switch (CodeID) {
146      default:
147        return None;
148        STRINGIFY_CODE(IDENTIFICATION_CODE, STRING)
149        STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH)
150      }
151    case bitc::PARAMATTR_BLOCK_ID:
152      switch (CodeID) {
153      default:
154        return None;
155      // FIXME: Should these be different?
156      case bitc::PARAMATTR_CODE_ENTRY_OLD:
157        return "ENTRY";
158      case bitc::PARAMATTR_CODE_ENTRY:
159        return "ENTRY";
160      }
161    case bitc::PARAMATTR_GROUP_BLOCK_ID:
162      switch (CodeID) {
163      default:
164        return None;
165      case bitc::PARAMATTR_GRP_CODE_ENTRY:
166        return "ENTRY";
167      }
168    case bitc::TYPE_BLOCK_ID_NEW:
169      switch (CodeID) {
170      default:
171        return None;
172        STRINGIFY_CODE(TYPE_CODE, NUMENTRY)
173        STRINGIFY_CODE(TYPE_CODE, VOID)
174        STRINGIFY_CODE(TYPE_CODE, FLOAT)
175        STRINGIFY_CODE(TYPE_CODE, DOUBLE)
176        STRINGIFY_CODE(TYPE_CODE, LABEL)
177        STRINGIFY_CODE(TYPE_CODE, OPAQUE)
178        STRINGIFY_CODE(TYPE_CODE, INTEGER)
179        STRINGIFY_CODE(TYPE_CODE, POINTER)
180        STRINGIFY_CODE(TYPE_CODE, HALF)
181        STRINGIFY_CODE(TYPE_CODE, ARRAY)
182        STRINGIFY_CODE(TYPE_CODE, VECTOR)
183        STRINGIFY_CODE(TYPE_CODE, X86_FP80)
184        STRINGIFY_CODE(TYPE_CODE, FP128)
185        STRINGIFY_CODE(TYPE_CODE, PPC_FP128)
186        STRINGIFY_CODE(TYPE_CODE, METADATA)
187        STRINGIFY_CODE(TYPE_CODE, X86_MMX)
188        STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON)
189        STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME)
190        STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED)
191        STRINGIFY_CODE(TYPE_CODE, FUNCTION)
192        STRINGIFY_CODE(TYPE_CODE, TOKEN)
193        STRINGIFY_CODE(TYPE_CODE, BFLOAT)
194      }
195  
196    case bitc::CONSTANTS_BLOCK_ID:
197      switch (CodeID) {
198      default:
199        return None;
200        STRINGIFY_CODE(CST_CODE, SETTYPE)
201        STRINGIFY_CODE(CST_CODE, NULL)
202        STRINGIFY_CODE(CST_CODE, UNDEF)
203        STRINGIFY_CODE(CST_CODE, INTEGER)
204        STRINGIFY_CODE(CST_CODE, WIDE_INTEGER)
205        STRINGIFY_CODE(CST_CODE, FLOAT)
206        STRINGIFY_CODE(CST_CODE, AGGREGATE)
207        STRINGIFY_CODE(CST_CODE, STRING)
208        STRINGIFY_CODE(CST_CODE, CSTRING)
209        STRINGIFY_CODE(CST_CODE, CE_BINOP)
210        STRINGIFY_CODE(CST_CODE, CE_CAST)
211        STRINGIFY_CODE(CST_CODE, CE_GEP)
212        STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP)
213        STRINGIFY_CODE(CST_CODE, CE_SELECT)
214        STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT)
215        STRINGIFY_CODE(CST_CODE, CE_INSERTELT)
216        STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC)
217        STRINGIFY_CODE(CST_CODE, CE_CMP)
218        STRINGIFY_CODE(CST_CODE, INLINEASM)
219        STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX)
220        STRINGIFY_CODE(CST_CODE, CE_UNOP)
221      case bitc::CST_CODE_BLOCKADDRESS:
222        return "CST_CODE_BLOCKADDRESS";
223        STRINGIFY_CODE(CST_CODE, DATA)
224      }
225    case bitc::FUNCTION_BLOCK_ID:
226      switch (CodeID) {
227      default:
228        return None;
229        STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS)
230        STRINGIFY_CODE(FUNC_CODE, INST_BINOP)
231        STRINGIFY_CODE(FUNC_CODE, INST_CAST)
232        STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD)
233        STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD)
234        STRINGIFY_CODE(FUNC_CODE, INST_SELECT)
235        STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT)
236        STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT)
237        STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC)
238        STRINGIFY_CODE(FUNC_CODE, INST_CMP)
239        STRINGIFY_CODE(FUNC_CODE, INST_RET)
240        STRINGIFY_CODE(FUNC_CODE, INST_BR)
241        STRINGIFY_CODE(FUNC_CODE, INST_SWITCH)
242        STRINGIFY_CODE(FUNC_CODE, INST_INVOKE)
243        STRINGIFY_CODE(FUNC_CODE, INST_UNOP)
244        STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE)
245        STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET)
246        STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET)
247        STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD)
248        STRINGIFY_CODE(FUNC_CODE, INST_PHI)
249        STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA)
250        STRINGIFY_CODE(FUNC_CODE, INST_LOAD)
251        STRINGIFY_CODE(FUNC_CODE, INST_VAARG)
252        STRINGIFY_CODE(FUNC_CODE, INST_STORE)
253        STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL)
254        STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL)
255        STRINGIFY_CODE(FUNC_CODE, INST_CMP2)
256        STRINGIFY_CODE(FUNC_CODE, INST_VSELECT)
257        STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN)
258        STRINGIFY_CODE(FUNC_CODE, INST_CALL)
259        STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC)
260        STRINGIFY_CODE(FUNC_CODE, INST_GEP)
261        STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE)
262        STRINGIFY_CODE(FUNC_CODE, INST_FENCE)
263        STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW)
264        STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC)
265        STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC)
266        STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG)
267        STRINGIFY_CODE(FUNC_CODE, INST_CALLBR)
268      }
269    case bitc::VALUE_SYMTAB_BLOCK_ID:
270      switch (CodeID) {
271      default:
272        return None;
273        STRINGIFY_CODE(VST_CODE, ENTRY)
274        STRINGIFY_CODE(VST_CODE, BBENTRY)
275        STRINGIFY_CODE(VST_CODE, FNENTRY)
276        STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY)
277      }
278    case bitc::MODULE_STRTAB_BLOCK_ID:
279      switch (CodeID) {
280      default:
281        return None;
282        STRINGIFY_CODE(MST_CODE, ENTRY)
283        STRINGIFY_CODE(MST_CODE, HASH)
284      }
285    case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
286    case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID:
287      switch (CodeID) {
288      default:
289        return None;
290        STRINGIFY_CODE(FS, PERMODULE)
291        STRINGIFY_CODE(FS, PERMODULE_PROFILE)
292        STRINGIFY_CODE(FS, PERMODULE_RELBF)
293        STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS)
294        STRINGIFY_CODE(FS, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS)
295        STRINGIFY_CODE(FS, COMBINED)
296        STRINGIFY_CODE(FS, COMBINED_PROFILE)
297        STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS)
298        STRINGIFY_CODE(FS, ALIAS)
299        STRINGIFY_CODE(FS, COMBINED_ALIAS)
300        STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME)
301        STRINGIFY_CODE(FS, VERSION)
302        STRINGIFY_CODE(FS, FLAGS)
303        STRINGIFY_CODE(FS, TYPE_TESTS)
304        STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_VCALLS)
305        STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_VCALLS)
306        STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_CONST_VCALL)
307        STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_CONST_VCALL)
308        STRINGIFY_CODE(FS, VALUE_GUID)
309        STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS)
310        STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS)
311        STRINGIFY_CODE(FS, TYPE_ID)
312        STRINGIFY_CODE(FS, TYPE_ID_METADATA)
313        STRINGIFY_CODE(FS, BLOCK_COUNT)
314        STRINGIFY_CODE(FS, PARAM_ACCESS)
315      }
316    case bitc::METADATA_ATTACHMENT_ID:
317      switch (CodeID) {
318      default:
319        return None;
320        STRINGIFY_CODE(METADATA, ATTACHMENT)
321      }
322    case bitc::METADATA_BLOCK_ID:
323      switch (CodeID) {
324      default:
325        return None;
326        STRINGIFY_CODE(METADATA, STRING_OLD)
327        STRINGIFY_CODE(METADATA, VALUE)
328        STRINGIFY_CODE(METADATA, NODE)
329        STRINGIFY_CODE(METADATA, NAME)
330        STRINGIFY_CODE(METADATA, DISTINCT_NODE)
331        STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
332        STRINGIFY_CODE(METADATA, LOCATION)
333        STRINGIFY_CODE(METADATA, OLD_NODE)
334        STRINGIFY_CODE(METADATA, OLD_FN_NODE)
335        STRINGIFY_CODE(METADATA, NAMED_NODE)
336        STRINGIFY_CODE(METADATA, GENERIC_DEBUG)
337        STRINGIFY_CODE(METADATA, SUBRANGE)
338        STRINGIFY_CODE(METADATA, ENUMERATOR)
339        STRINGIFY_CODE(METADATA, BASIC_TYPE)
340        STRINGIFY_CODE(METADATA, FILE)
341        STRINGIFY_CODE(METADATA, DERIVED_TYPE)
342        STRINGIFY_CODE(METADATA, COMPOSITE_TYPE)
343        STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE)
344        STRINGIFY_CODE(METADATA, COMPILE_UNIT)
345        STRINGIFY_CODE(METADATA, SUBPROGRAM)
346        STRINGIFY_CODE(METADATA, LEXICAL_BLOCK)
347        STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE)
348        STRINGIFY_CODE(METADATA, NAMESPACE)
349        STRINGIFY_CODE(METADATA, TEMPLATE_TYPE)
350        STRINGIFY_CODE(METADATA, TEMPLATE_VALUE)
351        STRINGIFY_CODE(METADATA, GLOBAL_VAR)
352        STRINGIFY_CODE(METADATA, LOCAL_VAR)
353        STRINGIFY_CODE(METADATA, EXPRESSION)
354        STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
355        STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
356        STRINGIFY_CODE(METADATA, MODULE)
357        STRINGIFY_CODE(METADATA, MACRO)
358        STRINGIFY_CODE(METADATA, MACRO_FILE)
359        STRINGIFY_CODE(METADATA, STRINGS)
360        STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT)
361        STRINGIFY_CODE(METADATA, GLOBAL_VAR_EXPR)
362        STRINGIFY_CODE(METADATA, INDEX_OFFSET)
363        STRINGIFY_CODE(METADATA, INDEX)
364      }
365    case bitc::METADATA_KIND_BLOCK_ID:
366      switch (CodeID) {
367      default:
368        return None;
369        STRINGIFY_CODE(METADATA, KIND)
370      }
371    case bitc::USELIST_BLOCK_ID:
372      switch (CodeID) {
373      default:
374        return None;
375      case bitc::USELIST_CODE_DEFAULT:
376        return "USELIST_CODE_DEFAULT";
377      case bitc::USELIST_CODE_BB:
378        return "USELIST_CODE_BB";
379      }
380  
381    case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
382      switch (CodeID) {
383      default:
384        return None;
385      case bitc::OPERAND_BUNDLE_TAG:
386        return "OPERAND_BUNDLE_TAG";
387      }
388    case bitc::STRTAB_BLOCK_ID:
389      switch (CodeID) {
390      default:
391        return None;
392      case bitc::STRTAB_BLOB:
393        return "BLOB";
394      }
395    case bitc::SYMTAB_BLOCK_ID:
396      switch (CodeID) {
397      default:
398        return None;
399      case bitc::SYMTAB_BLOB:
400        return "BLOB";
401      }
402    }
403  #undef STRINGIFY_CODE
404  }
405  
406  static void printSize(raw_ostream &OS, double Bits) {
407    OS << format("%.2f/%.2fB/%luW", Bits, Bits / 8, (unsigned long)(Bits / 32));
408  }
409  static void printSize(raw_ostream &OS, uint64_t Bits) {
410    OS << format("%lub/%.2fB/%luW", (unsigned long)Bits, (double)Bits / 8,
411                 (unsigned long)(Bits / 32));
412  }
413  
414  static Expected<CurStreamTypeType> ReadSignature(BitstreamCursor &Stream) {
415    auto tryRead = [&Stream](char &Dest, size_t size) -> Error {
416      if (Expected<SimpleBitstreamCursor::word_t> MaybeWord = Stream.Read(size))
417        Dest = MaybeWord.get();
418      else
419        return MaybeWord.takeError();
420      return Error::success();
421    };
422  
423    char Signature[6];
424    if (Error Err = tryRead(Signature[0], 8))
425      return std::move(Err);
426    if (Error Err = tryRead(Signature[1], 8))
427      return std::move(Err);
428  
429    // Autodetect the file contents, if it is one we know.
430    if (Signature[0] == 'C' && Signature[1] == 'P') {
431      if (Error Err = tryRead(Signature[2], 8))
432        return std::move(Err);
433      if (Error Err = tryRead(Signature[3], 8))
434        return std::move(Err);
435      if (Signature[2] == 'C' && Signature[3] == 'H')
436        return ClangSerializedASTBitstream;
437    } else if (Signature[0] == 'D' && Signature[1] == 'I') {
438      if (Error Err = tryRead(Signature[2], 8))
439        return std::move(Err);
440      if (Error Err = tryRead(Signature[3], 8))
441        return std::move(Err);
442      if (Signature[2] == 'A' && Signature[3] == 'G')
443        return ClangSerializedDiagnosticsBitstream;
444    } else if (Signature[0] == 'R' && Signature[1] == 'M') {
445      if (Error Err = tryRead(Signature[2], 8))
446        return std::move(Err);
447      if (Error Err = tryRead(Signature[3], 8))
448        return std::move(Err);
449      if (Signature[2] == 'R' && Signature[3] == 'K')
450        return LLVMBitstreamRemarks;
451    } else {
452      if (Error Err = tryRead(Signature[2], 4))
453        return std::move(Err);
454      if (Error Err = tryRead(Signature[3], 4))
455        return std::move(Err);
456      if (Error Err = tryRead(Signature[4], 4))
457        return std::move(Err);
458      if (Error Err = tryRead(Signature[5], 4))
459        return std::move(Err);
460      if (Signature[0] == 'B' && Signature[1] == 'C' && Signature[2] == 0x0 &&
461          Signature[3] == 0xC && Signature[4] == 0xE && Signature[5] == 0xD)
462        return LLVMIRBitstream;
463    }
464    return UnknownBitstream;
465  }
466  
467  static Expected<CurStreamTypeType> analyzeHeader(Optional<BCDumpOptions> O,
468                                                   BitstreamCursor &Stream) {
469    ArrayRef<uint8_t> Bytes = Stream.getBitcodeBytes();
470    const unsigned char *BufPtr = (const unsigned char *)Bytes.data();
471    const unsigned char *EndBufPtr = BufPtr + Bytes.size();
472  
473    // If we have a wrapper header, parse it and ignore the non-bc file
474    // contents. The magic number is 0x0B17C0DE stored in little endian.
475    if (isBitcodeWrapper(BufPtr, EndBufPtr)) {
476      if (Bytes.size() < BWH_HeaderSize)
477        return reportError("Invalid bitcode wrapper header");
478  
479      if (O) {
480        unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]);
481        unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]);
482        unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
483        unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
484        unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]);
485  
486        O->OS << "<BITCODE_WRAPPER_HEADER"
487              << " Magic=" << format_hex(Magic, 10)
488              << " Version=" << format_hex(Version, 10)
489              << " Offset=" << format_hex(Offset, 10)
490              << " Size=" << format_hex(Size, 10)
491              << " CPUType=" << format_hex(CPUType, 10) << "/>\n";
492      }
493  
494      if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true))
495        return reportError("Invalid bitcode wrapper header");
496    }
497  
498    // Use the cursor modified by skipping the wrapper header.
499    Stream = BitstreamCursor(ArrayRef<uint8_t>(BufPtr, EndBufPtr));
500  
501    return ReadSignature(Stream);
502  }
503  
504  static bool canDecodeBlob(unsigned Code, unsigned BlockID) {
505    return BlockID == bitc::METADATA_BLOCK_ID && Code == bitc::METADATA_STRINGS;
506  }
507  
508  Error BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent,
509                                                   ArrayRef<uint64_t> Record,
510                                                   StringRef Blob,
511                                                   raw_ostream &OS) {
512    if (Blob.empty())
513      return reportError("Cannot decode empty blob.");
514  
515    if (Record.size() != 2)
516      return reportError(
517          "Decoding metadata strings blob needs two record entries.");
518  
519    unsigned NumStrings = Record[0];
520    unsigned StringsOffset = Record[1];
521    OS << " num-strings = " << NumStrings << " {\n";
522  
523    StringRef Lengths = Blob.slice(0, StringsOffset);
524    SimpleBitstreamCursor R(Lengths);
525    StringRef Strings = Blob.drop_front(StringsOffset);
526    do {
527      if (R.AtEndOfStream())
528        return reportError("bad length");
529  
530      Expected<uint32_t> MaybeSize = R.ReadVBR(6);
531      if (!MaybeSize)
532        return MaybeSize.takeError();
533      uint32_t Size = MaybeSize.get();
534      if (Strings.size() < Size)
535        return reportError("truncated chars");
536  
537      OS << Indent << "    '";
538      OS.write_escaped(Strings.slice(0, Size), /*hex=*/true);
539      OS << "'\n";
540      Strings = Strings.drop_front(Size);
541    } while (--NumStrings);
542  
543    OS << Indent << "  }";
544    return Error::success();
545  }
546  
547  BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer,
548                                   Optional<StringRef> BlockInfoBuffer)
549      : Stream(Buffer) {
550    if (BlockInfoBuffer)
551      BlockInfoStream.emplace(*BlockInfoBuffer);
552  }
553  
554  Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O,
555                                 Optional<StringRef> CheckHash) {
556    Expected<CurStreamTypeType> MaybeType = analyzeHeader(O, Stream);
557    if (!MaybeType)
558      return MaybeType.takeError();
559    else
560      CurStreamType = *MaybeType;
561  
562    Stream.setBlockInfo(&BlockInfo);
563  
564    // Read block info from BlockInfoStream, if specified.
565    // The block info must be a top-level block.
566    if (BlockInfoStream) {
567      BitstreamCursor BlockInfoCursor(*BlockInfoStream);
568      Expected<CurStreamTypeType> H = analyzeHeader(O, BlockInfoCursor);
569      if (!H)
570        return H.takeError();
571  
572      while (!BlockInfoCursor.AtEndOfStream()) {
573        Expected<unsigned> MaybeCode = BlockInfoCursor.ReadCode();
574        if (!MaybeCode)
575          return MaybeCode.takeError();
576        if (MaybeCode.get() != bitc::ENTER_SUBBLOCK)
577          return reportError("Invalid record at top-level in block info file");
578  
579        Expected<unsigned> MaybeBlockID = BlockInfoCursor.ReadSubBlockID();
580        if (!MaybeBlockID)
581          return MaybeBlockID.takeError();
582        if (MaybeBlockID.get() == bitc::BLOCKINFO_BLOCK_ID) {
583          Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
584              BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
585          if (!MaybeNewBlockInfo)
586            return MaybeNewBlockInfo.takeError();
587          Optional<BitstreamBlockInfo> NewBlockInfo =
588              std::move(MaybeNewBlockInfo.get());
589          if (!NewBlockInfo)
590            return reportError("Malformed BlockInfoBlock in block info file");
591          BlockInfo = std::move(*NewBlockInfo);
592          break;
593        }
594  
595        if (Error Err = BlockInfoCursor.SkipBlock())
596          return Err;
597      }
598    }
599  
600    // Parse the top-level structure.  We only allow blocks at the top-level.
601    while (!Stream.AtEndOfStream()) {
602      Expected<unsigned> MaybeCode = Stream.ReadCode();
603      if (!MaybeCode)
604        return MaybeCode.takeError();
605      if (MaybeCode.get() != bitc::ENTER_SUBBLOCK)
606        return reportError("Invalid record at top-level");
607  
608      Expected<unsigned> MaybeBlockID = Stream.ReadSubBlockID();
609      if (!MaybeBlockID)
610        return MaybeBlockID.takeError();
611  
612      if (Error E = parseBlock(MaybeBlockID.get(), 0, O, CheckHash))
613        return E;
614      ++NumTopBlocks;
615    }
616  
617    return Error::success();
618  }
619  
620  void BitcodeAnalyzer::printStats(BCDumpOptions O,
621                                   Optional<StringRef> Filename) {
622    uint64_t BufferSizeBits = Stream.getBitcodeBytes().size() * CHAR_BIT;
623    // Print a summary of the read file.
624    O.OS << "Summary ";
625    if (Filename)
626      O.OS << "of " << Filename->data() << ":\n";
627    O.OS << "         Total size: ";
628    printSize(O.OS, BufferSizeBits);
629    O.OS << "\n";
630    O.OS << "        Stream type: ";
631    switch (CurStreamType) {
632    case UnknownBitstream:
633      O.OS << "unknown\n";
634      break;
635    case LLVMIRBitstream:
636      O.OS << "LLVM IR\n";
637      break;
638    case ClangSerializedASTBitstream:
639      O.OS << "Clang Serialized AST\n";
640      break;
641    case ClangSerializedDiagnosticsBitstream:
642      O.OS << "Clang Serialized Diagnostics\n";
643      break;
644    case LLVMBitstreamRemarks:
645      O.OS << "LLVM Remarks\n";
646      break;
647    }
648    O.OS << "  # Toplevel Blocks: " << NumTopBlocks << "\n";
649    O.OS << "\n";
650  
651    // Emit per-block stats.
652    O.OS << "Per-block Summary:\n";
653    for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(),
654                                                       E = BlockIDStats.end();
655         I != E; ++I) {
656      O.OS << "  Block ID #" << I->first;
657      if (Optional<const char *> BlockName =
658              GetBlockName(I->first, BlockInfo, CurStreamType))
659        O.OS << " (" << *BlockName << ")";
660      O.OS << ":\n";
661  
662      const PerBlockIDStats &Stats = I->second;
663      O.OS << "      Num Instances: " << Stats.NumInstances << "\n";
664      O.OS << "         Total Size: ";
665      printSize(O.OS, Stats.NumBits);
666      O.OS << "\n";
667      double pct = (Stats.NumBits * 100.0) / BufferSizeBits;
668      O.OS << "    Percent of file: " << format("%2.4f%%", pct) << "\n";
669      if (Stats.NumInstances > 1) {
670        O.OS << "       Average Size: ";
671        printSize(O.OS, Stats.NumBits / (double)Stats.NumInstances);
672        O.OS << "\n";
673        O.OS << "  Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
674             << Stats.NumSubBlocks / (double)Stats.NumInstances << "\n";
675        O.OS << "    Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
676             << Stats.NumAbbrevs / (double)Stats.NumInstances << "\n";
677        O.OS << "    Tot/Avg Records: " << Stats.NumRecords << "/"
678             << Stats.NumRecords / (double)Stats.NumInstances << "\n";
679      } else {
680        O.OS << "      Num SubBlocks: " << Stats.NumSubBlocks << "\n";
681        O.OS << "        Num Abbrevs: " << Stats.NumAbbrevs << "\n";
682        O.OS << "        Num Records: " << Stats.NumRecords << "\n";
683      }
684      if (Stats.NumRecords) {
685        double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords;
686        O.OS << "    Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
687      }
688      O.OS << "\n";
689  
690      // Print a histogram of the codes we see.
691      if (O.Histogram && !Stats.CodeFreq.empty()) {
692        std::vector<std::pair<unsigned, unsigned>> FreqPairs; // <freq,code>
693        for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i)
694          if (unsigned Freq = Stats.CodeFreq[i].NumInstances)
695            FreqPairs.push_back(std::make_pair(Freq, i));
696        llvm::stable_sort(FreqPairs);
697        std::reverse(FreqPairs.begin(), FreqPairs.end());
698  
699        O.OS << "\tRecord Histogram:\n";
700        O.OS << "\t\t  Count    # Bits     b/Rec   % Abv  Record Kind\n";
701        for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
702          const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
703  
704          O.OS << format("\t\t%7d %9lu", RecStats.NumInstances,
705                         (unsigned long)RecStats.TotalBits);
706  
707          if (RecStats.NumInstances > 1)
708            O.OS << format(" %9.1f",
709                           (double)RecStats.TotalBits / RecStats.NumInstances);
710          else
711            O.OS << "          ";
712  
713          if (RecStats.NumAbbrev)
714            O.OS << format(" %7.2f", (double)RecStats.NumAbbrev /
715                                         RecStats.NumInstances * 100);
716          else
717            O.OS << "        ";
718  
719          O.OS << "  ";
720          if (Optional<const char *> CodeName = GetCodeName(
721                  FreqPairs[i].second, I->first, BlockInfo, CurStreamType))
722            O.OS << *CodeName << "\n";
723          else
724            O.OS << "UnknownCode" << FreqPairs[i].second << "\n";
725        }
726        O.OS << "\n";
727      }
728    }
729  }
730  
731  Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
732                                    Optional<BCDumpOptions> O,
733                                    Optional<StringRef> CheckHash) {
734    std::string Indent(IndentLevel * 2, ' ');
735    uint64_t BlockBitStart = Stream.GetCurrentBitNo();
736  
737    // Get the statistics for this BlockID.
738    PerBlockIDStats &BlockStats = BlockIDStats[BlockID];
739  
740    BlockStats.NumInstances++;
741  
742    // BLOCKINFO is a special part of the stream.
743    bool DumpRecords = O.hasValue();
744    if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
745      if (O)
746        O->OS << Indent << "<BLOCKINFO_BLOCK/>\n";
747      Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
748          Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
749      if (!MaybeNewBlockInfo)
750        return MaybeNewBlockInfo.takeError();
751      Optional<BitstreamBlockInfo> NewBlockInfo =
752          std::move(MaybeNewBlockInfo.get());
753      if (!NewBlockInfo)
754        return reportError("Malformed BlockInfoBlock");
755      BlockInfo = std::move(*NewBlockInfo);
756      if (Error Err = Stream.JumpToBit(BlockBitStart))
757        return Err;
758      // It's not really interesting to dump the contents of the blockinfo
759      // block.
760      DumpRecords = false;
761    }
762  
763    unsigned NumWords = 0;
764    if (Error Err = Stream.EnterSubBlock(BlockID, &NumWords))
765      return Err;
766  
767    // Keep it for later, when we see a MODULE_HASH record
768    uint64_t BlockEntryPos = Stream.getCurrentByteNo();
769  
770    Optional<const char *> BlockName = None;
771    if (DumpRecords) {
772      O->OS << Indent << "<";
773      if ((BlockName = GetBlockName(BlockID, BlockInfo, CurStreamType)))
774        O->OS << *BlockName;
775      else
776        O->OS << "UnknownBlock" << BlockID;
777  
778      if (!O->Symbolic && BlockName)
779        O->OS << " BlockID=" << BlockID;
780  
781      O->OS << " NumWords=" << NumWords
782            << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n";
783    }
784  
785    SmallVector<uint64_t, 64> Record;
786  
787    // Keep the offset to the metadata index if seen.
788    uint64_t MetadataIndexOffset = 0;
789  
790    // Read all the records for this block.
791    while (1) {
792      if (Stream.AtEndOfStream())
793        return reportError("Premature end of bitstream");
794  
795      uint64_t RecordStartBit = Stream.GetCurrentBitNo();
796  
797      Expected<BitstreamEntry> MaybeEntry =
798          Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
799      if (!MaybeEntry)
800        return MaybeEntry.takeError();
801      BitstreamEntry Entry = MaybeEntry.get();
802  
803      switch (Entry.Kind) {
804      case BitstreamEntry::Error:
805        return reportError("malformed bitcode file");
806      case BitstreamEntry::EndBlock: {
807        uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
808        BlockStats.NumBits += BlockBitEnd - BlockBitStart;
809        if (DumpRecords) {
810          O->OS << Indent << "</";
811          if (BlockName)
812            O->OS << *BlockName << ">\n";
813          else
814            O->OS << "UnknownBlock" << BlockID << ">\n";
815        }
816        return Error::success();
817      }
818  
819      case BitstreamEntry::SubBlock: {
820        uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
821        if (Error E = parseBlock(Entry.ID, IndentLevel + 1, O, CheckHash))
822          return E;
823        ++BlockStats.NumSubBlocks;
824        uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
825  
826        // Don't include subblock sizes in the size of this block.
827        BlockBitStart += SubBlockBitEnd - SubBlockBitStart;
828        continue;
829      }
830      case BitstreamEntry::Record:
831        // The interesting case.
832        break;
833      }
834  
835      if (Entry.ID == bitc::DEFINE_ABBREV) {
836        if (Error Err = Stream.ReadAbbrevRecord())
837          return Err;
838        ++BlockStats.NumAbbrevs;
839        continue;
840      }
841  
842      Record.clear();
843  
844      ++BlockStats.NumRecords;
845  
846      StringRef Blob;
847      uint64_t CurrentRecordPos = Stream.GetCurrentBitNo();
848      Expected<unsigned> MaybeCode = Stream.readRecord(Entry.ID, Record, &Blob);
849      if (!MaybeCode)
850        return MaybeCode.takeError();
851      unsigned Code = MaybeCode.get();
852  
853      // Increment the # occurrences of this code.
854      if (BlockStats.CodeFreq.size() <= Code)
855        BlockStats.CodeFreq.resize(Code + 1);
856      BlockStats.CodeFreq[Code].NumInstances++;
857      BlockStats.CodeFreq[Code].TotalBits +=
858          Stream.GetCurrentBitNo() - RecordStartBit;
859      if (Entry.ID != bitc::UNABBREV_RECORD) {
860        BlockStats.CodeFreq[Code].NumAbbrev++;
861        ++BlockStats.NumAbbreviatedRecords;
862      }
863  
864      if (DumpRecords) {
865        O->OS << Indent << "  <";
866        Optional<const char *> CodeName =
867            GetCodeName(Code, BlockID, BlockInfo, CurStreamType);
868        if (CodeName)
869          O->OS << *CodeName;
870        else
871          O->OS << "UnknownCode" << Code;
872        if (!O->Symbolic && CodeName)
873          O->OS << " codeid=" << Code;
874        const BitCodeAbbrev *Abbv = nullptr;
875        if (Entry.ID != bitc::UNABBREV_RECORD) {
876          Abbv = Stream.getAbbrev(Entry.ID);
877          O->OS << " abbrevid=" << Entry.ID;
878        }
879  
880        for (unsigned i = 0, e = Record.size(); i != e; ++i)
881          O->OS << " op" << i << "=" << (int64_t)Record[i];
882  
883        // If we found a metadata index, let's verify that we had an offset
884        // before and validate its forward reference offset was correct!
885        if (BlockID == bitc::METADATA_BLOCK_ID) {
886          if (Code == bitc::METADATA_INDEX_OFFSET) {
887            if (Record.size() != 2)
888              O->OS << "(Invalid record)";
889            else {
890              auto Offset = Record[0] + (Record[1] << 32);
891              MetadataIndexOffset = Stream.GetCurrentBitNo() + Offset;
892            }
893          }
894          if (Code == bitc::METADATA_INDEX) {
895            O->OS << " (offset ";
896            if (MetadataIndexOffset == RecordStartBit)
897              O->OS << "match)";
898            else
899              O->OS << "mismatch: " << MetadataIndexOffset << " vs "
900                    << RecordStartBit << ")";
901          }
902        }
903  
904        // If we found a module hash, let's verify that it matches!
905        if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH &&
906            CheckHash.hasValue()) {
907          if (Record.size() != 5)
908            O->OS << " (invalid)";
909          else {
910            // Recompute the hash and compare it to the one in the bitcode
911            SHA1 Hasher;
912            StringRef Hash;
913            Hasher.update(*CheckHash);
914            {
915              int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos;
916              auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
917              Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
918              Hash = Hasher.result();
919            }
920            std::array<char, 20> RecordedHash;
921            int Pos = 0;
922            for (auto &Val : Record) {
923              assert(!(Val >> 32) && "Unexpected high bits set");
924              support::endian::write32be(&RecordedHash[Pos], Val);
925              Pos += 4;
926            }
927            if (Hash == StringRef(RecordedHash.data(), RecordedHash.size()))
928              O->OS << " (match)";
929            else
930              O->OS << " (!mismatch!)";
931          }
932        }
933  
934        O->OS << "/>";
935  
936        if (Abbv) {
937          for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
938            const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
939            if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array)
940              continue;
941            assert(i + 2 == e && "Array op not second to last");
942            std::string Str;
943            bool ArrayIsPrintable = true;
944            for (unsigned j = i - 1, je = Record.size(); j != je; ++j) {
945              if (!isPrint(static_cast<unsigned char>(Record[j]))) {
946                ArrayIsPrintable = false;
947                break;
948              }
949              Str += (char)Record[j];
950            }
951            if (ArrayIsPrintable)
952              O->OS << " record string = '" << Str << "'";
953            break;
954          }
955        }
956  
957        if (Blob.data()) {
958          if (canDecodeBlob(Code, BlockID)) {
959            if (Error E = decodeMetadataStringsBlob(Indent, Record, Blob, O->OS))
960              return E;
961          } else {
962            O->OS << " blob data = ";
963            if (O->ShowBinaryBlobs) {
964              O->OS << "'";
965              O->OS.write_escaped(Blob, /*hex=*/true) << "'";
966            } else {
967              bool BlobIsPrintable = true;
968              for (unsigned i = 0, e = Blob.size(); i != e; ++i)
969                if (!isPrint(static_cast<unsigned char>(Blob[i]))) {
970                  BlobIsPrintable = false;
971                  break;
972                }
973  
974              if (BlobIsPrintable)
975                O->OS << "'" << Blob << "'";
976              else
977                O->OS << "unprintable, " << Blob.size() << " bytes.";
978            }
979          }
980        }
981  
982        O->OS << "\n";
983      }
984  
985      // Make sure that we can skip the current record.
986      if (Error Err = Stream.JumpToBit(CurrentRecordPos))
987        return Err;
988      if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
989        ; // Do nothing.
990      else
991        return Skipped.takeError();
992    }
993  }
994  
995