xref: /freebsd/contrib/llvm-project/llvm/include/llvm/Bitcode/BitcodeReader.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header defines interfaces to read LLVM bitcode files/streams.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_BITCODE_BITCODEREADER_H
14 #define LLVM_BITCODE_BITCODEREADER_H
15 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/Bitstream/BitCodeEnums.h"
19 #include "llvm/IR/GlobalValue.h"
20 #include "llvm/Support/Compiler.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/ErrorOr.h"
24 #include "llvm/Support/MemoryBufferRef.h"
25 #include <cstdint>
26 #include <memory>
27 #include <optional>
28 #include <string>
29 #include <system_error>
30 #include <vector>
31 namespace llvm {
32 
33 class LLVMContext;
34 class Module;
35 class MemoryBuffer;
36 class Metadata;
37 class ModuleSummaryIndex;
38 class Type;
39 class Value;
40 
41 // Callback to override the data layout string of an imported bitcode module.
42 // The first argument is the target triple, the second argument the data layout
43 // string from the input, or a default string. It will be used if the callback
44 // returns std::nullopt.
45 typedef std::function<std::optional<std::string>(StringRef, StringRef)>
46     DataLayoutCallbackFuncTy;
47 
48 typedef std::function<Type *(unsigned)> GetTypeByIDTy;
49 
50 typedef std::function<unsigned(unsigned, unsigned)> GetContainedTypeIDTy;
51 
52 typedef std::function<void(Value *, unsigned, GetTypeByIDTy,
53                            GetContainedTypeIDTy)>
54     ValueTypeCallbackTy;
55 
56 typedef std::function<void(Metadata **, unsigned, GetTypeByIDTy,
57                            GetContainedTypeIDTy)>
58     MDTypeCallbackTy;
59 
60 // These functions are for converting Expected/Error values to
61 // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
62 // Remove these functions once no longer needed by the C and libLTO APIs.
63 
/// Legacy-compatibility helper: turns \p Err into a std::error_code.
/// Only the declaration lives in this header.
/// NOTE(review): the name suggests the underlying errors are also reported
/// through \p Ctx -- confirm against the out-of-line definition.
64 LLVM_ABI std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx,
65                                                        Error Err);
66 
67 template <typename T>
expectedToErrorOrAndEmitErrors(LLVMContext & Ctx,Expected<T> Val)68 ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
69   if (!Val)
70     return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
71   return std::move(*Val);
72 }
73 
74 struct ParserCallbacks {
75   std::optional<DataLayoutCallbackFuncTy> DataLayout;
76   /// The ValueType callback is called for every function definition or
77   /// declaration and allows accessing the type information, also behind
78   /// pointers. This can be useful, when the opaque pointer upgrade cleans all
79   /// type information behind pointers.
80   /// The second argument to ValueTypeCallback is the type ID of the
81   /// function, the two passed functions can be used to extract type
82   /// information.
83   std::optional<ValueTypeCallbackTy> ValueType;
84   /// The MDType callback is called for every value in metadata.
85   std::optional<MDTypeCallbackTy> MDType;
86 
87   ParserCallbacks() = default;
ParserCallbacksParserCallbacks88   explicit ParserCallbacks(DataLayoutCallbackFuncTy DataLayout)
89       : DataLayout(DataLayout) {}
90 };
91 
92   struct BitcodeFileContents;
93 
/// Basic information extracted from a bitcode module to be used for LTO.
///
/// Every flag carries a default of false, so a default-initialized instance
/// never holds indeterminate values. The struct remains an aggregate, so
/// brace initialization with explicit values keeps working.
struct BitcodeLTOInfo {
  bool IsThinLTO = false;
  bool HasSummary = false;
  bool EnableSplitLTOUnit = false;
  bool UnifiedLTO = false;
};
101 
102   /// Represents a module in a bitcode file.
103   class BitcodeModule {
104     // This covers the identification (if present) and module blocks.
105     ArrayRef<uint8_t> Buffer;
106     StringRef ModuleIdentifier;
107 
108     // The string table used to interpret this module.
109     StringRef Strtab;
110 
111     // The bitstream location of the IDENTIFICATION_BLOCK.
112     uint64_t IdentificationBit;
113 
114     // The bitstream location of this module's MODULE_BLOCK.
115     uint64_t ModuleBit;
116 
BitcodeModule(ArrayRef<uint8_t> Buffer,StringRef ModuleIdentifier,uint64_t IdentificationBit,uint64_t ModuleBit)117     BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
118                   uint64_t IdentificationBit, uint64_t ModuleBit)
119         : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
120           IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
121 
122     // Calls the ctor.
123     LLVM_ABI friend Expected<BitcodeFileContents>
124     getBitcodeFileContents(MemoryBufferRef Buffer);
125 
126     Expected<std::unique_ptr<Module>>
127     getModuleImpl(LLVMContext &Context, bool MaterializeAll,
128                   bool ShouldLazyLoadMetadata, bool IsImporting,
129                   ParserCallbacks Callbacks = {});
130 
131   public:
getBuffer()132     StringRef getBuffer() const {
133       return StringRef((const char *)Buffer.begin(), Buffer.size());
134     }
135 
getStrtab()136     StringRef getStrtab() const { return Strtab; }
137 
getModuleIdentifier()138     StringRef getModuleIdentifier() const { return ModuleIdentifier; }
139 
140     /// Read the bitcode module and prepare for lazy deserialization of function
141     /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
142     /// If IsImporting is true, this module is being parsed for ThinLTO
143     /// importing into another module.
144     LLVM_ABI Expected<std::unique_ptr<Module>>
145     getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
146                   bool IsImporting, ParserCallbacks Callbacks = {});
147 
148     /// Read the entire bitcode module and return it.
149     LLVM_ABI Expected<std::unique_ptr<Module>>
150     parseModule(LLVMContext &Context, ParserCallbacks Callbacks = {});
151 
152     /// Returns information about the module to be used for LTO: whether to
153     /// compile with ThinLTO, and whether it has a summary.
154     LLVM_ABI Expected<BitcodeLTOInfo> getLTOInfo();
155 
156     /// Parse the specified bitcode buffer, returning the module summary index.
157     LLVM_ABI Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
158 
159     /// Parse the specified bitcode buffer and merge its module summary index
160     /// into CombinedIndex.
161     LLVM_ABI Error
162     readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
163                 std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
164   };
165 
166   struct BitcodeFileContents {
167     std::vector<BitcodeModule> Mods;
168     StringRef Symtab, StrtabForSymtab;
169   };
170 
171   /// Returns the contents of a bitcode file. This includes the raw contents of
172   /// the symbol table embedded in the bitcode file. Clients which require a
173   /// symbol table should prefer to use irsymtab::read instead of this function
174   /// because it creates a reader for the irsymtab and handles upgrading bitcode
175   /// files without a symbol table or with an old symbol table.
176   LLVM_ABI Expected<BitcodeFileContents>
177   getBitcodeFileContents(MemoryBufferRef Buffer);
178 
179   /// Returns a list of modules in the specified bitcode buffer.
180   LLVM_ABI Expected<std::vector<BitcodeModule>>
181   getBitcodeModuleList(MemoryBufferRef Buffer);
182 
183   /// Read the header of the specified bitcode buffer and prepare for lazy
184   /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
185   /// lazily load metadata as well. If IsImporting is true, this module is
186   /// being parsed for ThinLTO importing into another module.
  /// All trailing parameters are defaulted (false, false, empty callbacks).
187   LLVM_ABI Expected<std::unique_ptr<Module>>
188   getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
189                        bool ShouldLazyLoadMetadata = false,
190                        bool IsImporting = false,
191                        ParserCallbacks Callbacks = {});
192 
193   /// Like getLazyBitcodeModule, except that the module takes ownership of
194   /// the memory buffer if successful. If successful, this moves Buffer. On
195   /// error, this *does not* move Buffer. If IsImporting is true, this module is
196   /// being parsed for ThinLTO importing into another module.
197   LLVM_ABI Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
198       std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
199       bool ShouldLazyLoadMetadata = false, bool IsImporting = false,
200       ParserCallbacks Callbacks = {});
201 
202   /// Read the header of the specified bitcode buffer and extract just the
203   /// triple information. If successful, this returns a string. On error, this
204   /// returns "".
  /// NOTE(review): the return type is Expected<std::string>, so failures can
  /// also surface as an Error rather than as "" -- confirm against the
  /// definition which situations yield an empty string.
205   LLVM_ABI Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
206 
207   /// Return true if \p Buffer contains a bitcode file with ObjC code (category
208   /// or class) in it.
209   LLVM_ABI Expected<bool>
210   isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
211 
212   /// Read the header of the specified bitcode buffer and extract just the
213   /// producer string information. If successful, this returns a string. On
214   /// error, this returns "".
  /// NOTE(review): the return type is Expected<std::string>, so failures can
  /// also surface as an Error rather than as "" -- confirm against the
  /// definition which situations yield an empty string.
215   LLVM_ABI Expected<std::string>
216   getBitcodeProducerString(MemoryBufferRef Buffer);
217 
218   /// Read the specified bitcode file, returning the module.
219   LLVM_ABI Expected<std::unique_ptr<Module>>
220   parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context,
221                    ParserCallbacks Callbacks = {});
222 
223   /// Returns LTO information for the specified bitcode file.
224   LLVM_ABI Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
225 
226   /// Parse the specified bitcode buffer, returning the module summary index.
227   LLVM_ABI Expected<std::unique_ptr<ModuleSummaryIndex>>
228   getModuleSummaryIndex(MemoryBufferRef Buffer);
229 
230   /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
231   LLVM_ABI Error readModuleSummaryIndex(MemoryBufferRef Buffer,
232                                         ModuleSummaryIndex &CombinedIndex);
233 
234   /// Parse the module summary index out of an IR file and return the module
235   /// summary index object if found, or an empty summary if not. If Path refers
236   /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
237   /// this function will return nullptr.
238   LLVM_ABI Expected<std::unique_ptr<ModuleSummaryIndex>>
239   getModuleSummaryIndexForFile(StringRef Path,
240                                bool IgnoreEmptyThinLTOIndexFile = false);
241 
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
/// \param BufPtr first byte of the candidate buffer.
/// \param BufEnd one past the last byte of the candidate buffer.
/// \returns true only when at least four bytes are available and they are
///          DE C0 17 0B, in that order.
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                             const unsigned char *BufEnd) {
  // All four magic bytes are inspected below, so a shorter range can never
  // match; rejecting it here also keeps every read inside [BufPtr, BufEnd).
  if (BufEnd - BufPtr < 4)
    return false;

  // See if you can find the hidden message in the magic bytes :-).
  // (Hint: it's a little-endian encoding.)
  return BufPtr[0] == 0xDE &&
         BufPtr[1] == 0xC0 &&
         BufPtr[2] == 0x17 &&
         BufPtr[3] == 0x0B;
}
254 
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
/// \param BufPtr first byte of the candidate buffer.
/// \param BufEnd one past the last byte of the candidate buffer.
/// \returns true only when at least four bytes are available and they are
///          'B' 'C' C0 DE, in that order.
inline bool isRawBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
  // All four magic bytes are inspected below, so a shorter range can never
  // match; rejecting it here also keeps every read inside [BufPtr, BufEnd).
  if (BufEnd - BufPtr < 4)
    return false;

  // These bytes sort of have a hidden message, but it's not in
  // little-endian this time, and it's a little redundant.
  return BufPtr[0] == 'B' &&
         BufPtr[1] == 'C' &&
         BufPtr[2] == 0xc0 &&
         BufPtr[3] == 0xde;
}
267 
268   /// isBitcode - Return true if the given bytes are the magic bytes for
269   /// LLVM IR bitcode, either with or without a wrapper.
isBitcode(const unsigned char * BufPtr,const unsigned char * BufEnd)270   inline bool isBitcode(const unsigned char *BufPtr,
271                         const unsigned char *BufEnd) {
272     return isBitcodeWrapper(BufPtr, BufEnd) ||
273            isRawBitcode(BufPtr, BufEnd);
274   }
275 
276   /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
277   /// header for padding or other reasons.  The format of this header is:
278   ///
279   /// struct bc_header {
280   ///   uint32_t Magic;         // 0x0B17C0DE
281   ///   uint32_t Version;       // Version, currently always 0.
282   ///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
283   ///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
284   ///   ... potentially other gunk ...
285   /// };
286   ///
287   /// This function is called when we find a file with a matching magic number.
288   /// In this case, skip down to the subsection of the file that is actually a
289   /// BC file.
290   /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
291   /// contain the whole bitcode file.
SkipBitcodeWrapperHeader(const unsigned char * & BufPtr,const unsigned char * & BufEnd,bool VerifyBufferSize)292   inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
293                                        const unsigned char *&BufEnd,
294                                        bool VerifyBufferSize) {
295     // Must contain the offset and size field!
296     if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
297       return true;
298 
299     unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
300     unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
301     uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
302 
303     // Verify that Offset+Size fits in the file.
304     if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
305       return true;
306     BufPtr += Offset;
307     BufEnd = BufPtr+Size;
308     return false;
309   }
310 
  /// Declared here, defined out of line.
  /// NOTE(review): presumably assembles an APInt of \p TypeBits bits from the
  /// 64-bit words in \p Vals -- confirm against the definition.
311   LLVM_ABI APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits);
312 
313   LLVM_ABI const std::error_category &BitcodeErrorCategory();
314   enum class BitcodeError { CorruptedBitcode = 1 };
make_error_code(BitcodeError E)315   inline std::error_code make_error_code(BitcodeError E) {
316     return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
317   }
318 
319 } // end namespace llvm
320 
321 namespace std {
322 
// Marks llvm::BitcodeError as an error-code enumeration, which is what lets
// std::error_code be constructed from a BitcodeError value (the conversion
// goes through make_error_code).
323 template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
324 
325 } // end namespace std
326 
327 #endif // LLVM_BITCODE_BITCODEREADER_H
328