1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This header defines interfaces to read LLVM bitcode files/streams. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_BITCODE_BITCODEREADER_H 14 #define LLVM_BITCODE_BITCODEREADER_H 15 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/Bitstream/BitCodeEnums.h" 19 #include "llvm/IR/GlobalValue.h" 20 #include "llvm/Support/Endian.h" 21 #include "llvm/Support/Error.h" 22 #include "llvm/Support/ErrorOr.h" 23 #include "llvm/Support/MemoryBufferRef.h" 24 #include <cstdint> 25 #include <memory> 26 #include <optional> 27 #include <string> 28 #include <system_error> 29 #include <vector> 30 namespace llvm { 31 32 class LLVMContext; 33 class Module; 34 class MemoryBuffer; 35 class Metadata; 36 class ModuleSummaryIndex; 37 class Type; 38 class Value; 39 40 // Callback to override the data layout string of an imported bitcode module. 41 // The first argument is the target triple, the second argument the data layout 42 // string from the input, or a default string. It will be used if the callback 43 // returns std::nullopt. 44 typedef std::function<std::optional<std::string>(StringRef, StringRef)> 45 DataLayoutCallbackFuncTy; 46 47 typedef std::function<Type *(unsigned)> GetTypeByIDTy; 48 49 typedef std::function<unsigned(unsigned, unsigned)> GetContainedTypeIDTy; 50 51 typedef std::function<void(Value *, unsigned, GetTypeByIDTy, 52 GetContainedTypeIDTy)> 53 ValueTypeCallbackTy; 54 55 typedef std::function<void(Metadata **, unsigned, GetTypeByIDTy, 56 GetContainedTypeIDTy)> 57 MDTypeCallbackTy; 58 59 // These functions are for converting Expected/Error values to 60 // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: 61 // Remove these functions once no longer needed by the C and libLTO APIs. 62 63 std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err); 64 65 template <typename T> 66 ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) { 67 if (!Val) 68 return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError()); 69 return std::move(*Val); 70 } 71 72 struct ParserCallbacks { 73 std::optional<DataLayoutCallbackFuncTy> DataLayout; 74 /// The ValueType callback is called for every function definition or 75 /// declaration and allows accessing the type information, also behind 76 /// pointers. This can be useful, when the opaque pointer upgrade cleans all 77 /// type information behind pointers. 78 /// The second argument to ValueTypeCallback is the type ID of the 79 /// function, the two passed functions can be used to extract type 80 /// information. 81 std::optional<ValueTypeCallbackTy> ValueType; 82 /// The MDType callback is called for every value in metadata. 83 std::optional<MDTypeCallbackTy> MDType; 84 85 ParserCallbacks() = default; 86 explicit ParserCallbacks(DataLayoutCallbackFuncTy DataLayout) 87 : DataLayout(DataLayout) {} 88 }; 89 90 struct BitcodeFileContents; 91 92 /// Basic information extracted from a bitcode module to be used for LTO. 93 struct BitcodeLTOInfo { 94 bool IsThinLTO; 95 bool HasSummary; 96 bool EnableSplitLTOUnit; 97 bool UnifiedLTO; 98 }; 99 100 /// Represents a module in a bitcode file. 101 class BitcodeModule { 102 // This covers the identification (if present) and module blocks. 103 ArrayRef<uint8_t> Buffer; 104 StringRef ModuleIdentifier; 105 106 // The string table used to interpret this module. 107 StringRef Strtab; 108 109 // The bitstream location of the IDENTIFICATION_BLOCK. 110 uint64_t IdentificationBit; 111 112 // The bitstream location of this module's MODULE_BLOCK. 113 uint64_t ModuleBit; 114 115 BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier, 116 uint64_t IdentificationBit, uint64_t ModuleBit) 117 : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier), 118 IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} 119 120 // Calls the ctor. 121 friend Expected<BitcodeFileContents> 122 getBitcodeFileContents(MemoryBufferRef Buffer); 123 124 Expected<std::unique_ptr<Module>> 125 getModuleImpl(LLVMContext &Context, bool MaterializeAll, 126 bool ShouldLazyLoadMetadata, bool IsImporting, 127 ParserCallbacks Callbacks = {}); 128 129 public: 130 StringRef getBuffer() const { 131 return StringRef((const char *)Buffer.begin(), Buffer.size()); 132 } 133 134 StringRef getStrtab() const { return Strtab; } 135 136 StringRef getModuleIdentifier() const { return ModuleIdentifier; } 137 138 /// Read the bitcode module and prepare for lazy deserialization of function 139 /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. 140 /// If IsImporting is true, this module is being parsed for ThinLTO 141 /// importing into another module. 142 Expected<std::unique_ptr<Module>> 143 getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, 144 bool IsImporting, ParserCallbacks Callbacks = {}); 145 146 /// Read the entire bitcode module and return it. 147 Expected<std::unique_ptr<Module>> 148 parseModule(LLVMContext &Context, ParserCallbacks Callbacks = {}); 149 150 /// Returns information about the module to be used for LTO: whether to 151 /// compile with ThinLTO, and whether it has a summary. 152 Expected<BitcodeLTOInfo> getLTOInfo(); 153 154 /// Parse the specified bitcode buffer, returning the module summary index. 155 Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary(); 156 157 /// Parse the specified bitcode buffer and merge its module summary index 158 /// into CombinedIndex. 159 Error 160 readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, 161 std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr); 162 }; 163 164 struct BitcodeFileContents { 165 std::vector<BitcodeModule> Mods; 166 StringRef Symtab, StrtabForSymtab; 167 }; 168 169 /// Returns the contents of a bitcode file. This includes the raw contents of 170 /// the symbol table embedded in the bitcode file. Clients which require a 171 /// symbol table should prefer to use irsymtab::read instead of this function 172 /// because it creates a reader for the irsymtab and handles upgrading bitcode 173 /// files without a symbol table or with an old symbol table. 174 Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer); 175 176 /// Returns a list of modules in the specified bitcode buffer. 177 Expected<std::vector<BitcodeModule>> 178 getBitcodeModuleList(MemoryBufferRef Buffer); 179 180 /// Read the header of the specified bitcode buffer and prepare for lazy 181 /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, 182 /// lazily load metadata as well. If IsImporting is true, this module is 183 /// being parsed for ThinLTO importing into another module. 184 Expected<std::unique_ptr<Module>> 185 getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, 186 bool ShouldLazyLoadMetadata = false, 187 bool IsImporting = false, 188 ParserCallbacks Callbacks = {}); 189 190 /// Like getLazyBitcodeModule, except that the module takes ownership of 191 /// the memory buffer if successful. If successful, this moves Buffer. On 192 /// error, this *does not* move Buffer. If IsImporting is true, this module is 193 /// being parsed for ThinLTO importing into another module. 194 Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule( 195 std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, 196 bool ShouldLazyLoadMetadata = false, bool IsImporting = false, 197 ParserCallbacks Callbacks = {}); 198 199 /// Read the header of the specified bitcode buffer and extract just the 200 /// triple information. If successful, this returns a string. On error, this 201 /// returns "". 202 Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer); 203 204 /// Return true if \p Buffer contains a bitcode file with ObjC code (category 205 /// or class) in it. 206 Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer); 207 208 /// Read the header of the specified bitcode buffer and extract just the 209 /// producer string information. If successful, this returns a string. On 210 /// error, this returns "". 211 Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer); 212 213 /// Read the specified bitcode file, returning the module. 214 Expected<std::unique_ptr<Module>> 215 parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, 216 ParserCallbacks Callbacks = {}); 217 218 /// Returns LTO information for the specified bitcode file. 219 Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer); 220 221 /// Parse the specified bitcode buffer, returning the module summary index. 222 Expected<std::unique_ptr<ModuleSummaryIndex>> 223 getModuleSummaryIndex(MemoryBufferRef Buffer); 224 225 /// Parse the specified bitcode buffer and merge the index into CombinedIndex. 226 Error readModuleSummaryIndex(MemoryBufferRef Buffer, 227 ModuleSummaryIndex &CombinedIndex); 228 229 /// Parse the module summary index out of an IR file and return the module 230 /// summary index object if found, or an empty summary if not. If Path refers 231 /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then 232 /// this function will return nullptr. 233 Expected<std::unique_ptr<ModuleSummaryIndex>> 234 getModuleSummaryIndexForFile(StringRef Path, 235 bool IgnoreEmptyThinLTOIndexFile = false); 236 237 /// isBitcodeWrapper - Return true if the given bytes are the magic bytes 238 /// for an LLVM IR bitcode wrapper. 239 inline bool isBitcodeWrapper(const unsigned char *BufPtr, 240 const unsigned char *BufEnd) { 241 // See if you can find the hidden message in the magic bytes :-). 242 // (Hint: it's a little-endian encoding.) 243 return BufPtr != BufEnd && 244 BufPtr[0] == 0xDE && 245 BufPtr[1] == 0xC0 && 246 BufPtr[2] == 0x17 && 247 BufPtr[3] == 0x0B; 248 } 249 250 /// isRawBitcode - Return true if the given bytes are the magic bytes for 251 /// raw LLVM IR bitcode (without a wrapper). 252 inline bool isRawBitcode(const unsigned char *BufPtr, 253 const unsigned char *BufEnd) { 254 // These bytes sort of have a hidden message, but it's not in 255 // little-endian this time, and it's a little redundant. 256 return BufPtr != BufEnd && 257 BufPtr[0] == 'B' && 258 BufPtr[1] == 'C' && 259 BufPtr[2] == 0xc0 && 260 BufPtr[3] == 0xde; 261 } 262 263 /// isBitcode - Return true if the given bytes are the magic bytes for 264 /// LLVM IR bitcode, either with or without a wrapper. 265 inline bool isBitcode(const unsigned char *BufPtr, 266 const unsigned char *BufEnd) { 267 return isBitcodeWrapper(BufPtr, BufEnd) || 268 isRawBitcode(BufPtr, BufEnd); 269 } 270 271 /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special 272 /// header for padding or other reasons. The format of this header is: 273 /// 274 /// struct bc_header { 275 /// uint32_t Magic; // 0x0B17C0DE 276 /// uint32_t Version; // Version, currently always 0. 277 /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. 278 /// uint32_t BitcodeSize; // Size of traditional bitcode file. 279 /// ... potentially other gunk ... 280 /// }; 281 /// 282 /// This function is called when we find a file with a matching magic number. 283 /// In this case, skip down to the subsection of the file that is actually a 284 /// BC file. 285 /// If 'VerifyBufferSize' is true, check that the buffer is large enough to 286 /// contain the whole bitcode file. 287 inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, 288 const unsigned char *&BufEnd, 289 bool VerifyBufferSize) { 290 // Must contain the offset and size field! 291 if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4) 292 return true; 293 294 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); 295 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); 296 uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size; 297 298 // Verify that Offset+Size fits in the file. 299 if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr)) 300 return true; 301 BufPtr += Offset; 302 BufEnd = BufPtr+Size; 303 return false; 304 } 305 306 APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits); 307 308 const std::error_category &BitcodeErrorCategory(); 309 enum class BitcodeError { CorruptedBitcode = 1 }; 310 inline std::error_code make_error_code(BitcodeError E) { 311 return std::error_code(static_cast<int>(E), BitcodeErrorCategory()); 312 } 313 314 } // end namespace llvm 315 316 namespace std { 317 318 template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {}; 319 320 } // end namespace std 321 322 #endif // LLVM_BITCODE_BITCODEREADER_H 323