1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Bitstream/BitstreamReader.h" 10 #include "llvm/ADT/StringRef.h" 11 #include <cassert> 12 #include <string> 13 14 using namespace llvm; 15 16 //===----------------------------------------------------------------------===// 17 // BitstreamCursor implementation 18 //===----------------------------------------------------------------------===// 19 20 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block. 21 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { 22 // Save the current block's state on BlockScope. 23 BlockScope.push_back(Block(CurCodeSize)); 24 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 25 26 // Add the abbrevs specific to this block to the CurAbbrevs list. 27 if (BlockInfo) { 28 if (const BitstreamBlockInfo::BlockInfo *Info = 29 BlockInfo->getBlockInfo(BlockID)) { 30 llvm::append_range(CurAbbrevs, Info->Abbrevs); 31 } 32 } 33 34 // Get the codesize of this block. 35 Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth); 36 if (!MaybeVBR) 37 return MaybeVBR.takeError(); 38 CurCodeSize = MaybeVBR.get(); 39 40 if (CurCodeSize > MaxChunkSize) 41 return llvm::createStringError( 42 std::errc::illegal_byte_sequence, 43 "can't read more than %zu at a time, trying to read %u", +MaxChunkSize, 44 CurCodeSize); 45 46 SkipToFourByteBoundary(); 47 Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth); 48 if (!MaybeNum) 49 return MaybeNum.takeError(); 50 word_t NumWords = MaybeNum.get(); 51 if (NumWordsP) 52 *NumWordsP = NumWords; 53 54 if (CurCodeSize == 0) 55 return llvm::createStringError( 56 std::errc::illegal_byte_sequence, 57 "can't enter sub-block: current code size is 0"); 58 if (AtEndOfStream()) 59 return llvm::createStringError( 60 std::errc::illegal_byte_sequence, 61 "can't enter sub block: already at end of stream"); 62 63 return Error::success(); 64 } 65 66 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor, 67 const BitCodeAbbrevOp &Op) { 68 assert(!Op.isLiteral() && "Not to be used with literals!"); 69 70 // Decode the value as we are commanded. 71 switch (Op.getEncoding()) { 72 case BitCodeAbbrevOp::Array: 73 case BitCodeAbbrevOp::Blob: 74 llvm_unreachable("Should not reach here"); 75 case BitCodeAbbrevOp::Fixed: 76 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 77 return Cursor.Read((unsigned)Op.getEncodingData()); 78 case BitCodeAbbrevOp::VBR: 79 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 80 return Cursor.ReadVBR64((unsigned)Op.getEncodingData()); 81 case BitCodeAbbrevOp::Char6: 82 if (Expected<unsigned> Res = Cursor.Read(6)) 83 return BitCodeAbbrevOp::DecodeChar6(Res.get()); 84 else 85 return Res.takeError(); 86 } 87 llvm_unreachable("invalid abbreviation encoding"); 88 } 89 90 /// skipRecord - Read the current record and discard it. 91 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) { 92 // Skip unabbreviated records by reading past their entries. 93 if (AbbrevID == bitc::UNABBREV_RECORD) { 94 Expected<uint32_t> MaybeCode = ReadVBR(6); 95 if (!MaybeCode) 96 return MaybeCode.takeError(); 97 unsigned Code = MaybeCode.get(); 98 Expected<uint32_t> MaybeVBR = ReadVBR(6); 99 if (!MaybeVBR) 100 return MaybeVBR.get(); 101 unsigned NumElts = MaybeVBR.get(); 102 for (unsigned i = 0; i != NumElts; ++i) 103 if (Expected<uint64_t> Res = ReadVBR64(6)) 104 ; // Skip! 105 else 106 return Res.takeError(); 107 return Code; 108 } 109 110 const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); 111 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 112 unsigned Code; 113 if (CodeOp.isLiteral()) 114 Code = CodeOp.getLiteralValue(); 115 else { 116 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 117 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 118 return llvm::createStringError( 119 std::errc::illegal_byte_sequence, 120 "Abbreviation starts with an Array or a Blob"); 121 Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp); 122 if (!MaybeCode) 123 return MaybeCode.takeError(); 124 Code = MaybeCode.get(); 125 } 126 127 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) { 128 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 129 if (Op.isLiteral()) 130 continue; 131 132 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 133 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 134 if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op)) 135 continue; 136 else 137 return MaybeField.takeError(); 138 } 139 140 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 141 // Array case. Read the number of elements as a vbr6. 142 Expected<uint32_t> MaybeNum = ReadVBR(6); 143 if (!MaybeNum) 144 return MaybeNum.takeError(); 145 unsigned NumElts = MaybeNum.get(); 146 147 // Get the element encoding. 148 assert(i+2 == e && "array op not second to last?"); 149 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 150 151 // Read all the elements. 152 // Decode the value as we are commanded. 153 switch (EltEnc.getEncoding()) { 154 default: 155 report_fatal_error("Array element type can't be an Array or a Blob"); 156 case BitCodeAbbrevOp::Fixed: 157 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 158 if (Error Err = 159 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) * 160 EltEnc.getEncodingData())) 161 return std::move(Err); 162 break; 163 case BitCodeAbbrevOp::VBR: 164 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 165 for (; NumElts; --NumElts) 166 if (Expected<uint64_t> Res = 167 ReadVBR64((unsigned)EltEnc.getEncodingData())) 168 ; // Skip! 169 else 170 return Res.takeError(); 171 break; 172 case BitCodeAbbrevOp::Char6: 173 if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6)) 174 return std::move(Err); 175 break; 176 } 177 continue; 178 } 179 180 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 181 // Blob case. Read the number of bytes as a vbr6. 182 Expected<uint32_t> MaybeNum = ReadVBR(6); 183 if (!MaybeNum) 184 return MaybeNum.takeError(); 185 unsigned NumElts = MaybeNum.get(); 186 SkipToFourByteBoundary(); // 32-bit alignment 187 188 // Figure out where the end of this blob will be including tail padding. 189 const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8; 190 191 // If this would read off the end of the bitcode file, just set the 192 // record to empty and return. 193 if (!canSkipToPos(NewEnd/8)) { 194 skipToEnd(); 195 break; 196 } 197 198 // Skip over the blob. 199 if (Error Err = JumpToBit(NewEnd)) 200 return std::move(Err); 201 } 202 return Code; 203 } 204 205 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID, 206 SmallVectorImpl<uint64_t> &Vals, 207 StringRef *Blob) { 208 if (AbbrevID == bitc::UNABBREV_RECORD) { 209 Expected<uint32_t> MaybeCode = ReadVBR(6); 210 if (!MaybeCode) 211 return MaybeCode.takeError(); 212 uint32_t Code = MaybeCode.get(); 213 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 214 if (!MaybeNumElts) 215 return MaybeNumElts.takeError(); 216 uint32_t NumElts = MaybeNumElts.get(); 217 Vals.reserve(Vals.size() + NumElts); 218 219 for (unsigned i = 0; i != NumElts; ++i) 220 if (Expected<uint64_t> MaybeVal = ReadVBR64(6)) 221 Vals.push_back(MaybeVal.get()); 222 else 223 return MaybeVal.takeError(); 224 return Code; 225 } 226 227 const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); 228 229 // Read the record code first. 230 assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?"); 231 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 232 unsigned Code; 233 if (CodeOp.isLiteral()) 234 Code = CodeOp.getLiteralValue(); 235 else { 236 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 237 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 238 report_fatal_error("Abbreviation starts with an Array or a Blob"); 239 if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp)) 240 Code = MaybeCode.get(); 241 else 242 return MaybeCode.takeError(); 243 } 244 245 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { 246 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 247 if (Op.isLiteral()) { 248 Vals.push_back(Op.getLiteralValue()); 249 continue; 250 } 251 252 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 253 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 254 if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op)) 255 Vals.push_back(MaybeVal.get()); 256 else 257 return MaybeVal.takeError(); 258 continue; 259 } 260 261 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 262 // Array case. Read the number of elements as a vbr6. 263 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 264 if (!MaybeNumElts) 265 return MaybeNumElts.takeError(); 266 uint32_t NumElts = MaybeNumElts.get(); 267 Vals.reserve(Vals.size() + NumElts); 268 269 // Get the element encoding. 270 if (i + 2 != e) 271 report_fatal_error("Array op not second to last"); 272 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 273 if (!EltEnc.isEncoding()) 274 report_fatal_error( 275 "Array element type has to be an encoding of a type"); 276 277 // Read all the elements. 278 switch (EltEnc.getEncoding()) { 279 default: 280 report_fatal_error("Array element type can't be an Array or a Blob"); 281 case BitCodeAbbrevOp::Fixed: 282 for (; NumElts; --NumElts) 283 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = 284 Read((unsigned)EltEnc.getEncodingData())) 285 Vals.push_back(MaybeVal.get()); 286 else 287 return MaybeVal.takeError(); 288 break; 289 case BitCodeAbbrevOp::VBR: 290 for (; NumElts; --NumElts) 291 if (Expected<uint64_t> MaybeVal = 292 ReadVBR64((unsigned)EltEnc.getEncodingData())) 293 Vals.push_back(MaybeVal.get()); 294 else 295 return MaybeVal.takeError(); 296 break; 297 case BitCodeAbbrevOp::Char6: 298 for (; NumElts; --NumElts) 299 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6)) 300 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get())); 301 else 302 return MaybeVal.takeError(); 303 } 304 continue; 305 } 306 307 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 308 // Blob case. Read the number of bytes as a vbr6. 309 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 310 if (!MaybeNumElts) 311 return MaybeNumElts.takeError(); 312 uint32_t NumElts = MaybeNumElts.get(); 313 SkipToFourByteBoundary(); // 32-bit alignment 314 315 // Figure out where the end of this blob will be including tail padding. 316 size_t CurBitPos = GetCurrentBitNo(); 317 const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8; 318 319 // If this would read off the end of the bitcode file, just set the 320 // record to empty and return. 321 if (!canSkipToPos(NewEnd/8)) { 322 Vals.append(NumElts, 0); 323 skipToEnd(); 324 break; 325 } 326 327 // Otherwise, inform the streamer that we need these bytes in memory. Skip 328 // over tail padding first, in case jumping to NewEnd invalidates the Blob 329 // pointer. 330 if (Error Err = JumpToBit(NewEnd)) 331 return std::move(Err); 332 const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts); 333 334 // If we can return a reference to the data, do so to avoid copying it. 335 if (Blob) { 336 *Blob = StringRef(Ptr, NumElts); 337 } else { 338 // Otherwise, unpack into Vals with zero extension. 339 auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr); 340 Vals.append(UPtr, UPtr + NumElts); 341 } 342 } 343 344 return Code; 345 } 346 347 Error BitstreamCursor::ReadAbbrevRecord() { 348 auto Abbv = std::make_shared<BitCodeAbbrev>(); 349 Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5); 350 if (!MaybeNumOpInfo) 351 return MaybeNumOpInfo.takeError(); 352 unsigned NumOpInfo = MaybeNumOpInfo.get(); 353 for (unsigned i = 0; i != NumOpInfo; ++i) { 354 Expected<word_t> MaybeIsLiteral = Read(1); 355 if (!MaybeIsLiteral) 356 return MaybeIsLiteral.takeError(); 357 bool IsLiteral = MaybeIsLiteral.get(); 358 if (IsLiteral) { 359 Expected<uint64_t> MaybeOp = ReadVBR64(8); 360 if (!MaybeOp) 361 return MaybeOp.takeError(); 362 Abbv->Add(BitCodeAbbrevOp(MaybeOp.get())); 363 continue; 364 } 365 366 Expected<word_t> MaybeEncoding = Read(3); 367 if (!MaybeEncoding) 368 return MaybeEncoding.takeError(); 369 BitCodeAbbrevOp::Encoding E = 370 (BitCodeAbbrevOp::Encoding)MaybeEncoding.get(); 371 if (BitCodeAbbrevOp::hasEncodingData(E)) { 372 Expected<uint64_t> MaybeData = ReadVBR64(5); 373 if (!MaybeData) 374 return MaybeData.takeError(); 375 uint64_t Data = MaybeData.get(); 376 377 // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) 378 // and vbr(0) as a literal zero. This is decoded the same way, and avoids 379 // a slow path in Read() to have to handle reading zero bits. 380 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 381 Data == 0) { 382 Abbv->Add(BitCodeAbbrevOp(0)); 383 continue; 384 } 385 386 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 387 Data > MaxChunkSize) 388 report_fatal_error( 389 "Fixed or VBR abbrev record with size > MaxChunkData"); 390 391 Abbv->Add(BitCodeAbbrevOp(E, Data)); 392 } else 393 Abbv->Add(BitCodeAbbrevOp(E)); 394 } 395 396 if (Abbv->getNumOperandInfos() == 0) 397 report_fatal_error("Abbrev record with no operands"); 398 CurAbbrevs.push_back(std::move(Abbv)); 399 400 return Error::success(); 401 } 402 403 Expected<Optional<BitstreamBlockInfo>> 404 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) { 405 if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) 406 return std::move(Err); 407 408 BitstreamBlockInfo NewBlockInfo; 409 410 SmallVector<uint64_t, 64> Record; 411 BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr; 412 413 // Read all the records for this module. 414 while (true) { 415 Expected<BitstreamEntry> MaybeEntry = 416 advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); 417 if (!MaybeEntry) 418 return MaybeEntry.takeError(); 419 BitstreamEntry Entry = MaybeEntry.get(); 420 421 switch (Entry.Kind) { 422 case llvm::BitstreamEntry::SubBlock: // Handled for us already. 423 case llvm::BitstreamEntry::Error: 424 return None; 425 case llvm::BitstreamEntry::EndBlock: 426 return std::move(NewBlockInfo); 427 case llvm::BitstreamEntry::Record: 428 // The interesting case. 429 break; 430 } 431 432 // Read abbrev records, associate them with CurBID. 433 if (Entry.ID == bitc::DEFINE_ABBREV) { 434 if (!CurBlockInfo) return None; 435 if (Error Err = ReadAbbrevRecord()) 436 return std::move(Err); 437 438 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the 439 // appropriate BlockInfo. 440 CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back())); 441 CurAbbrevs.pop_back(); 442 continue; 443 } 444 445 // Read a record. 446 Record.clear(); 447 Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record); 448 if (!MaybeBlockInfo) 449 return MaybeBlockInfo.takeError(); 450 switch (MaybeBlockInfo.get()) { 451 default: 452 break; // Default behavior, ignore unknown content. 453 case bitc::BLOCKINFO_CODE_SETBID: 454 if (Record.size() < 1) 455 return None; 456 CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]); 457 break; 458 case bitc::BLOCKINFO_CODE_BLOCKNAME: { 459 if (!CurBlockInfo) 460 return None; 461 if (!ReadBlockInfoNames) 462 break; // Ignore name. 463 CurBlockInfo->Name = std::string(Record.begin(), Record.end()); 464 break; 465 } 466 case bitc::BLOCKINFO_CODE_SETRECORDNAME: { 467 if (!CurBlockInfo) return None; 468 if (!ReadBlockInfoNames) 469 break; // Ignore name. 470 CurBlockInfo->RecordNames.emplace_back( 471 (unsigned)Record[0], std::string(Record.begin() + 1, Record.end())); 472 break; 473 } 474 } 475 } 476 } 477