1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Bitstream/BitstreamReader.h" 10 #include "llvm/ADT/StringRef.h" 11 #include <cassert> 12 #include <optional> 13 #include <string> 14 15 using namespace llvm; 16 17 //===----------------------------------------------------------------------===// 18 // BitstreamCursor implementation 19 //===----------------------------------------------------------------------===// 20 // 21 static Error error(const char *Message) { 22 return createStringError(std::errc::illegal_byte_sequence, Message); 23 } 24 25 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block. 26 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { 27 // Save the current block's state on BlockScope. 28 BlockScope.push_back(Block(CurCodeSize)); 29 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 30 31 // Add the abbrevs specific to this block to the CurAbbrevs list. 32 if (BlockInfo) { 33 if (const BitstreamBlockInfo::BlockInfo *Info = 34 BlockInfo->getBlockInfo(BlockID)) { 35 llvm::append_range(CurAbbrevs, Info->Abbrevs); 36 } 37 } 38 39 // Get the codesize of this block. 40 Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth); 41 if (!MaybeVBR) 42 return MaybeVBR.takeError(); 43 CurCodeSize = MaybeVBR.get(); 44 45 if (CurCodeSize > MaxChunkSize) 46 return llvm::createStringError( 47 std::errc::illegal_byte_sequence, 48 "can't read more than %zu at a time, trying to read %u", +MaxChunkSize, 49 CurCodeSize); 50 51 SkipToFourByteBoundary(); 52 Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth); 53 if (!MaybeNum) 54 return MaybeNum.takeError(); 55 word_t NumWords = MaybeNum.get(); 56 if (NumWordsP) 57 *NumWordsP = NumWords; 58 59 if (CurCodeSize == 0) 60 return llvm::createStringError( 61 std::errc::illegal_byte_sequence, 62 "can't enter sub-block: current code size is 0"); 63 if (AtEndOfStream()) 64 return llvm::createStringError( 65 std::errc::illegal_byte_sequence, 66 "can't enter sub block: already at end of stream"); 67 68 return Error::success(); 69 } 70 71 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor, 72 const BitCodeAbbrevOp &Op) { 73 assert(!Op.isLiteral() && "Not to be used with literals!"); 74 75 // Decode the value as we are commanded. 76 switch (Op.getEncoding()) { 77 case BitCodeAbbrevOp::Array: 78 case BitCodeAbbrevOp::Blob: 79 llvm_unreachable("Should not reach here"); 80 case BitCodeAbbrevOp::Fixed: 81 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 82 return Cursor.Read((unsigned)Op.getEncodingData()); 83 case BitCodeAbbrevOp::VBR: 84 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 85 return Cursor.ReadVBR64((unsigned)Op.getEncodingData()); 86 case BitCodeAbbrevOp::Char6: 87 if (Expected<unsigned> Res = Cursor.Read(6)) 88 return BitCodeAbbrevOp::DecodeChar6(Res.get()); 89 else 90 return Res.takeError(); 91 } 92 llvm_unreachable("invalid abbreviation encoding"); 93 } 94 95 /// skipRecord - Read the current record and discard it. 96 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) { 97 // Skip unabbreviated records by reading past their entries. 98 if (AbbrevID == bitc::UNABBREV_RECORD) { 99 Expected<uint32_t> MaybeCode = ReadVBR(6); 100 if (!MaybeCode) 101 return MaybeCode.takeError(); 102 unsigned Code = MaybeCode.get(); 103 Expected<uint32_t> MaybeVBR = ReadVBR(6); 104 if (!MaybeVBR) 105 return MaybeVBR.takeError(); 106 unsigned NumElts = MaybeVBR.get(); 107 for (unsigned i = 0; i != NumElts; ++i) 108 if (Expected<uint64_t> Res = ReadVBR64(6)) 109 ; // Skip! 110 else 111 return Res.takeError(); 112 return Code; 113 } 114 115 Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID); 116 if (!MaybeAbbv) 117 return MaybeAbbv.takeError(); 118 119 const BitCodeAbbrev *Abbv = MaybeAbbv.get(); 120 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 121 unsigned Code; 122 if (CodeOp.isLiteral()) 123 Code = CodeOp.getLiteralValue(); 124 else { 125 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 126 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 127 return llvm::createStringError( 128 std::errc::illegal_byte_sequence, 129 "Abbreviation starts with an Array or a Blob"); 130 Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp); 131 if (!MaybeCode) 132 return MaybeCode.takeError(); 133 Code = MaybeCode.get(); 134 } 135 136 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) { 137 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 138 if (Op.isLiteral()) 139 continue; 140 141 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 142 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 143 if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op)) 144 continue; 145 else 146 return MaybeField.takeError(); 147 } 148 149 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 150 // Array case. Read the number of elements as a vbr6. 151 Expected<uint32_t> MaybeNum = ReadVBR(6); 152 if (!MaybeNum) 153 return MaybeNum.takeError(); 154 unsigned NumElts = MaybeNum.get(); 155 156 // Get the element encoding. 157 assert(i+2 == e && "array op not second to last?"); 158 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 159 160 // Read all the elements. 161 // Decode the value as we are commanded. 162 switch (EltEnc.getEncoding()) { 163 default: 164 return error("Array element type can't be an Array or a Blob"); 165 case BitCodeAbbrevOp::Fixed: 166 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 167 if (Error Err = 168 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) * 169 EltEnc.getEncodingData())) 170 return std::move(Err); 171 break; 172 case BitCodeAbbrevOp::VBR: 173 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 174 for (; NumElts; --NumElts) 175 if (Expected<uint64_t> Res = 176 ReadVBR64((unsigned)EltEnc.getEncodingData())) 177 ; // Skip! 178 else 179 return Res.takeError(); 180 break; 181 case BitCodeAbbrevOp::Char6: 182 if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6)) 183 return std::move(Err); 184 break; 185 } 186 continue; 187 } 188 189 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 190 // Blob case. Read the number of bytes as a vbr6. 191 Expected<uint32_t> MaybeNum = ReadVBR(6); 192 if (!MaybeNum) 193 return MaybeNum.takeError(); 194 unsigned NumElts = MaybeNum.get(); 195 SkipToFourByteBoundary(); // 32-bit alignment 196 197 // Figure out where the end of this blob will be including tail padding. 198 const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8; 199 200 // If this would read off the end of the bitcode file, just set the 201 // record to empty and return. 202 if (!canSkipToPos(NewEnd/8)) { 203 skipToEnd(); 204 break; 205 } 206 207 // Skip over the blob. 208 if (Error Err = JumpToBit(NewEnd)) 209 return std::move(Err); 210 } 211 return Code; 212 } 213 214 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID, 215 SmallVectorImpl<uint64_t> &Vals, 216 StringRef *Blob) { 217 if (AbbrevID == bitc::UNABBREV_RECORD) { 218 Expected<uint32_t> MaybeCode = ReadVBR(6); 219 if (!MaybeCode) 220 return MaybeCode.takeError(); 221 uint32_t Code = MaybeCode.get(); 222 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 223 if (!MaybeNumElts) 224 return error( 225 ("Failed to read size: " + toString(MaybeNumElts.takeError())) 226 .c_str()); 227 uint32_t NumElts = MaybeNumElts.get(); 228 if (!isSizePlausible(NumElts)) 229 return error("Size is not plausible"); 230 Vals.reserve(Vals.size() + NumElts); 231 232 for (unsigned i = 0; i != NumElts; ++i) 233 if (Expected<uint64_t> MaybeVal = ReadVBR64(6)) 234 Vals.push_back(MaybeVal.get()); 235 else 236 return MaybeVal.takeError(); 237 return Code; 238 } 239 240 Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID); 241 if (!MaybeAbbv) 242 return MaybeAbbv.takeError(); 243 const BitCodeAbbrev *Abbv = MaybeAbbv.get(); 244 245 // Read the record code first. 246 assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?"); 247 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 248 unsigned Code; 249 if (CodeOp.isLiteral()) 250 Code = CodeOp.getLiteralValue(); 251 else { 252 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 253 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 254 return error("Abbreviation starts with an Array or a Blob"); 255 if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp)) 256 Code = MaybeCode.get(); 257 else 258 return MaybeCode.takeError(); 259 } 260 261 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { 262 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 263 if (Op.isLiteral()) { 264 Vals.push_back(Op.getLiteralValue()); 265 continue; 266 } 267 268 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 269 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 270 if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op)) 271 Vals.push_back(MaybeVal.get()); 272 else 273 return MaybeVal.takeError(); 274 continue; 275 } 276 277 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 278 // Array case. Read the number of elements as a vbr6. 279 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 280 if (!MaybeNumElts) 281 return error( 282 ("Failed to read size: " + toString(MaybeNumElts.takeError())) 283 .c_str()); 284 uint32_t NumElts = MaybeNumElts.get(); 285 if (!isSizePlausible(NumElts)) 286 return error("Size is not plausible"); 287 Vals.reserve(Vals.size() + NumElts); 288 289 // Get the element encoding. 290 if (i + 2 != e) 291 return error("Array op not second to last"); 292 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 293 if (!EltEnc.isEncoding()) 294 return error( 295 "Array element type has to be an encoding of a type"); 296 297 // Read all the elements. 298 switch (EltEnc.getEncoding()) { 299 default: 300 return error("Array element type can't be an Array or a Blob"); 301 case BitCodeAbbrevOp::Fixed: 302 for (; NumElts; --NumElts) 303 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = 304 Read((unsigned)EltEnc.getEncodingData())) 305 Vals.push_back(MaybeVal.get()); 306 else 307 return MaybeVal.takeError(); 308 break; 309 case BitCodeAbbrevOp::VBR: 310 for (; NumElts; --NumElts) 311 if (Expected<uint64_t> MaybeVal = 312 ReadVBR64((unsigned)EltEnc.getEncodingData())) 313 Vals.push_back(MaybeVal.get()); 314 else 315 return MaybeVal.takeError(); 316 break; 317 case BitCodeAbbrevOp::Char6: 318 for (; NumElts; --NumElts) 319 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6)) 320 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get())); 321 else 322 return MaybeVal.takeError(); 323 } 324 continue; 325 } 326 327 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 328 // Blob case. Read the number of bytes as a vbr6. 329 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 330 if (!MaybeNumElts) 331 return MaybeNumElts.takeError(); 332 uint32_t NumElts = MaybeNumElts.get(); 333 SkipToFourByteBoundary(); // 32-bit alignment 334 335 // Figure out where the end of this blob will be including tail padding. 336 size_t CurBitPos = GetCurrentBitNo(); 337 const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8; 338 339 // Make sure the bitstream is large enough to contain the blob. 340 if (!canSkipToPos(NewEnd/8)) 341 return error("Blob ends too soon"); 342 343 // Otherwise, inform the streamer that we need these bytes in memory. Skip 344 // over tail padding first, in case jumping to NewEnd invalidates the Blob 345 // pointer. 346 if (Error Err = JumpToBit(NewEnd)) 347 return std::move(Err); 348 const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts); 349 350 // If we can return a reference to the data, do so to avoid copying it. 351 if (Blob) { 352 *Blob = StringRef(Ptr, NumElts); 353 } else { 354 // Otherwise, unpack into Vals with zero extension. 355 auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr); 356 Vals.append(UPtr, UPtr + NumElts); 357 } 358 } 359 360 return Code; 361 } 362 363 Error BitstreamCursor::ReadAbbrevRecord() { 364 auto Abbv = std::make_shared<BitCodeAbbrev>(); 365 Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5); 366 if (!MaybeNumOpInfo) 367 return MaybeNumOpInfo.takeError(); 368 unsigned NumOpInfo = MaybeNumOpInfo.get(); 369 for (unsigned i = 0; i != NumOpInfo; ++i) { 370 Expected<word_t> MaybeIsLiteral = Read(1); 371 if (!MaybeIsLiteral) 372 return MaybeIsLiteral.takeError(); 373 bool IsLiteral = MaybeIsLiteral.get(); 374 if (IsLiteral) { 375 Expected<uint64_t> MaybeOp = ReadVBR64(8); 376 if (!MaybeOp) 377 return MaybeOp.takeError(); 378 Abbv->Add(BitCodeAbbrevOp(MaybeOp.get())); 379 continue; 380 } 381 382 Expected<word_t> MaybeEncoding = Read(3); 383 if (!MaybeEncoding) 384 return MaybeEncoding.takeError(); 385 if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get())) 386 return error("Invalid encoding"); 387 388 BitCodeAbbrevOp::Encoding E = 389 (BitCodeAbbrevOp::Encoding)MaybeEncoding.get(); 390 if (BitCodeAbbrevOp::hasEncodingData(E)) { 391 Expected<uint64_t> MaybeData = ReadVBR64(5); 392 if (!MaybeData) 393 return MaybeData.takeError(); 394 uint64_t Data = MaybeData.get(); 395 396 // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) 397 // and vbr(0) as a literal zero. This is decoded the same way, and avoids 398 // a slow path in Read() to have to handle reading zero bits. 399 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 400 Data == 0) { 401 Abbv->Add(BitCodeAbbrevOp(0)); 402 continue; 403 } 404 405 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 406 Data > MaxChunkSize) 407 return error("Fixed or VBR abbrev record with size > MaxChunkData"); 408 409 Abbv->Add(BitCodeAbbrevOp(E, Data)); 410 } else 411 Abbv->Add(BitCodeAbbrevOp(E)); 412 } 413 414 if (Abbv->getNumOperandInfos() == 0) 415 return error("Abbrev record with no operands"); 416 CurAbbrevs.push_back(std::move(Abbv)); 417 418 return Error::success(); 419 } 420 421 Expected<std::optional<BitstreamBlockInfo>> 422 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) { 423 if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) 424 return std::move(Err); 425 426 BitstreamBlockInfo NewBlockInfo; 427 428 SmallVector<uint64_t, 64> Record; 429 BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr; 430 431 // Read all the records for this module. 432 while (true) { 433 Expected<BitstreamEntry> MaybeEntry = 434 advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); 435 if (!MaybeEntry) 436 return MaybeEntry.takeError(); 437 BitstreamEntry Entry = MaybeEntry.get(); 438 439 switch (Entry.Kind) { 440 case llvm::BitstreamEntry::SubBlock: // Handled for us already. 441 case llvm::BitstreamEntry::Error: 442 return std::nullopt; 443 case llvm::BitstreamEntry::EndBlock: 444 return std::move(NewBlockInfo); 445 case llvm::BitstreamEntry::Record: 446 // The interesting case. 447 break; 448 } 449 450 // Read abbrev records, associate them with CurBID. 451 if (Entry.ID == bitc::DEFINE_ABBREV) { 452 if (!CurBlockInfo) 453 return std::nullopt; 454 if (Error Err = ReadAbbrevRecord()) 455 return std::move(Err); 456 457 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the 458 // appropriate BlockInfo. 459 CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back())); 460 CurAbbrevs.pop_back(); 461 continue; 462 } 463 464 // Read a record. 465 Record.clear(); 466 Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record); 467 if (!MaybeBlockInfo) 468 return MaybeBlockInfo.takeError(); 469 switch (MaybeBlockInfo.get()) { 470 default: 471 break; // Default behavior, ignore unknown content. 472 case bitc::BLOCKINFO_CODE_SETBID: 473 if (Record.size() < 1) 474 return std::nullopt; 475 CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]); 476 break; 477 case bitc::BLOCKINFO_CODE_BLOCKNAME: { 478 if (!CurBlockInfo) 479 return std::nullopt; 480 if (!ReadBlockInfoNames) 481 break; // Ignore name. 482 CurBlockInfo->Name = std::string(Record.begin(), Record.end()); 483 break; 484 } 485 case bitc::BLOCKINFO_CODE_SETRECORDNAME: { 486 if (!CurBlockInfo) 487 return std::nullopt; 488 if (!ReadBlockInfoNames) 489 break; // Ignore name. 490 CurBlockInfo->RecordNames.emplace_back( 491 (unsigned)Record[0], std::string(Record.begin() + 1, Record.end())); 492 break; 493 } 494 } 495 } 496 } 497