1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Bitstream/BitstreamReader.h" 10 #include "llvm/ADT/StringRef.h" 11 #include <cassert> 12 #include <string> 13 14 using namespace llvm; 15 16 //===----------------------------------------------------------------------===// 17 // BitstreamCursor implementation 18 //===----------------------------------------------------------------------===// 19 // 20 static Error error(const char *Message) { 21 return createStringError(std::errc::illegal_byte_sequence, Message); 22 } 23 24 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block. 25 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { 26 // Save the current block's state on BlockScope. 27 BlockScope.push_back(Block(CurCodeSize)); 28 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 29 30 // Add the abbrevs specific to this block to the CurAbbrevs list. 31 if (BlockInfo) { 32 if (const BitstreamBlockInfo::BlockInfo *Info = 33 BlockInfo->getBlockInfo(BlockID)) { 34 llvm::append_range(CurAbbrevs, Info->Abbrevs); 35 } 36 } 37 38 // Get the codesize of this block. 39 Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth); 40 if (!MaybeVBR) 41 return MaybeVBR.takeError(); 42 CurCodeSize = MaybeVBR.get(); 43 44 if (CurCodeSize > MaxChunkSize) 45 return llvm::createStringError( 46 std::errc::illegal_byte_sequence, 47 "can't read more than %zu at a time, trying to read %u", +MaxChunkSize, 48 CurCodeSize); 49 50 SkipToFourByteBoundary(); 51 Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth); 52 if (!MaybeNum) 53 return MaybeNum.takeError(); 54 word_t NumWords = MaybeNum.get(); 55 if (NumWordsP) 56 *NumWordsP = NumWords; 57 58 if (CurCodeSize == 0) 59 return llvm::createStringError( 60 std::errc::illegal_byte_sequence, 61 "can't enter sub-block: current code size is 0"); 62 if (AtEndOfStream()) 63 return llvm::createStringError( 64 std::errc::illegal_byte_sequence, 65 "can't enter sub block: already at end of stream"); 66 67 return Error::success(); 68 } 69 70 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor, 71 const BitCodeAbbrevOp &Op) { 72 assert(!Op.isLiteral() && "Not to be used with literals!"); 73 74 // Decode the value as we are commanded. 75 switch (Op.getEncoding()) { 76 case BitCodeAbbrevOp::Array: 77 case BitCodeAbbrevOp::Blob: 78 llvm_unreachable("Should not reach here"); 79 case BitCodeAbbrevOp::Fixed: 80 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 81 return Cursor.Read((unsigned)Op.getEncodingData()); 82 case BitCodeAbbrevOp::VBR: 83 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 84 return Cursor.ReadVBR64((unsigned)Op.getEncodingData()); 85 case BitCodeAbbrevOp::Char6: 86 if (Expected<unsigned> Res = Cursor.Read(6)) 87 return BitCodeAbbrevOp::DecodeChar6(Res.get()); 88 else 89 return Res.takeError(); 90 } 91 llvm_unreachable("invalid abbreviation encoding"); 92 } 93 94 /// skipRecord - Read the current record and discard it. 95 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) { 96 // Skip unabbreviated records by reading past their entries. 97 if (AbbrevID == bitc::UNABBREV_RECORD) { 98 Expected<uint32_t> MaybeCode = ReadVBR(6); 99 if (!MaybeCode) 100 return MaybeCode.takeError(); 101 unsigned Code = MaybeCode.get(); 102 Expected<uint32_t> MaybeVBR = ReadVBR(6); 103 if (!MaybeVBR) 104 return MaybeVBR.takeError(); 105 unsigned NumElts = MaybeVBR.get(); 106 for (unsigned i = 0; i != NumElts; ++i) 107 if (Expected<uint64_t> Res = ReadVBR64(6)) 108 ; // Skip! 109 else 110 return Res.takeError(); 111 return Code; 112 } 113 114 Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID); 115 if (!MaybeAbbv) 116 return MaybeAbbv.takeError(); 117 118 const BitCodeAbbrev *Abbv = MaybeAbbv.get(); 119 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 120 unsigned Code; 121 if (CodeOp.isLiteral()) 122 Code = CodeOp.getLiteralValue(); 123 else { 124 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 125 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 126 return llvm::createStringError( 127 std::errc::illegal_byte_sequence, 128 "Abbreviation starts with an Array or a Blob"); 129 Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp); 130 if (!MaybeCode) 131 return MaybeCode.takeError(); 132 Code = MaybeCode.get(); 133 } 134 135 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) { 136 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 137 if (Op.isLiteral()) 138 continue; 139 140 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 141 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 142 if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op)) 143 continue; 144 else 145 return MaybeField.takeError(); 146 } 147 148 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 149 // Array case. Read the number of elements as a vbr6. 150 Expected<uint32_t> MaybeNum = ReadVBR(6); 151 if (!MaybeNum) 152 return MaybeNum.takeError(); 153 unsigned NumElts = MaybeNum.get(); 154 155 // Get the element encoding. 156 assert(i+2 == e && "array op not second to last?"); 157 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 158 159 // Read all the elements. 160 // Decode the value as we are commanded. 161 switch (EltEnc.getEncoding()) { 162 default: 163 return error("Array element type can't be an Array or a Blob"); 164 case BitCodeAbbrevOp::Fixed: 165 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 166 if (Error Err = 167 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) * 168 EltEnc.getEncodingData())) 169 return std::move(Err); 170 break; 171 case BitCodeAbbrevOp::VBR: 172 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 173 for (; NumElts; --NumElts) 174 if (Expected<uint64_t> Res = 175 ReadVBR64((unsigned)EltEnc.getEncodingData())) 176 ; // Skip! 177 else 178 return Res.takeError(); 179 break; 180 case BitCodeAbbrevOp::Char6: 181 if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6)) 182 return std::move(Err); 183 break; 184 } 185 continue; 186 } 187 188 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 189 // Blob case. Read the number of bytes as a vbr6. 190 Expected<uint32_t> MaybeNum = ReadVBR(6); 191 if (!MaybeNum) 192 return MaybeNum.takeError(); 193 unsigned NumElts = MaybeNum.get(); 194 SkipToFourByteBoundary(); // 32-bit alignment 195 196 // Figure out where the end of this blob will be including tail padding. 197 const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8; 198 199 // If this would read off the end of the bitcode file, just set the 200 // record to empty and return. 201 if (!canSkipToPos(NewEnd/8)) { 202 skipToEnd(); 203 break; 204 } 205 206 // Skip over the blob. 207 if (Error Err = JumpToBit(NewEnd)) 208 return std::move(Err); 209 } 210 return Code; 211 } 212 213 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID, 214 SmallVectorImpl<uint64_t> &Vals, 215 StringRef *Blob) { 216 if (AbbrevID == bitc::UNABBREV_RECORD) { 217 Expected<uint32_t> MaybeCode = ReadVBR(6); 218 if (!MaybeCode) 219 return MaybeCode.takeError(); 220 uint32_t Code = MaybeCode.get(); 221 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 222 if (!MaybeNumElts) 223 return error( 224 ("Failed to read size: " + toString(MaybeNumElts.takeError())) 225 .c_str()); 226 uint32_t NumElts = MaybeNumElts.get(); 227 if (!isSizePlausible(NumElts)) 228 return error("Size is not plausible"); 229 Vals.reserve(Vals.size() + NumElts); 230 231 for (unsigned i = 0; i != NumElts; ++i) 232 if (Expected<uint64_t> MaybeVal = ReadVBR64(6)) 233 Vals.push_back(MaybeVal.get()); 234 else 235 return MaybeVal.takeError(); 236 return Code; 237 } 238 239 Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID); 240 if (!MaybeAbbv) 241 return MaybeAbbv.takeError(); 242 const BitCodeAbbrev *Abbv = MaybeAbbv.get(); 243 244 // Read the record code first. 245 assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?"); 246 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 247 unsigned Code; 248 if (CodeOp.isLiteral()) 249 Code = CodeOp.getLiteralValue(); 250 else { 251 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 252 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 253 return error("Abbreviation starts with an Array or a Blob"); 254 if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp)) 255 Code = MaybeCode.get(); 256 else 257 return MaybeCode.takeError(); 258 } 259 260 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { 261 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 262 if (Op.isLiteral()) { 263 Vals.push_back(Op.getLiteralValue()); 264 continue; 265 } 266 267 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 268 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 269 if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op)) 270 Vals.push_back(MaybeVal.get()); 271 else 272 return MaybeVal.takeError(); 273 continue; 274 } 275 276 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 277 // Array case. Read the number of elements as a vbr6. 278 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 279 if (!MaybeNumElts) 280 return error( 281 ("Failed to read size: " + toString(MaybeNumElts.takeError())) 282 .c_str()); 283 uint32_t NumElts = MaybeNumElts.get(); 284 if (!isSizePlausible(NumElts)) 285 return error("Size is not plausible"); 286 Vals.reserve(Vals.size() + NumElts); 287 288 // Get the element encoding. 289 if (i + 2 != e) 290 return error("Array op not second to last"); 291 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 292 if (!EltEnc.isEncoding()) 293 return error( 294 "Array element type has to be an encoding of a type"); 295 296 // Read all the elements. 297 switch (EltEnc.getEncoding()) { 298 default: 299 return error("Array element type can't be an Array or a Blob"); 300 case BitCodeAbbrevOp::Fixed: 301 for (; NumElts; --NumElts) 302 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = 303 Read((unsigned)EltEnc.getEncodingData())) 304 Vals.push_back(MaybeVal.get()); 305 else 306 return MaybeVal.takeError(); 307 break; 308 case BitCodeAbbrevOp::VBR: 309 for (; NumElts; --NumElts) 310 if (Expected<uint64_t> MaybeVal = 311 ReadVBR64((unsigned)EltEnc.getEncodingData())) 312 Vals.push_back(MaybeVal.get()); 313 else 314 return MaybeVal.takeError(); 315 break; 316 case BitCodeAbbrevOp::Char6: 317 for (; NumElts; --NumElts) 318 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6)) 319 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get())); 320 else 321 return MaybeVal.takeError(); 322 } 323 continue; 324 } 325 326 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 327 // Blob case. Read the number of bytes as a vbr6. 328 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 329 if (!MaybeNumElts) 330 return MaybeNumElts.takeError(); 331 uint32_t NumElts = MaybeNumElts.get(); 332 SkipToFourByteBoundary(); // 32-bit alignment 333 334 // Figure out where the end of this blob will be including tail padding. 335 size_t CurBitPos = GetCurrentBitNo(); 336 const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8; 337 338 // Make sure the bitstream is large enough to contain the blob. 339 if (!canSkipToPos(NewEnd/8)) 340 return error("Blob ends too soon"); 341 342 // Otherwise, inform the streamer that we need these bytes in memory. Skip 343 // over tail padding first, in case jumping to NewEnd invalidates the Blob 344 // pointer. 345 if (Error Err = JumpToBit(NewEnd)) 346 return std::move(Err); 347 const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts); 348 349 // If we can return a reference to the data, do so to avoid copying it. 350 if (Blob) { 351 *Blob = StringRef(Ptr, NumElts); 352 } else { 353 // Otherwise, unpack into Vals with zero extension. 354 auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr); 355 Vals.append(UPtr, UPtr + NumElts); 356 } 357 } 358 359 return Code; 360 } 361 362 Error BitstreamCursor::ReadAbbrevRecord() { 363 auto Abbv = std::make_shared<BitCodeAbbrev>(); 364 Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5); 365 if (!MaybeNumOpInfo) 366 return MaybeNumOpInfo.takeError(); 367 unsigned NumOpInfo = MaybeNumOpInfo.get(); 368 for (unsigned i = 0; i != NumOpInfo; ++i) { 369 Expected<word_t> MaybeIsLiteral = Read(1); 370 if (!MaybeIsLiteral) 371 return MaybeIsLiteral.takeError(); 372 bool IsLiteral = MaybeIsLiteral.get(); 373 if (IsLiteral) { 374 Expected<uint64_t> MaybeOp = ReadVBR64(8); 375 if (!MaybeOp) 376 return MaybeOp.takeError(); 377 Abbv->Add(BitCodeAbbrevOp(MaybeOp.get())); 378 continue; 379 } 380 381 Expected<word_t> MaybeEncoding = Read(3); 382 if (!MaybeEncoding) 383 return MaybeEncoding.takeError(); 384 if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get())) 385 return error("Invalid encoding"); 386 387 BitCodeAbbrevOp::Encoding E = 388 (BitCodeAbbrevOp::Encoding)MaybeEncoding.get(); 389 if (BitCodeAbbrevOp::hasEncodingData(E)) { 390 Expected<uint64_t> MaybeData = ReadVBR64(5); 391 if (!MaybeData) 392 return MaybeData.takeError(); 393 uint64_t Data = MaybeData.get(); 394 395 // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) 396 // and vbr(0) as a literal zero. This is decoded the same way, and avoids 397 // a slow path in Read() to have to handle reading zero bits. 398 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 399 Data == 0) { 400 Abbv->Add(BitCodeAbbrevOp(0)); 401 continue; 402 } 403 404 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 405 Data > MaxChunkSize) 406 return error("Fixed or VBR abbrev record with size > MaxChunkData"); 407 408 Abbv->Add(BitCodeAbbrevOp(E, Data)); 409 } else 410 Abbv->Add(BitCodeAbbrevOp(E)); 411 } 412 413 if (Abbv->getNumOperandInfos() == 0) 414 return error("Abbrev record with no operands"); 415 CurAbbrevs.push_back(std::move(Abbv)); 416 417 return Error::success(); 418 } 419 420 Expected<Optional<BitstreamBlockInfo>> 421 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) { 422 if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) 423 return std::move(Err); 424 425 BitstreamBlockInfo NewBlockInfo; 426 427 SmallVector<uint64_t, 64> Record; 428 BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr; 429 430 // Read all the records for this module. 431 while (true) { 432 Expected<BitstreamEntry> MaybeEntry = 433 advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); 434 if (!MaybeEntry) 435 return MaybeEntry.takeError(); 436 BitstreamEntry Entry = MaybeEntry.get(); 437 438 switch (Entry.Kind) { 439 case llvm::BitstreamEntry::SubBlock: // Handled for us already. 440 case llvm::BitstreamEntry::Error: 441 return None; 442 case llvm::BitstreamEntry::EndBlock: 443 return std::move(NewBlockInfo); 444 case llvm::BitstreamEntry::Record: 445 // The interesting case. 446 break; 447 } 448 449 // Read abbrev records, associate them with CurBID. 450 if (Entry.ID == bitc::DEFINE_ABBREV) { 451 if (!CurBlockInfo) return None; 452 if (Error Err = ReadAbbrevRecord()) 453 return std::move(Err); 454 455 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the 456 // appropriate BlockInfo. 457 CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back())); 458 CurAbbrevs.pop_back(); 459 continue; 460 } 461 462 // Read a record. 463 Record.clear(); 464 Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record); 465 if (!MaybeBlockInfo) 466 return MaybeBlockInfo.takeError(); 467 switch (MaybeBlockInfo.get()) { 468 default: 469 break; // Default behavior, ignore unknown content. 470 case bitc::BLOCKINFO_CODE_SETBID: 471 if (Record.size() < 1) 472 return None; 473 CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]); 474 break; 475 case bitc::BLOCKINFO_CODE_BLOCKNAME: { 476 if (!CurBlockInfo) 477 return None; 478 if (!ReadBlockInfoNames) 479 break; // Ignore name. 480 CurBlockInfo->Name = std::string(Record.begin(), Record.end()); 481 break; 482 } 483 case bitc::BLOCKINFO_CODE_SETRECORDNAME: { 484 if (!CurBlockInfo) return None; 485 if (!ReadBlockInfoNames) 486 break; // Ignore name. 487 CurBlockInfo->RecordNames.emplace_back( 488 (unsigned)Record[0], std::string(Record.begin() + 1, Record.end())); 489 break; 490 } 491 } 492 } 493 } 494