1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Bitstream/BitstreamReader.h" 10 #include "llvm/ADT/StringRef.h" 11 #include <cassert> 12 #include <string> 13 14 using namespace llvm; 15 16 //===----------------------------------------------------------------------===// 17 // BitstreamCursor implementation 18 //===----------------------------------------------------------------------===// 19 20 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block. 21 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { 22 // Save the current block's state on BlockScope. 23 BlockScope.push_back(Block(CurCodeSize)); 24 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 25 26 // Add the abbrevs specific to this block to the CurAbbrevs list. 27 if (BlockInfo) { 28 if (const BitstreamBlockInfo::BlockInfo *Info = 29 BlockInfo->getBlockInfo(BlockID)) { 30 CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(), 31 Info->Abbrevs.end()); 32 } 33 } 34 35 // Get the codesize of this block. 36 Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth); 37 if (!MaybeVBR) 38 return MaybeVBR.takeError(); 39 CurCodeSize = MaybeVBR.get(); 40 41 if (CurCodeSize > MaxChunkSize) 42 return llvm::createStringError( 43 std::errc::illegal_byte_sequence, 44 "can't read more than %zu at a time, trying to read %u", +MaxChunkSize, 45 CurCodeSize); 46 47 SkipToFourByteBoundary(); 48 Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth); 49 if (!MaybeNum) 50 return MaybeNum.takeError(); 51 word_t NumWords = MaybeNum.get(); 52 if (NumWordsP) 53 *NumWordsP = NumWords; 54 55 if (CurCodeSize == 0) 56 return llvm::createStringError( 57 std::errc::illegal_byte_sequence, 58 "can't enter sub-block: current code size is 0"); 59 if (AtEndOfStream()) 60 return llvm::createStringError( 61 std::errc::illegal_byte_sequence, 62 "can't enter sub block: already at end of stream"); 63 64 return Error::success(); 65 } 66 67 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor, 68 const BitCodeAbbrevOp &Op) { 69 assert(!Op.isLiteral() && "Not to be used with literals!"); 70 71 // Decode the value as we are commanded. 72 switch (Op.getEncoding()) { 73 case BitCodeAbbrevOp::Array: 74 case BitCodeAbbrevOp::Blob: 75 llvm_unreachable("Should not reach here"); 76 case BitCodeAbbrevOp::Fixed: 77 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 78 return Cursor.Read((unsigned)Op.getEncodingData()); 79 case BitCodeAbbrevOp::VBR: 80 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 81 return Cursor.ReadVBR64((unsigned)Op.getEncodingData()); 82 case BitCodeAbbrevOp::Char6: 83 if (Expected<unsigned> Res = Cursor.Read(6)) 84 return BitCodeAbbrevOp::DecodeChar6(Res.get()); 85 else 86 return Res.takeError(); 87 } 88 llvm_unreachable("invalid abbreviation encoding"); 89 } 90 91 static Error skipAbbreviatedField(BitstreamCursor &Cursor, 92 const BitCodeAbbrevOp &Op) { 93 assert(!Op.isLiteral() && "Not to be used with literals!"); 94 95 // Decode the value as we are commanded. 96 switch (Op.getEncoding()) { 97 case BitCodeAbbrevOp::Array: 98 case BitCodeAbbrevOp::Blob: 99 llvm_unreachable("Should not reach here"); 100 case BitCodeAbbrevOp::Fixed: 101 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 102 if (Expected<unsigned> Res = Cursor.Read((unsigned)Op.getEncodingData())) 103 break; 104 else 105 return Res.takeError(); 106 case BitCodeAbbrevOp::VBR: 107 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 108 if (Expected<uint64_t> Res = 109 Cursor.ReadVBR64((unsigned)Op.getEncodingData())) 110 break; 111 else 112 return Res.takeError(); 113 case BitCodeAbbrevOp::Char6: 114 if (Expected<unsigned> Res = Cursor.Read(6)) 115 break; 116 else 117 return Res.takeError(); 118 } 119 return ErrorSuccess(); 120 } 121 122 /// skipRecord - Read the current record and discard it. 123 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) { 124 // Skip unabbreviated records by reading past their entries. 125 if (AbbrevID == bitc::UNABBREV_RECORD) { 126 Expected<uint32_t> MaybeCode = ReadVBR(6); 127 if (!MaybeCode) 128 return MaybeCode.takeError(); 129 unsigned Code = MaybeCode.get(); 130 Expected<uint32_t> MaybeVBR = ReadVBR(6); 131 if (!MaybeVBR) 132 return MaybeVBR.get(); 133 unsigned NumElts = MaybeVBR.get(); 134 for (unsigned i = 0; i != NumElts; ++i) 135 if (Expected<uint64_t> Res = ReadVBR64(6)) 136 ; // Skip! 137 else 138 return Res.takeError(); 139 return Code; 140 } 141 142 const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); 143 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 144 unsigned Code; 145 if (CodeOp.isLiteral()) 146 Code = CodeOp.getLiteralValue(); 147 else { 148 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 149 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 150 return llvm::createStringError( 151 std::errc::illegal_byte_sequence, 152 "Abbreviation starts with an Array or a Blob"); 153 Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp); 154 if (!MaybeCode) 155 return MaybeCode.takeError(); 156 Code = MaybeCode.get(); 157 } 158 159 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) { 160 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 161 if (Op.isLiteral()) 162 continue; 163 164 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 165 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 166 if (Error Err = skipAbbreviatedField(*this, Op)) 167 return std::move(Err); 168 continue; 169 } 170 171 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 172 // Array case. Read the number of elements as a vbr6. 173 Expected<uint32_t> MaybeNum = ReadVBR(6); 174 if (!MaybeNum) 175 return MaybeNum.takeError(); 176 unsigned NumElts = MaybeNum.get(); 177 178 // Get the element encoding. 179 assert(i+2 == e && "array op not second to last?"); 180 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 181 182 // Read all the elements. 183 // Decode the value as we are commanded. 184 switch (EltEnc.getEncoding()) { 185 default: 186 report_fatal_error("Array element type can't be an Array or a Blob"); 187 case BitCodeAbbrevOp::Fixed: 188 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 189 if (Error Err = JumpToBit(GetCurrentBitNo() + 190 NumElts * EltEnc.getEncodingData())) 191 return std::move(Err); 192 break; 193 case BitCodeAbbrevOp::VBR: 194 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 195 for (; NumElts; --NumElts) 196 if (Expected<uint64_t> Res = 197 ReadVBR64((unsigned)EltEnc.getEncodingData())) 198 ; // Skip! 199 else 200 return Res.takeError(); 201 break; 202 case BitCodeAbbrevOp::Char6: 203 if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6)) 204 return std::move(Err); 205 break; 206 } 207 continue; 208 } 209 210 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 211 // Blob case. Read the number of bytes as a vbr6. 212 Expected<uint32_t> MaybeNum = ReadVBR(6); 213 if (!MaybeNum) 214 return MaybeNum.takeError(); 215 unsigned NumElts = MaybeNum.get(); 216 SkipToFourByteBoundary(); // 32-bit alignment 217 218 // Figure out where the end of this blob will be including tail padding. 219 size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8; 220 221 // If this would read off the end of the bitcode file, just set the 222 // record to empty and return. 223 if (!canSkipToPos(NewEnd/8)) { 224 skipToEnd(); 225 break; 226 } 227 228 // Skip over the blob. 229 if (Error Err = JumpToBit(NewEnd)) 230 return std::move(Err); 231 } 232 return Code; 233 } 234 235 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID, 236 SmallVectorImpl<uint64_t> &Vals, 237 StringRef *Blob) { 238 if (AbbrevID == bitc::UNABBREV_RECORD) { 239 Expected<uint32_t> MaybeCode = ReadVBR(6); 240 if (!MaybeCode) 241 return MaybeCode.takeError(); 242 uint32_t Code = MaybeCode.get(); 243 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 244 if (!MaybeNumElts) 245 return MaybeNumElts.takeError(); 246 uint32_t NumElts = MaybeNumElts.get(); 247 248 for (unsigned i = 0; i != NumElts; ++i) 249 if (Expected<uint64_t> MaybeVal = ReadVBR64(6)) 250 Vals.push_back(MaybeVal.get()); 251 else 252 return MaybeVal.takeError(); 253 return Code; 254 } 255 256 const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); 257 258 // Read the record code first. 259 assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?"); 260 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 261 unsigned Code; 262 if (CodeOp.isLiteral()) 263 Code = CodeOp.getLiteralValue(); 264 else { 265 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 266 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 267 report_fatal_error("Abbreviation starts with an Array or a Blob"); 268 if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp)) 269 Code = MaybeCode.get(); 270 else 271 return MaybeCode.takeError(); 272 } 273 274 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { 275 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 276 if (Op.isLiteral()) { 277 Vals.push_back(Op.getLiteralValue()); 278 continue; 279 } 280 281 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 282 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 283 if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op)) 284 Vals.push_back(MaybeVal.get()); 285 else 286 return MaybeVal.takeError(); 287 continue; 288 } 289 290 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 291 // Array case. Read the number of elements as a vbr6. 292 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 293 if (!MaybeNumElts) 294 return MaybeNumElts.takeError(); 295 uint32_t NumElts = MaybeNumElts.get(); 296 297 // Get the element encoding. 298 if (i + 2 != e) 299 report_fatal_error("Array op not second to last"); 300 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 301 if (!EltEnc.isEncoding()) 302 report_fatal_error( 303 "Array element type has to be an encoding of a type"); 304 305 // Read all the elements. 306 switch (EltEnc.getEncoding()) { 307 default: 308 report_fatal_error("Array element type can't be an Array or a Blob"); 309 case BitCodeAbbrevOp::Fixed: 310 for (; NumElts; --NumElts) 311 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = 312 Read((unsigned)EltEnc.getEncodingData())) 313 Vals.push_back(MaybeVal.get()); 314 else 315 return MaybeVal.takeError(); 316 break; 317 case BitCodeAbbrevOp::VBR: 318 for (; NumElts; --NumElts) 319 if (Expected<uint64_t> MaybeVal = 320 ReadVBR64((unsigned)EltEnc.getEncodingData())) 321 Vals.push_back(MaybeVal.get()); 322 else 323 return MaybeVal.takeError(); 324 break; 325 case BitCodeAbbrevOp::Char6: 326 for (; NumElts; --NumElts) 327 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6)) 328 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get())); 329 else 330 return MaybeVal.takeError(); 331 } 332 continue; 333 } 334 335 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 336 // Blob case. Read the number of bytes as a vbr6. 337 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 338 if (!MaybeNumElts) 339 return MaybeNumElts.takeError(); 340 uint32_t NumElts = MaybeNumElts.get(); 341 SkipToFourByteBoundary(); // 32-bit alignment 342 343 // Figure out where the end of this blob will be including tail padding. 344 size_t CurBitPos = GetCurrentBitNo(); 345 size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8; 346 347 // If this would read off the end of the bitcode file, just set the 348 // record to empty and return. 349 if (!canSkipToPos(NewEnd/8)) { 350 Vals.append(NumElts, 0); 351 skipToEnd(); 352 break; 353 } 354 355 // Otherwise, inform the streamer that we need these bytes in memory. Skip 356 // over tail padding first, in case jumping to NewEnd invalidates the Blob 357 // pointer. 358 if (Error Err = JumpToBit(NewEnd)) 359 return std::move(Err); 360 const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts); 361 362 // If we can return a reference to the data, do so to avoid copying it. 363 if (Blob) { 364 *Blob = StringRef(Ptr, NumElts); 365 } else { 366 // Otherwise, unpack into Vals with zero extension. 367 for (; NumElts; --NumElts) 368 Vals.push_back((unsigned char)*Ptr++); 369 } 370 } 371 372 return Code; 373 } 374 375 Error BitstreamCursor::ReadAbbrevRecord() { 376 auto Abbv = std::make_shared<BitCodeAbbrev>(); 377 Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5); 378 if (!MaybeNumOpInfo) 379 return MaybeNumOpInfo.takeError(); 380 unsigned NumOpInfo = MaybeNumOpInfo.get(); 381 for (unsigned i = 0; i != NumOpInfo; ++i) { 382 Expected<word_t> MaybeIsLiteral = Read(1); 383 if (!MaybeIsLiteral) 384 return MaybeIsLiteral.takeError(); 385 bool IsLiteral = MaybeIsLiteral.get(); 386 if (IsLiteral) { 387 Expected<uint64_t> MaybeOp = ReadVBR64(8); 388 if (!MaybeOp) 389 return MaybeOp.takeError(); 390 Abbv->Add(BitCodeAbbrevOp(MaybeOp.get())); 391 continue; 392 } 393 394 Expected<word_t> MaybeEncoding = Read(3); 395 if (!MaybeEncoding) 396 return MaybeEncoding.takeError(); 397 BitCodeAbbrevOp::Encoding E = 398 (BitCodeAbbrevOp::Encoding)MaybeEncoding.get(); 399 if (BitCodeAbbrevOp::hasEncodingData(E)) { 400 Expected<uint64_t> MaybeData = ReadVBR64(5); 401 if (!MaybeData) 402 return MaybeData.takeError(); 403 uint64_t Data = MaybeData.get(); 404 405 // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) 406 // and vbr(0) as a literal zero. This is decoded the same way, and avoids 407 // a slow path in Read() to have to handle reading zero bits. 408 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 409 Data == 0) { 410 Abbv->Add(BitCodeAbbrevOp(0)); 411 continue; 412 } 413 414 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 415 Data > MaxChunkSize) 416 report_fatal_error( 417 "Fixed or VBR abbrev record with size > MaxChunkData"); 418 419 Abbv->Add(BitCodeAbbrevOp(E, Data)); 420 } else 421 Abbv->Add(BitCodeAbbrevOp(E)); 422 } 423 424 if (Abbv->getNumOperandInfos() == 0) 425 report_fatal_error("Abbrev record with no operands"); 426 CurAbbrevs.push_back(std::move(Abbv)); 427 428 return Error::success(); 429 } 430 431 Expected<Optional<BitstreamBlockInfo>> 432 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) { 433 if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) 434 return std::move(Err); 435 436 BitstreamBlockInfo NewBlockInfo; 437 438 SmallVector<uint64_t, 64> Record; 439 BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr; 440 441 // Read all the records for this module. 442 while (true) { 443 Expected<BitstreamEntry> MaybeEntry = 444 advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); 445 if (!MaybeEntry) 446 return MaybeEntry.takeError(); 447 BitstreamEntry Entry = MaybeEntry.get(); 448 449 switch (Entry.Kind) { 450 case llvm::BitstreamEntry::SubBlock: // Handled for us already. 451 case llvm::BitstreamEntry::Error: 452 return None; 453 case llvm::BitstreamEntry::EndBlock: 454 return std::move(NewBlockInfo); 455 case llvm::BitstreamEntry::Record: 456 // The interesting case. 457 break; 458 } 459 460 // Read abbrev records, associate them with CurBID. 461 if (Entry.ID == bitc::DEFINE_ABBREV) { 462 if (!CurBlockInfo) return None; 463 if (Error Err = ReadAbbrevRecord()) 464 return std::move(Err); 465 466 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the 467 // appropriate BlockInfo. 468 CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back())); 469 CurAbbrevs.pop_back(); 470 continue; 471 } 472 473 // Read a record. 474 Record.clear(); 475 Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record); 476 if (!MaybeBlockInfo) 477 return MaybeBlockInfo.takeError(); 478 switch (MaybeBlockInfo.get()) { 479 default: 480 break; // Default behavior, ignore unknown content. 481 case bitc::BLOCKINFO_CODE_SETBID: 482 if (Record.size() < 1) 483 return None; 484 CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]); 485 break; 486 case bitc::BLOCKINFO_CODE_BLOCKNAME: { 487 if (!CurBlockInfo) 488 return None; 489 if (!ReadBlockInfoNames) 490 break; // Ignore name. 491 std::string Name; 492 for (unsigned i = 0, e = Record.size(); i != e; ++i) 493 Name += (char)Record[i]; 494 CurBlockInfo->Name = Name; 495 break; 496 } 497 case bitc::BLOCKINFO_CODE_SETRECORDNAME: { 498 if (!CurBlockInfo) return None; 499 if (!ReadBlockInfoNames) 500 break; // Ignore name. 501 std::string Name; 502 for (unsigned i = 1, e = Record.size(); i != e; ++i) 503 Name += (char)Record[i]; 504 CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0], 505 Name)); 506 break; 507 } 508 } 509 } 510 } 511