1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/Bitstream/BitstreamReader.h"
10 #include "llvm/ADT/StringRef.h"
11 #include <cassert>
12 #include <optional>
13 #include <string>
14
15 using namespace llvm;
16
17 //===----------------------------------------------------------------------===//
18 // BitstreamCursor implementation
19 //===----------------------------------------------------------------------===//
20 //
error(const char * Message)21 static Error error(const char *Message) {
22 return createStringError(std::errc::illegal_byte_sequence, Message);
23 }
24
25 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
EnterSubBlock(unsigned BlockID,unsigned * NumWordsP)26 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
27 // Save the current block's state on BlockScope.
28 BlockScope.push_back(Block(CurCodeSize));
29 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
30
31 // Add the abbrevs specific to this block to the CurAbbrevs list.
32 if (BlockInfo) {
33 if (const BitstreamBlockInfo::BlockInfo *Info =
34 BlockInfo->getBlockInfo(BlockID)) {
35 llvm::append_range(CurAbbrevs, Info->Abbrevs);
36 }
37 }
38
39 // Get the codesize of this block.
40 Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
41 if (!MaybeVBR)
42 return MaybeVBR.takeError();
43 CurCodeSize = MaybeVBR.get();
44
45 if (CurCodeSize > MaxChunkSize)
46 return llvm::createStringError(
47 std::errc::illegal_byte_sequence,
48 "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
49 CurCodeSize);
50
51 SkipToFourByteBoundary();
52 Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
53 if (!MaybeNum)
54 return MaybeNum.takeError();
55 word_t NumWords = MaybeNum.get();
56 if (NumWordsP)
57 *NumWordsP = NumWords;
58
59 if (CurCodeSize == 0)
60 return llvm::createStringError(
61 std::errc::illegal_byte_sequence,
62 "can't enter sub-block: current code size is 0");
63 if (AtEndOfStream())
64 return llvm::createStringError(
65 std::errc::illegal_byte_sequence,
66 "can't enter sub block: already at end of stream");
67
68 return Error::success();
69 }
70
readAbbreviatedField(BitstreamCursor & Cursor,const BitCodeAbbrevOp & Op)71 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
72 const BitCodeAbbrevOp &Op) {
73 assert(!Op.isLiteral() && "Not to be used with literals!");
74
75 // Decode the value as we are commanded.
76 switch (Op.getEncoding()) {
77 case BitCodeAbbrevOp::Array:
78 case BitCodeAbbrevOp::Blob:
79 llvm_unreachable("Should not reach here");
80 case BitCodeAbbrevOp::Fixed:
81 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
82 return Cursor.Read((unsigned)Op.getEncodingData());
83 case BitCodeAbbrevOp::VBR:
84 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
85 return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
86 case BitCodeAbbrevOp::Char6:
87 if (Expected<unsigned> Res = Cursor.Read(6))
88 return BitCodeAbbrevOp::DecodeChar6(Res.get());
89 else
90 return Res.takeError();
91 }
92 llvm_unreachable("invalid abbreviation encoding");
93 }
94
95 /// skipRecord - Read the current record and discard it.
skipRecord(unsigned AbbrevID)96 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
97 // Skip unabbreviated records by reading past their entries.
98 if (AbbrevID == bitc::UNABBREV_RECORD) {
99 Expected<uint32_t> MaybeCode = ReadVBR(6);
100 if (!MaybeCode)
101 return MaybeCode.takeError();
102 unsigned Code = MaybeCode.get();
103 Expected<uint32_t> MaybeVBR = ReadVBR(6);
104 if (!MaybeVBR)
105 return MaybeVBR.takeError();
106 unsigned NumElts = MaybeVBR.get();
107 for (unsigned i = 0; i != NumElts; ++i)
108 if (Expected<uint64_t> Res = ReadVBR64(6))
109 ; // Skip!
110 else
111 return Res.takeError();
112 return Code;
113 }
114
115 Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
116 if (!MaybeAbbv)
117 return MaybeAbbv.takeError();
118
119 const BitCodeAbbrev *Abbv = MaybeAbbv.get();
120 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
121 unsigned Code;
122 if (CodeOp.isLiteral())
123 Code = CodeOp.getLiteralValue();
124 else {
125 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
126 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
127 return llvm::createStringError(
128 std::errc::illegal_byte_sequence,
129 "Abbreviation starts with an Array or a Blob");
130 Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
131 if (!MaybeCode)
132 return MaybeCode.takeError();
133 Code = MaybeCode.get();
134 }
135
136 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
137 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
138 if (Op.isLiteral())
139 continue;
140
141 if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
142 Op.getEncoding() != BitCodeAbbrevOp::Blob) {
143 if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
144 continue;
145 else
146 return MaybeField.takeError();
147 }
148
149 if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
150 // Array case. Read the number of elements as a vbr6.
151 Expected<uint32_t> MaybeNum = ReadVBR(6);
152 if (!MaybeNum)
153 return MaybeNum.takeError();
154 unsigned NumElts = MaybeNum.get();
155
156 // Get the element encoding.
157 assert(i+2 == e && "array op not second to last?");
158 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
159
160 // Read all the elements.
161 // Decode the value as we are commanded.
162 switch (EltEnc.getEncoding()) {
163 default:
164 return error("Array element type can't be an Array or a Blob");
165 case BitCodeAbbrevOp::Fixed:
166 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
167 if (Error Err =
168 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
169 EltEnc.getEncodingData()))
170 return Err;
171 break;
172 case BitCodeAbbrevOp::VBR:
173 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
174 for (; NumElts; --NumElts)
175 if (Expected<uint64_t> Res =
176 ReadVBR64((unsigned)EltEnc.getEncodingData()))
177 ; // Skip!
178 else
179 return Res.takeError();
180 break;
181 case BitCodeAbbrevOp::Char6:
182 if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
183 return Err;
184 break;
185 }
186 continue;
187 }
188
189 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
190 // Blob case. Read the number of bytes as a vbr6.
191 Expected<uint32_t> MaybeNum = ReadVBR(6);
192 if (!MaybeNum)
193 return MaybeNum.takeError();
194 unsigned NumElts = MaybeNum.get();
195 SkipToFourByteBoundary(); // 32-bit alignment
196
197 // Figure out where the end of this blob will be including tail padding.
198 const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8;
199
200 // If this would read off the end of the bitcode file, just set the
201 // record to empty and return.
202 if (!canSkipToPos(NewEnd/8)) {
203 skipToEnd();
204 break;
205 }
206
207 // Skip over the blob.
208 if (Error Err = JumpToBit(NewEnd))
209 return Err;
210 }
211 return Code;
212 }
213
readRecord(unsigned AbbrevID,SmallVectorImpl<uint64_t> & Vals,StringRef * Blob)214 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
215 SmallVectorImpl<uint64_t> &Vals,
216 StringRef *Blob) {
217 if (AbbrevID == bitc::UNABBREV_RECORD) {
218 Expected<uint32_t> MaybeCode = ReadVBR(6);
219 if (!MaybeCode)
220 return MaybeCode.takeError();
221 uint32_t Code = MaybeCode.get();
222 Expected<uint32_t> MaybeNumElts = ReadVBR(6);
223 if (!MaybeNumElts)
224 return error(
225 ("Failed to read size: " + toString(MaybeNumElts.takeError()))
226 .c_str());
227 uint32_t NumElts = MaybeNumElts.get();
228 if (!isSizePlausible(NumElts))
229 return error("Size is not plausible");
230 Vals.reserve(Vals.size() + NumElts);
231
232 for (unsigned i = 0; i != NumElts; ++i)
233 if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
234 Vals.push_back(MaybeVal.get());
235 else
236 return MaybeVal.takeError();
237 return Code;
238 }
239
240 Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
241 if (!MaybeAbbv)
242 return MaybeAbbv.takeError();
243 const BitCodeAbbrev *Abbv = MaybeAbbv.get();
244
245 // Read the record code first.
246 assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
247 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
248 unsigned Code;
249 if (CodeOp.isLiteral())
250 Code = CodeOp.getLiteralValue();
251 else {
252 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
253 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
254 return error("Abbreviation starts with an Array or a Blob");
255 if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
256 Code = MaybeCode.get();
257 else
258 return MaybeCode.takeError();
259 }
260
261 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
262 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
263 if (Op.isLiteral()) {
264 Vals.push_back(Op.getLiteralValue());
265 continue;
266 }
267
268 if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
269 Op.getEncoding() != BitCodeAbbrevOp::Blob) {
270 if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
271 Vals.push_back(MaybeVal.get());
272 else
273 return MaybeVal.takeError();
274 continue;
275 }
276
277 if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
278 // Array case. Read the number of elements as a vbr6.
279 Expected<uint32_t> MaybeNumElts = ReadVBR(6);
280 if (!MaybeNumElts)
281 return error(
282 ("Failed to read size: " + toString(MaybeNumElts.takeError()))
283 .c_str());
284 uint32_t NumElts = MaybeNumElts.get();
285 if (!isSizePlausible(NumElts))
286 return error("Size is not plausible");
287 Vals.reserve(Vals.size() + NumElts);
288
289 // Get the element encoding.
290 if (i + 2 != e)
291 return error("Array op not second to last");
292 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
293 if (!EltEnc.isEncoding())
294 return error(
295 "Array element type has to be an encoding of a type");
296
297 // Read all the elements.
298 switch (EltEnc.getEncoding()) {
299 default:
300 return error("Array element type can't be an Array or a Blob");
301 case BitCodeAbbrevOp::Fixed:
302 for (; NumElts; --NumElts)
303 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
304 Read((unsigned)EltEnc.getEncodingData()))
305 Vals.push_back(MaybeVal.get());
306 else
307 return MaybeVal.takeError();
308 break;
309 case BitCodeAbbrevOp::VBR:
310 for (; NumElts; --NumElts)
311 if (Expected<uint64_t> MaybeVal =
312 ReadVBR64((unsigned)EltEnc.getEncodingData()))
313 Vals.push_back(MaybeVal.get());
314 else
315 return MaybeVal.takeError();
316 break;
317 case BitCodeAbbrevOp::Char6:
318 for (; NumElts; --NumElts)
319 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
320 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
321 else
322 return MaybeVal.takeError();
323 }
324 continue;
325 }
326
327 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
328 // Blob case. Read the number of bytes as a vbr6.
329 Expected<uint32_t> MaybeNumElts = ReadVBR(6);
330 if (!MaybeNumElts)
331 return MaybeNumElts.takeError();
332 uint32_t NumElts = MaybeNumElts.get();
333 SkipToFourByteBoundary(); // 32-bit alignment
334
335 // Figure out where the end of this blob will be including tail padding.
336 size_t CurBitPos = GetCurrentBitNo();
337 const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8;
338
339 // Make sure the bitstream is large enough to contain the blob.
340 if (!canSkipToPos(NewEnd/8))
341 return error("Blob ends too soon");
342
343 // Otherwise, inform the streamer that we need these bytes in memory. Skip
344 // over tail padding first, in case jumping to NewEnd invalidates the Blob
345 // pointer.
346 if (Error Err = JumpToBit(NewEnd))
347 return Err;
348 const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
349
350 // If we can return a reference to the data, do so to avoid copying it.
351 if (Blob) {
352 *Blob = StringRef(Ptr, NumElts);
353 } else {
354 // Otherwise, unpack into Vals with zero extension.
355 auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr);
356 Vals.append(UPtr, UPtr + NumElts);
357 }
358 }
359
360 return Code;
361 }
362
ReadAbbrevRecord()363 Error BitstreamCursor::ReadAbbrevRecord() {
364 auto Abbv = std::make_shared<BitCodeAbbrev>();
365 Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
366 if (!MaybeNumOpInfo)
367 return MaybeNumOpInfo.takeError();
368 unsigned NumOpInfo = MaybeNumOpInfo.get();
369 for (unsigned i = 0; i != NumOpInfo; ++i) {
370 Expected<word_t> MaybeIsLiteral = Read(1);
371 if (!MaybeIsLiteral)
372 return MaybeIsLiteral.takeError();
373 bool IsLiteral = MaybeIsLiteral.get();
374 if (IsLiteral) {
375 Expected<uint64_t> MaybeOp = ReadVBR64(8);
376 if (!MaybeOp)
377 return MaybeOp.takeError();
378 Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
379 continue;
380 }
381
382 Expected<word_t> MaybeEncoding = Read(3);
383 if (!MaybeEncoding)
384 return MaybeEncoding.takeError();
385 if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get()))
386 return error("Invalid encoding");
387
388 BitCodeAbbrevOp::Encoding E =
389 (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
390 if (BitCodeAbbrevOp::hasEncodingData(E)) {
391 Expected<uint64_t> MaybeData = ReadVBR64(5);
392 if (!MaybeData)
393 return MaybeData.takeError();
394 uint64_t Data = MaybeData.get();
395
396 // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
397 // and vbr(0) as a literal zero. This is decoded the same way, and avoids
398 // a slow path in Read() to have to handle reading zero bits.
399 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
400 Data == 0) {
401 Abbv->Add(BitCodeAbbrevOp(0));
402 continue;
403 }
404
405 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
406 Data > MaxChunkSize)
407 return error("Fixed or VBR abbrev record with size > MaxChunkData");
408
409 Abbv->Add(BitCodeAbbrevOp(E, Data));
410 } else
411 Abbv->Add(BitCodeAbbrevOp(E));
412 }
413
414 if (Abbv->getNumOperandInfos() == 0)
415 return error("Abbrev record with no operands");
416 CurAbbrevs.push_back(std::move(Abbv));
417
418 return Error::success();
419 }
420
421 Expected<std::optional<BitstreamBlockInfo>>
ReadBlockInfoBlock(bool ReadBlockInfoNames)422 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
423 if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
424 return Err;
425
426 BitstreamBlockInfo NewBlockInfo;
427
428 SmallVector<uint64_t, 64> Record;
429 BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
430
431 // Read all the records for this module.
432 while (true) {
433 Expected<BitstreamEntry> MaybeEntry =
434 advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
435 if (!MaybeEntry)
436 return MaybeEntry.takeError();
437 BitstreamEntry Entry = MaybeEntry.get();
438
439 switch (Entry.Kind) {
440 case llvm::BitstreamEntry::SubBlock: // Handled for us already.
441 case llvm::BitstreamEntry::Error:
442 return std::nullopt;
443 case llvm::BitstreamEntry::EndBlock:
444 return std::move(NewBlockInfo);
445 case llvm::BitstreamEntry::Record:
446 // The interesting case.
447 break;
448 }
449
450 // Read abbrev records, associate them with CurBID.
451 if (Entry.ID == bitc::DEFINE_ABBREV) {
452 if (!CurBlockInfo)
453 return std::nullopt;
454 if (Error Err = ReadAbbrevRecord())
455 return Err;
456
457 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
458 // appropriate BlockInfo.
459 CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
460 CurAbbrevs.pop_back();
461 continue;
462 }
463
464 // Read a record.
465 Record.clear();
466 Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
467 if (!MaybeBlockInfo)
468 return MaybeBlockInfo.takeError();
469 switch (MaybeBlockInfo.get()) {
470 default:
471 break; // Default behavior, ignore unknown content.
472 case bitc::BLOCKINFO_CODE_SETBID:
473 if (Record.size() < 1)
474 return std::nullopt;
475 CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
476 break;
477 case bitc::BLOCKINFO_CODE_BLOCKNAME: {
478 if (!CurBlockInfo)
479 return std::nullopt;
480 if (!ReadBlockInfoNames)
481 break; // Ignore name.
482 CurBlockInfo->Name = std::string(Record.begin(), Record.end());
483 break;
484 }
485 case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
486 if (!CurBlockInfo)
487 return std::nullopt;
488 if (!ReadBlockInfoNames)
489 break; // Ignore name.
490 CurBlockInfo->RecordNames.emplace_back(
491 (unsigned)Record[0], std::string(Record.begin() + 1, Record.end()));
492 break;
493 }
494 }
495 }
496 }
497