xref: /freebsd/contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp (revision 6966ac055c3b7a39266fb982493330df7a097997)
1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Bitstream/BitstreamReader.h"
10 #include "llvm/ADT/StringRef.h"
11 #include <cassert>
12 #include <string>
13 
14 using namespace llvm;
15 
16 //===----------------------------------------------------------------------===//
17 //  BitstreamCursor implementation
18 //===----------------------------------------------------------------------===//
19 
20 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
21 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
22   // Save the current block's state on BlockScope.
23   BlockScope.push_back(Block(CurCodeSize));
24   BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
25 
26   // Add the abbrevs specific to this block to the CurAbbrevs list.
27   if (BlockInfo) {
28     if (const BitstreamBlockInfo::BlockInfo *Info =
29             BlockInfo->getBlockInfo(BlockID)) {
30       CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
31                         Info->Abbrevs.end());
32     }
33   }
34 
35   // Get the codesize of this block.
36   Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
37   if (!MaybeVBR)
38     return MaybeVBR.takeError();
39   CurCodeSize = MaybeVBR.get();
40 
41   if (CurCodeSize > MaxChunkSize)
42     return llvm::createStringError(
43         std::errc::illegal_byte_sequence,
44         "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
45         CurCodeSize);
46 
47   SkipToFourByteBoundary();
48   Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
49   if (!MaybeNum)
50     return MaybeNum.takeError();
51   word_t NumWords = MaybeNum.get();
52   if (NumWordsP)
53     *NumWordsP = NumWords;
54 
55   if (CurCodeSize == 0)
56     return llvm::createStringError(
57         std::errc::illegal_byte_sequence,
58         "can't enter sub-block: current code size is 0");
59   if (AtEndOfStream())
60     return llvm::createStringError(
61         std::errc::illegal_byte_sequence,
62         "can't enter sub block: already at end of stream");
63 
64   return Error::success();
65 }
66 
67 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
68                                                const BitCodeAbbrevOp &Op) {
69   assert(!Op.isLiteral() && "Not to be used with literals!");
70 
71   // Decode the value as we are commanded.
72   switch (Op.getEncoding()) {
73   case BitCodeAbbrevOp::Array:
74   case BitCodeAbbrevOp::Blob:
75     llvm_unreachable("Should not reach here");
76   case BitCodeAbbrevOp::Fixed:
77     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
78     return Cursor.Read((unsigned)Op.getEncodingData());
79   case BitCodeAbbrevOp::VBR:
80     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
81     return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
82   case BitCodeAbbrevOp::Char6:
83     if (Expected<unsigned> Res = Cursor.Read(6))
84       return BitCodeAbbrevOp::DecodeChar6(Res.get());
85     else
86       return Res.takeError();
87   }
88   llvm_unreachable("invalid abbreviation encoding");
89 }
90 
91 static Error skipAbbreviatedField(BitstreamCursor &Cursor,
92                                   const BitCodeAbbrevOp &Op) {
93   assert(!Op.isLiteral() && "Not to be used with literals!");
94 
95   // Decode the value as we are commanded.
96   switch (Op.getEncoding()) {
97   case BitCodeAbbrevOp::Array:
98   case BitCodeAbbrevOp::Blob:
99     llvm_unreachable("Should not reach here");
100   case BitCodeAbbrevOp::Fixed:
101     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
102     if (Expected<unsigned> Res = Cursor.Read((unsigned)Op.getEncodingData()))
103       break;
104     else
105       return Res.takeError();
106   case BitCodeAbbrevOp::VBR:
107     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
108     if (Expected<uint64_t> Res =
109             Cursor.ReadVBR64((unsigned)Op.getEncodingData()))
110       break;
111     else
112       return Res.takeError();
113   case BitCodeAbbrevOp::Char6:
114     if (Expected<unsigned> Res = Cursor.Read(6))
115       break;
116     else
117       return Res.takeError();
118   }
119   return ErrorSuccess();
120 }
121 
122 /// skipRecord - Read the current record and discard it.
123 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
124   // Skip unabbreviated records by reading past their entries.
125   if (AbbrevID == bitc::UNABBREV_RECORD) {
126     Expected<uint32_t> MaybeCode = ReadVBR(6);
127     if (!MaybeCode)
128       return MaybeCode.takeError();
129     unsigned Code = MaybeCode.get();
130     Expected<uint32_t> MaybeVBR = ReadVBR(6);
131     if (!MaybeVBR)
132       return MaybeVBR.get();
133     unsigned NumElts = MaybeVBR.get();
134     for (unsigned i = 0; i != NumElts; ++i)
135       if (Expected<uint64_t> Res = ReadVBR64(6))
136         ; // Skip!
137       else
138         return Res.takeError();
139     return Code;
140   }
141 
142   const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
143   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
144   unsigned Code;
145   if (CodeOp.isLiteral())
146     Code = CodeOp.getLiteralValue();
147   else {
148     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
149         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
150       return llvm::createStringError(
151           std::errc::illegal_byte_sequence,
152           "Abbreviation starts with an Array or a Blob");
153     Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
154     if (!MaybeCode)
155       return MaybeCode.takeError();
156     Code = MaybeCode.get();
157   }
158 
159   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
160     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
161     if (Op.isLiteral())
162       continue;
163 
164     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
165         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
166       if (Error Err = skipAbbreviatedField(*this, Op))
167         return std::move(Err);
168       continue;
169     }
170 
171     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
172       // Array case.  Read the number of elements as a vbr6.
173       Expected<uint32_t> MaybeNum = ReadVBR(6);
174       if (!MaybeNum)
175         return MaybeNum.takeError();
176       unsigned NumElts = MaybeNum.get();
177 
178       // Get the element encoding.
179       assert(i+2 == e && "array op not second to last?");
180       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
181 
182       // Read all the elements.
183       // Decode the value as we are commanded.
184       switch (EltEnc.getEncoding()) {
185       default:
186         report_fatal_error("Array element type can't be an Array or a Blob");
187       case BitCodeAbbrevOp::Fixed:
188         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
189         if (Error Err = JumpToBit(GetCurrentBitNo() +
190                                   NumElts * EltEnc.getEncodingData()))
191           return std::move(Err);
192         break;
193       case BitCodeAbbrevOp::VBR:
194         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
195         for (; NumElts; --NumElts)
196           if (Expected<uint64_t> Res =
197                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
198             ; // Skip!
199           else
200             return Res.takeError();
201         break;
202       case BitCodeAbbrevOp::Char6:
203         if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
204           return std::move(Err);
205         break;
206       }
207       continue;
208     }
209 
210     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
211     // Blob case.  Read the number of bytes as a vbr6.
212     Expected<uint32_t> MaybeNum = ReadVBR(6);
213     if (!MaybeNum)
214       return MaybeNum.takeError();
215     unsigned NumElts = MaybeNum.get();
216     SkipToFourByteBoundary();  // 32-bit alignment
217 
218     // Figure out where the end of this blob will be including tail padding.
219     size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8;
220 
221     // If this would read off the end of the bitcode file, just set the
222     // record to empty and return.
223     if (!canSkipToPos(NewEnd/8)) {
224       skipToEnd();
225       break;
226     }
227 
228     // Skip over the blob.
229     if (Error Err = JumpToBit(NewEnd))
230       return std::move(Err);
231   }
232   return Code;
233 }
234 
235 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
236                                                SmallVectorImpl<uint64_t> &Vals,
237                                                StringRef *Blob) {
238   if (AbbrevID == bitc::UNABBREV_RECORD) {
239     Expected<uint32_t> MaybeCode = ReadVBR(6);
240     if (!MaybeCode)
241       return MaybeCode.takeError();
242     uint32_t Code = MaybeCode.get();
243     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
244     if (!MaybeNumElts)
245       return MaybeNumElts.takeError();
246     uint32_t NumElts = MaybeNumElts.get();
247 
248     for (unsigned i = 0; i != NumElts; ++i)
249       if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
250         Vals.push_back(MaybeVal.get());
251       else
252         return MaybeVal.takeError();
253     return Code;
254   }
255 
256   const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
257 
258   // Read the record code first.
259   assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
260   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
261   unsigned Code;
262   if (CodeOp.isLiteral())
263     Code = CodeOp.getLiteralValue();
264   else {
265     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
266         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
267       report_fatal_error("Abbreviation starts with an Array or a Blob");
268     if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
269       Code = MaybeCode.get();
270     else
271       return MaybeCode.takeError();
272   }
273 
274   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
275     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
276     if (Op.isLiteral()) {
277       Vals.push_back(Op.getLiteralValue());
278       continue;
279     }
280 
281     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
282         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
283       if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
284         Vals.push_back(MaybeVal.get());
285       else
286         return MaybeVal.takeError();
287       continue;
288     }
289 
290     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
291       // Array case.  Read the number of elements as a vbr6.
292       Expected<uint32_t> MaybeNumElts = ReadVBR(6);
293       if (!MaybeNumElts)
294         return MaybeNumElts.takeError();
295       uint32_t NumElts = MaybeNumElts.get();
296 
297       // Get the element encoding.
298       if (i + 2 != e)
299         report_fatal_error("Array op not second to last");
300       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
301       if (!EltEnc.isEncoding())
302         report_fatal_error(
303             "Array element type has to be an encoding of a type");
304 
305       // Read all the elements.
306       switch (EltEnc.getEncoding()) {
307       default:
308         report_fatal_error("Array element type can't be an Array or a Blob");
309       case BitCodeAbbrevOp::Fixed:
310         for (; NumElts; --NumElts)
311           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
312                   Read((unsigned)EltEnc.getEncodingData()))
313             Vals.push_back(MaybeVal.get());
314           else
315             return MaybeVal.takeError();
316         break;
317       case BitCodeAbbrevOp::VBR:
318         for (; NumElts; --NumElts)
319           if (Expected<uint64_t> MaybeVal =
320                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
321             Vals.push_back(MaybeVal.get());
322           else
323             return MaybeVal.takeError();
324         break;
325       case BitCodeAbbrevOp::Char6:
326         for (; NumElts; --NumElts)
327           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
328             Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
329           else
330             return MaybeVal.takeError();
331       }
332       continue;
333     }
334 
335     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
336     // Blob case.  Read the number of bytes as a vbr6.
337     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
338     if (!MaybeNumElts)
339       return MaybeNumElts.takeError();
340     uint32_t NumElts = MaybeNumElts.get();
341     SkipToFourByteBoundary();  // 32-bit alignment
342 
343     // Figure out where the end of this blob will be including tail padding.
344     size_t CurBitPos = GetCurrentBitNo();
345     size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8;
346 
347     // If this would read off the end of the bitcode file, just set the
348     // record to empty and return.
349     if (!canSkipToPos(NewEnd/8)) {
350       Vals.append(NumElts, 0);
351       skipToEnd();
352       break;
353     }
354 
355     // Otherwise, inform the streamer that we need these bytes in memory.  Skip
356     // over tail padding first, in case jumping to NewEnd invalidates the Blob
357     // pointer.
358     if (Error Err = JumpToBit(NewEnd))
359       return std::move(Err);
360     const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
361 
362     // If we can return a reference to the data, do so to avoid copying it.
363     if (Blob) {
364       *Blob = StringRef(Ptr, NumElts);
365     } else {
366       // Otherwise, unpack into Vals with zero extension.
367       for (; NumElts; --NumElts)
368         Vals.push_back((unsigned char)*Ptr++);
369     }
370   }
371 
372   return Code;
373 }
374 
375 Error BitstreamCursor::ReadAbbrevRecord() {
376   auto Abbv = std::make_shared<BitCodeAbbrev>();
377   Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
378   if (!MaybeNumOpInfo)
379     return MaybeNumOpInfo.takeError();
380   unsigned NumOpInfo = MaybeNumOpInfo.get();
381   for (unsigned i = 0; i != NumOpInfo; ++i) {
382     Expected<word_t> MaybeIsLiteral = Read(1);
383     if (!MaybeIsLiteral)
384       return MaybeIsLiteral.takeError();
385     bool IsLiteral = MaybeIsLiteral.get();
386     if (IsLiteral) {
387       Expected<uint64_t> MaybeOp = ReadVBR64(8);
388       if (!MaybeOp)
389         return MaybeOp.takeError();
390       Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
391       continue;
392     }
393 
394     Expected<word_t> MaybeEncoding = Read(3);
395     if (!MaybeEncoding)
396       return MaybeEncoding.takeError();
397     BitCodeAbbrevOp::Encoding E =
398         (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
399     if (BitCodeAbbrevOp::hasEncodingData(E)) {
400       Expected<uint64_t> MaybeData = ReadVBR64(5);
401       if (!MaybeData)
402         return MaybeData.takeError();
403       uint64_t Data = MaybeData.get();
404 
405       // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
406       // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
407       // a slow path in Read() to have to handle reading zero bits.
408       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
409           Data == 0) {
410         Abbv->Add(BitCodeAbbrevOp(0));
411         continue;
412       }
413 
414       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
415           Data > MaxChunkSize)
416         report_fatal_error(
417             "Fixed or VBR abbrev record with size > MaxChunkData");
418 
419       Abbv->Add(BitCodeAbbrevOp(E, Data));
420     } else
421       Abbv->Add(BitCodeAbbrevOp(E));
422   }
423 
424   if (Abbv->getNumOperandInfos() == 0)
425     report_fatal_error("Abbrev record with no operands");
426   CurAbbrevs.push_back(std::move(Abbv));
427 
428   return Error::success();
429 }
430 
431 Expected<Optional<BitstreamBlockInfo>>
432 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
433   if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
434     return std::move(Err);
435 
436   BitstreamBlockInfo NewBlockInfo;
437 
438   SmallVector<uint64_t, 64> Record;
439   BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
440 
441   // Read all the records for this module.
442   while (true) {
443     Expected<BitstreamEntry> MaybeEntry =
444         advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
445     if (!MaybeEntry)
446       return MaybeEntry.takeError();
447     BitstreamEntry Entry = MaybeEntry.get();
448 
449     switch (Entry.Kind) {
450     case llvm::BitstreamEntry::SubBlock: // Handled for us already.
451     case llvm::BitstreamEntry::Error:
452       return None;
453     case llvm::BitstreamEntry::EndBlock:
454       return std::move(NewBlockInfo);
455     case llvm::BitstreamEntry::Record:
456       // The interesting case.
457       break;
458     }
459 
460     // Read abbrev records, associate them with CurBID.
461     if (Entry.ID == bitc::DEFINE_ABBREV) {
462       if (!CurBlockInfo) return None;
463       if (Error Err = ReadAbbrevRecord())
464         return std::move(Err);
465 
466       // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
467       // appropriate BlockInfo.
468       CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
469       CurAbbrevs.pop_back();
470       continue;
471     }
472 
473     // Read a record.
474     Record.clear();
475     Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
476     if (!MaybeBlockInfo)
477       return MaybeBlockInfo.takeError();
478     switch (MaybeBlockInfo.get()) {
479     default:
480       break; // Default behavior, ignore unknown content.
481     case bitc::BLOCKINFO_CODE_SETBID:
482       if (Record.size() < 1)
483         return None;
484       CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
485       break;
486     case bitc::BLOCKINFO_CODE_BLOCKNAME: {
487       if (!CurBlockInfo)
488         return None;
489       if (!ReadBlockInfoNames)
490         break; // Ignore name.
491       std::string Name;
492       for (unsigned i = 0, e = Record.size(); i != e; ++i)
493         Name += (char)Record[i];
494       CurBlockInfo->Name = Name;
495       break;
496     }
497       case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
498         if (!CurBlockInfo) return None;
499         if (!ReadBlockInfoNames)
500           break; // Ignore name.
501         std::string Name;
502         for (unsigned i = 1, e = Record.size(); i != e; ++i)
503           Name += (char)Record[i];
504         CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
505                                                            Name));
506         break;
507       }
508       }
509   }
510 }
511