xref: /freebsd/contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp (revision d14c38ceb8aa10bd94913d0456ec0f726693379b)
1  //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  
9  #include "llvm/Bitstream/BitstreamReader.h"
10  #include "llvm/ADT/StringRef.h"
11  #include <cassert>
12  #include <optional>
13  #include <string>
14  
15  using namespace llvm;
16  
17  //===----------------------------------------------------------------------===//
18  //  BitstreamCursor implementation
19  //===----------------------------------------------------------------------===//
20  //
21  static Error error(const char *Message) {
22    return createStringError(std::errc::illegal_byte_sequence, Message);
23  }
24  
25  /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
26  Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
27    // Save the current block's state on BlockScope.
28    BlockScope.push_back(Block(CurCodeSize));
29    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
30  
31    // Add the abbrevs specific to this block to the CurAbbrevs list.
32    if (BlockInfo) {
33      if (const BitstreamBlockInfo::BlockInfo *Info =
34              BlockInfo->getBlockInfo(BlockID)) {
35        llvm::append_range(CurAbbrevs, Info->Abbrevs);
36      }
37    }
38  
39    // Get the codesize of this block.
40    Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
41    if (!MaybeVBR)
42      return MaybeVBR.takeError();
43    CurCodeSize = MaybeVBR.get();
44  
45    if (CurCodeSize > MaxChunkSize)
46      return llvm::createStringError(
47          std::errc::illegal_byte_sequence,
48          "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
49          CurCodeSize);
50  
51    SkipToFourByteBoundary();
52    Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
53    if (!MaybeNum)
54      return MaybeNum.takeError();
55    word_t NumWords = MaybeNum.get();
56    if (NumWordsP)
57      *NumWordsP = NumWords;
58  
59    if (CurCodeSize == 0)
60      return llvm::createStringError(
61          std::errc::illegal_byte_sequence,
62          "can't enter sub-block: current code size is 0");
63    if (AtEndOfStream())
64      return llvm::createStringError(
65          std::errc::illegal_byte_sequence,
66          "can't enter sub block: already at end of stream");
67  
68    return Error::success();
69  }
70  
71  static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
72                                                 const BitCodeAbbrevOp &Op) {
73    assert(!Op.isLiteral() && "Not to be used with literals!");
74  
75    // Decode the value as we are commanded.
76    switch (Op.getEncoding()) {
77    case BitCodeAbbrevOp::Array:
78    case BitCodeAbbrevOp::Blob:
79      llvm_unreachable("Should not reach here");
80    case BitCodeAbbrevOp::Fixed:
81      assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
82      return Cursor.Read((unsigned)Op.getEncodingData());
83    case BitCodeAbbrevOp::VBR:
84      assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
85      return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
86    case BitCodeAbbrevOp::Char6:
87      if (Expected<unsigned> Res = Cursor.Read(6))
88        return BitCodeAbbrevOp::DecodeChar6(Res.get());
89      else
90        return Res.takeError();
91    }
92    llvm_unreachable("invalid abbreviation encoding");
93  }
94  
95  /// skipRecord - Read the current record and discard it.
96  Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
97    // Skip unabbreviated records by reading past their entries.
98    if (AbbrevID == bitc::UNABBREV_RECORD) {
99      Expected<uint32_t> MaybeCode = ReadVBR(6);
100      if (!MaybeCode)
101        return MaybeCode.takeError();
102      unsigned Code = MaybeCode.get();
103      Expected<uint32_t> MaybeVBR = ReadVBR(6);
104      if (!MaybeVBR)
105        return MaybeVBR.takeError();
106      unsigned NumElts = MaybeVBR.get();
107      for (unsigned i = 0; i != NumElts; ++i)
108        if (Expected<uint64_t> Res = ReadVBR64(6))
109          ; // Skip!
110        else
111          return Res.takeError();
112      return Code;
113    }
114  
115    Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
116    if (!MaybeAbbv)
117      return MaybeAbbv.takeError();
118  
119    const BitCodeAbbrev *Abbv = MaybeAbbv.get();
120    const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
121    unsigned Code;
122    if (CodeOp.isLiteral())
123      Code = CodeOp.getLiteralValue();
124    else {
125      if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
126          CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
127        return llvm::createStringError(
128            std::errc::illegal_byte_sequence,
129            "Abbreviation starts with an Array or a Blob");
130      Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
131      if (!MaybeCode)
132        return MaybeCode.takeError();
133      Code = MaybeCode.get();
134    }
135  
136    for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
137      const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
138      if (Op.isLiteral())
139        continue;
140  
141      if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
142          Op.getEncoding() != BitCodeAbbrevOp::Blob) {
143        if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
144          continue;
145        else
146          return MaybeField.takeError();
147      }
148  
149      if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
150        // Array case.  Read the number of elements as a vbr6.
151        Expected<uint32_t> MaybeNum = ReadVBR(6);
152        if (!MaybeNum)
153          return MaybeNum.takeError();
154        unsigned NumElts = MaybeNum.get();
155  
156        // Get the element encoding.
157        assert(i+2 == e && "array op not second to last?");
158        const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
159  
160        // Read all the elements.
161        // Decode the value as we are commanded.
162        switch (EltEnc.getEncoding()) {
163        default:
164          return error("Array element type can't be an Array or a Blob");
165        case BitCodeAbbrevOp::Fixed:
166          assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
167          if (Error Err =
168                  JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
169                                                    EltEnc.getEncodingData()))
170            return Err;
171          break;
172        case BitCodeAbbrevOp::VBR:
173          assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
174          for (; NumElts; --NumElts)
175            if (Expected<uint64_t> Res =
176                    ReadVBR64((unsigned)EltEnc.getEncodingData()))
177              ; // Skip!
178            else
179              return Res.takeError();
180          break;
181        case BitCodeAbbrevOp::Char6:
182          if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
183            return Err;
184          break;
185        }
186        continue;
187      }
188  
189      assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
190      // Blob case.  Read the number of bytes as a vbr6.
191      Expected<uint32_t> MaybeNum = ReadVBR(6);
192      if (!MaybeNum)
193        return MaybeNum.takeError();
194      unsigned NumElts = MaybeNum.get();
195      SkipToFourByteBoundary();  // 32-bit alignment
196  
197      // Figure out where the end of this blob will be including tail padding.
198      const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8;
199  
200      // If this would read off the end of the bitcode file, just set the
201      // record to empty and return.
202      if (!canSkipToPos(NewEnd/8)) {
203        skipToEnd();
204        break;
205      }
206  
207      // Skip over the blob.
208      if (Error Err = JumpToBit(NewEnd))
209        return Err;
210    }
211    return Code;
212  }
213  
214  Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
215                                                 SmallVectorImpl<uint64_t> &Vals,
216                                                 StringRef *Blob) {
217    if (AbbrevID == bitc::UNABBREV_RECORD) {
218      Expected<uint32_t> MaybeCode = ReadVBR(6);
219      if (!MaybeCode)
220        return MaybeCode.takeError();
221      uint32_t Code = MaybeCode.get();
222      Expected<uint32_t> MaybeNumElts = ReadVBR(6);
223      if (!MaybeNumElts)
224        return error(
225            ("Failed to read size: " + toString(MaybeNumElts.takeError()))
226                .c_str());
227      uint32_t NumElts = MaybeNumElts.get();
228      if (!isSizePlausible(NumElts))
229        return error("Size is not plausible");
230      Vals.reserve(Vals.size() + NumElts);
231  
232      for (unsigned i = 0; i != NumElts; ++i)
233        if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
234          Vals.push_back(MaybeVal.get());
235        else
236          return MaybeVal.takeError();
237      return Code;
238    }
239  
240    Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
241    if (!MaybeAbbv)
242      return MaybeAbbv.takeError();
243    const BitCodeAbbrev *Abbv = MaybeAbbv.get();
244  
245    // Read the record code first.
246    assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
247    const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
248    unsigned Code;
249    if (CodeOp.isLiteral())
250      Code = CodeOp.getLiteralValue();
251    else {
252      if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
253          CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
254        return error("Abbreviation starts with an Array or a Blob");
255      if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
256        Code = MaybeCode.get();
257      else
258        return MaybeCode.takeError();
259    }
260  
261    for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
262      const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
263      if (Op.isLiteral()) {
264        Vals.push_back(Op.getLiteralValue());
265        continue;
266      }
267  
268      if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
269          Op.getEncoding() != BitCodeAbbrevOp::Blob) {
270        if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
271          Vals.push_back(MaybeVal.get());
272        else
273          return MaybeVal.takeError();
274        continue;
275      }
276  
277      if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
278        // Array case.  Read the number of elements as a vbr6.
279        Expected<uint32_t> MaybeNumElts = ReadVBR(6);
280        if (!MaybeNumElts)
281          return error(
282              ("Failed to read size: " + toString(MaybeNumElts.takeError()))
283                  .c_str());
284        uint32_t NumElts = MaybeNumElts.get();
285        if (!isSizePlausible(NumElts))
286          return error("Size is not plausible");
287        Vals.reserve(Vals.size() + NumElts);
288  
289        // Get the element encoding.
290        if (i + 2 != e)
291          return error("Array op not second to last");
292        const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
293        if (!EltEnc.isEncoding())
294          return error(
295              "Array element type has to be an encoding of a type");
296  
297        // Read all the elements.
298        switch (EltEnc.getEncoding()) {
299        default:
300          return error("Array element type can't be an Array or a Blob");
301        case BitCodeAbbrevOp::Fixed:
302          for (; NumElts; --NumElts)
303            if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
304                    Read((unsigned)EltEnc.getEncodingData()))
305              Vals.push_back(MaybeVal.get());
306            else
307              return MaybeVal.takeError();
308          break;
309        case BitCodeAbbrevOp::VBR:
310          for (; NumElts; --NumElts)
311            if (Expected<uint64_t> MaybeVal =
312                    ReadVBR64((unsigned)EltEnc.getEncodingData()))
313              Vals.push_back(MaybeVal.get());
314            else
315              return MaybeVal.takeError();
316          break;
317        case BitCodeAbbrevOp::Char6:
318          for (; NumElts; --NumElts)
319            if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
320              Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
321            else
322              return MaybeVal.takeError();
323        }
324        continue;
325      }
326  
327      assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
328      // Blob case.  Read the number of bytes as a vbr6.
329      Expected<uint32_t> MaybeNumElts = ReadVBR(6);
330      if (!MaybeNumElts)
331        return MaybeNumElts.takeError();
332      uint32_t NumElts = MaybeNumElts.get();
333      SkipToFourByteBoundary();  // 32-bit alignment
334  
335      // Figure out where the end of this blob will be including tail padding.
336      size_t CurBitPos = GetCurrentBitNo();
337      const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8;
338  
339      // Make sure the bitstream is large enough to contain the blob.
340      if (!canSkipToPos(NewEnd/8))
341        return error("Blob ends too soon");
342  
343      // Otherwise, inform the streamer that we need these bytes in memory.  Skip
344      // over tail padding first, in case jumping to NewEnd invalidates the Blob
345      // pointer.
346      if (Error Err = JumpToBit(NewEnd))
347        return Err;
348      const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
349  
350      // If we can return a reference to the data, do so to avoid copying it.
351      if (Blob) {
352        *Blob = StringRef(Ptr, NumElts);
353      } else {
354        // Otherwise, unpack into Vals with zero extension.
355        auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr);
356        Vals.append(UPtr, UPtr + NumElts);
357      }
358    }
359  
360    return Code;
361  }
362  
363  Error BitstreamCursor::ReadAbbrevRecord() {
364    auto Abbv = std::make_shared<BitCodeAbbrev>();
365    Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
366    if (!MaybeNumOpInfo)
367      return MaybeNumOpInfo.takeError();
368    unsigned NumOpInfo = MaybeNumOpInfo.get();
369    for (unsigned i = 0; i != NumOpInfo; ++i) {
370      Expected<word_t> MaybeIsLiteral = Read(1);
371      if (!MaybeIsLiteral)
372        return MaybeIsLiteral.takeError();
373      bool IsLiteral = MaybeIsLiteral.get();
374      if (IsLiteral) {
375        Expected<uint64_t> MaybeOp = ReadVBR64(8);
376        if (!MaybeOp)
377          return MaybeOp.takeError();
378        Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
379        continue;
380      }
381  
382      Expected<word_t> MaybeEncoding = Read(3);
383      if (!MaybeEncoding)
384        return MaybeEncoding.takeError();
385      if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get()))
386        return error("Invalid encoding");
387  
388      BitCodeAbbrevOp::Encoding E =
389          (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
390      if (BitCodeAbbrevOp::hasEncodingData(E)) {
391        Expected<uint64_t> MaybeData = ReadVBR64(5);
392        if (!MaybeData)
393          return MaybeData.takeError();
394        uint64_t Data = MaybeData.get();
395  
396        // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
397        // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
398        // a slow path in Read() to have to handle reading zero bits.
399        if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
400            Data == 0) {
401          Abbv->Add(BitCodeAbbrevOp(0));
402          continue;
403        }
404  
405        if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
406            Data > MaxChunkSize)
407          return error("Fixed or VBR abbrev record with size > MaxChunkData");
408  
409        Abbv->Add(BitCodeAbbrevOp(E, Data));
410      } else
411        Abbv->Add(BitCodeAbbrevOp(E));
412    }
413  
414    if (Abbv->getNumOperandInfos() == 0)
415      return error("Abbrev record with no operands");
416    CurAbbrevs.push_back(std::move(Abbv));
417  
418    return Error::success();
419  }
420  
421  Expected<std::optional<BitstreamBlockInfo>>
422  BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
423    if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
424      return Err;
425  
426    BitstreamBlockInfo NewBlockInfo;
427  
428    SmallVector<uint64_t, 64> Record;
429    BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
430  
431    // Read all the records for this module.
432    while (true) {
433      Expected<BitstreamEntry> MaybeEntry =
434          advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
435      if (!MaybeEntry)
436        return MaybeEntry.takeError();
437      BitstreamEntry Entry = MaybeEntry.get();
438  
439      switch (Entry.Kind) {
440      case llvm::BitstreamEntry::SubBlock: // Handled for us already.
441      case llvm::BitstreamEntry::Error:
442        return std::nullopt;
443      case llvm::BitstreamEntry::EndBlock:
444        return std::move(NewBlockInfo);
445      case llvm::BitstreamEntry::Record:
446        // The interesting case.
447        break;
448      }
449  
450      // Read abbrev records, associate them with CurBID.
451      if (Entry.ID == bitc::DEFINE_ABBREV) {
452        if (!CurBlockInfo)
453          return std::nullopt;
454        if (Error Err = ReadAbbrevRecord())
455          return Err;
456  
457        // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
458        // appropriate BlockInfo.
459        CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
460        CurAbbrevs.pop_back();
461        continue;
462      }
463  
464      // Read a record.
465      Record.clear();
466      Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
467      if (!MaybeBlockInfo)
468        return MaybeBlockInfo.takeError();
469      switch (MaybeBlockInfo.get()) {
470      default:
471        break; // Default behavior, ignore unknown content.
472      case bitc::BLOCKINFO_CODE_SETBID:
473        if (Record.size() < 1)
474          return std::nullopt;
475        CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
476        break;
477      case bitc::BLOCKINFO_CODE_BLOCKNAME: {
478        if (!CurBlockInfo)
479          return std::nullopt;
480        if (!ReadBlockInfoNames)
481          break; // Ignore name.
482        CurBlockInfo->Name = std::string(Record.begin(), Record.end());
483        break;
484      }
485        case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
486        if (!CurBlockInfo)
487          return std::nullopt;
488        if (!ReadBlockInfoNames)
489          break; // Ignore name.
490        CurBlockInfo->RecordNames.emplace_back(
491            (unsigned)Record[0], std::string(Record.begin() + 1, Record.end()));
492        break;
493        }
494        }
495    }
496  }
497