xref: /freebsd/contrib/llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp (revision 6e75b2fbf9a03e6876e0a3c089e0b3ad71876125)
1  //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  
9  #include "llvm/Bitstream/BitstreamReader.h"
10  #include "llvm/ADT/StringRef.h"
11  #include <cassert>
12  #include <string>
13  
14  using namespace llvm;
15  
16  //===----------------------------------------------------------------------===//
17  //  BitstreamCursor implementation
18  //===----------------------------------------------------------------------===//
19  
20  /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
21  Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
22    // Save the current block's state on BlockScope.
23    BlockScope.push_back(Block(CurCodeSize));
24    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
25  
26    // Add the abbrevs specific to this block to the CurAbbrevs list.
27    if (BlockInfo) {
28      if (const BitstreamBlockInfo::BlockInfo *Info =
29              BlockInfo->getBlockInfo(BlockID)) {
30        llvm::append_range(CurAbbrevs, Info->Abbrevs);
31      }
32    }
33  
34    // Get the codesize of this block.
35    Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
36    if (!MaybeVBR)
37      return MaybeVBR.takeError();
38    CurCodeSize = MaybeVBR.get();
39  
40    if (CurCodeSize > MaxChunkSize)
41      return llvm::createStringError(
42          std::errc::illegal_byte_sequence,
43          "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
44          CurCodeSize);
45  
46    SkipToFourByteBoundary();
47    Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
48    if (!MaybeNum)
49      return MaybeNum.takeError();
50    word_t NumWords = MaybeNum.get();
51    if (NumWordsP)
52      *NumWordsP = NumWords;
53  
54    if (CurCodeSize == 0)
55      return llvm::createStringError(
56          std::errc::illegal_byte_sequence,
57          "can't enter sub-block: current code size is 0");
58    if (AtEndOfStream())
59      return llvm::createStringError(
60          std::errc::illegal_byte_sequence,
61          "can't enter sub block: already at end of stream");
62  
63    return Error::success();
64  }
65  
66  static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
67                                                 const BitCodeAbbrevOp &Op) {
68    assert(!Op.isLiteral() && "Not to be used with literals!");
69  
70    // Decode the value as we are commanded.
71    switch (Op.getEncoding()) {
72    case BitCodeAbbrevOp::Array:
73    case BitCodeAbbrevOp::Blob:
74      llvm_unreachable("Should not reach here");
75    case BitCodeAbbrevOp::Fixed:
76      assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
77      return Cursor.Read((unsigned)Op.getEncodingData());
78    case BitCodeAbbrevOp::VBR:
79      assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
80      return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
81    case BitCodeAbbrevOp::Char6:
82      if (Expected<unsigned> Res = Cursor.Read(6))
83        return BitCodeAbbrevOp::DecodeChar6(Res.get());
84      else
85        return Res.takeError();
86    }
87    llvm_unreachable("invalid abbreviation encoding");
88  }
89  
90  /// skipRecord - Read the current record and discard it.
91  Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
92    // Skip unabbreviated records by reading past their entries.
93    if (AbbrevID == bitc::UNABBREV_RECORD) {
94      Expected<uint32_t> MaybeCode = ReadVBR(6);
95      if (!MaybeCode)
96        return MaybeCode.takeError();
97      unsigned Code = MaybeCode.get();
98      Expected<uint32_t> MaybeVBR = ReadVBR(6);
99      if (!MaybeVBR)
100        return MaybeVBR.get();
101      unsigned NumElts = MaybeVBR.get();
102      for (unsigned i = 0; i != NumElts; ++i)
103        if (Expected<uint64_t> Res = ReadVBR64(6))
104          ; // Skip!
105        else
106          return Res.takeError();
107      return Code;
108    }
109  
110    const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
111    const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
112    unsigned Code;
113    if (CodeOp.isLiteral())
114      Code = CodeOp.getLiteralValue();
115    else {
116      if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
117          CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
118        return llvm::createStringError(
119            std::errc::illegal_byte_sequence,
120            "Abbreviation starts with an Array or a Blob");
121      Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
122      if (!MaybeCode)
123        return MaybeCode.takeError();
124      Code = MaybeCode.get();
125    }
126  
127    for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
128      const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
129      if (Op.isLiteral())
130        continue;
131  
132      if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
133          Op.getEncoding() != BitCodeAbbrevOp::Blob) {
134        if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
135          continue;
136        else
137          return MaybeField.takeError();
138      }
139  
140      if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
141        // Array case.  Read the number of elements as a vbr6.
142        Expected<uint32_t> MaybeNum = ReadVBR(6);
143        if (!MaybeNum)
144          return MaybeNum.takeError();
145        unsigned NumElts = MaybeNum.get();
146  
147        // Get the element encoding.
148        assert(i+2 == e && "array op not second to last?");
149        const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
150  
151        // Read all the elements.
152        // Decode the value as we are commanded.
153        switch (EltEnc.getEncoding()) {
154        default:
155          report_fatal_error("Array element type can't be an Array or a Blob");
156        case BitCodeAbbrevOp::Fixed:
157          assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
158          if (Error Err =
159                  JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
160                                                    EltEnc.getEncodingData()))
161            return std::move(Err);
162          break;
163        case BitCodeAbbrevOp::VBR:
164          assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
165          for (; NumElts; --NumElts)
166            if (Expected<uint64_t> Res =
167                    ReadVBR64((unsigned)EltEnc.getEncodingData()))
168              ; // Skip!
169            else
170              return Res.takeError();
171          break;
172        case BitCodeAbbrevOp::Char6:
173          if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
174            return std::move(Err);
175          break;
176        }
177        continue;
178      }
179  
180      assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
181      // Blob case.  Read the number of bytes as a vbr6.
182      Expected<uint32_t> MaybeNum = ReadVBR(6);
183      if (!MaybeNum)
184        return MaybeNum.takeError();
185      unsigned NumElts = MaybeNum.get();
186      SkipToFourByteBoundary();  // 32-bit alignment
187  
188      // Figure out where the end of this blob will be including tail padding.
189      const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8;
190  
191      // If this would read off the end of the bitcode file, just set the
192      // record to empty and return.
193      if (!canSkipToPos(NewEnd/8)) {
194        skipToEnd();
195        break;
196      }
197  
198      // Skip over the blob.
199      if (Error Err = JumpToBit(NewEnd))
200        return std::move(Err);
201    }
202    return Code;
203  }
204  
205  Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
206                                                 SmallVectorImpl<uint64_t> &Vals,
207                                                 StringRef *Blob) {
208    if (AbbrevID == bitc::UNABBREV_RECORD) {
209      Expected<uint32_t> MaybeCode = ReadVBR(6);
210      if (!MaybeCode)
211        return MaybeCode.takeError();
212      uint32_t Code = MaybeCode.get();
213      Expected<uint32_t> MaybeNumElts = ReadVBR(6);
214      if (!MaybeNumElts)
215        return MaybeNumElts.takeError();
216      uint32_t NumElts = MaybeNumElts.get();
217      Vals.reserve(Vals.size() + NumElts);
218  
219      for (unsigned i = 0; i != NumElts; ++i)
220        if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
221          Vals.push_back(MaybeVal.get());
222        else
223          return MaybeVal.takeError();
224      return Code;
225    }
226  
227    const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
228  
229    // Read the record code first.
230    assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
231    const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
232    unsigned Code;
233    if (CodeOp.isLiteral())
234      Code = CodeOp.getLiteralValue();
235    else {
236      if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
237          CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
238        report_fatal_error("Abbreviation starts with an Array or a Blob");
239      if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
240        Code = MaybeCode.get();
241      else
242        return MaybeCode.takeError();
243    }
244  
245    for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
246      const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
247      if (Op.isLiteral()) {
248        Vals.push_back(Op.getLiteralValue());
249        continue;
250      }
251  
252      if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
253          Op.getEncoding() != BitCodeAbbrevOp::Blob) {
254        if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
255          Vals.push_back(MaybeVal.get());
256        else
257          return MaybeVal.takeError();
258        continue;
259      }
260  
261      if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
262        // Array case.  Read the number of elements as a vbr6.
263        Expected<uint32_t> MaybeNumElts = ReadVBR(6);
264        if (!MaybeNumElts)
265          return MaybeNumElts.takeError();
266        uint32_t NumElts = MaybeNumElts.get();
267        Vals.reserve(Vals.size() + NumElts);
268  
269        // Get the element encoding.
270        if (i + 2 != e)
271          report_fatal_error("Array op not second to last");
272        const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
273        if (!EltEnc.isEncoding())
274          report_fatal_error(
275              "Array element type has to be an encoding of a type");
276  
277        // Read all the elements.
278        switch (EltEnc.getEncoding()) {
279        default:
280          report_fatal_error("Array element type can't be an Array or a Blob");
281        case BitCodeAbbrevOp::Fixed:
282          for (; NumElts; --NumElts)
283            if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
284                    Read((unsigned)EltEnc.getEncodingData()))
285              Vals.push_back(MaybeVal.get());
286            else
287              return MaybeVal.takeError();
288          break;
289        case BitCodeAbbrevOp::VBR:
290          for (; NumElts; --NumElts)
291            if (Expected<uint64_t> MaybeVal =
292                    ReadVBR64((unsigned)EltEnc.getEncodingData()))
293              Vals.push_back(MaybeVal.get());
294            else
295              return MaybeVal.takeError();
296          break;
297        case BitCodeAbbrevOp::Char6:
298          for (; NumElts; --NumElts)
299            if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
300              Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
301            else
302              return MaybeVal.takeError();
303        }
304        continue;
305      }
306  
307      assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
308      // Blob case.  Read the number of bytes as a vbr6.
309      Expected<uint32_t> MaybeNumElts = ReadVBR(6);
310      if (!MaybeNumElts)
311        return MaybeNumElts.takeError();
312      uint32_t NumElts = MaybeNumElts.get();
313      SkipToFourByteBoundary();  // 32-bit alignment
314  
315      // Figure out where the end of this blob will be including tail padding.
316      size_t CurBitPos = GetCurrentBitNo();
317      const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8;
318  
319      // If this would read off the end of the bitcode file, just set the
320      // record to empty and return.
321      if (!canSkipToPos(NewEnd/8)) {
322        Vals.append(NumElts, 0);
323        skipToEnd();
324        break;
325      }
326  
327      // Otherwise, inform the streamer that we need these bytes in memory.  Skip
328      // over tail padding first, in case jumping to NewEnd invalidates the Blob
329      // pointer.
330      if (Error Err = JumpToBit(NewEnd))
331        return std::move(Err);
332      const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
333  
334      // If we can return a reference to the data, do so to avoid copying it.
335      if (Blob) {
336        *Blob = StringRef(Ptr, NumElts);
337      } else {
338        // Otherwise, unpack into Vals with zero extension.
339        auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr);
340        Vals.append(UPtr, UPtr + NumElts);
341      }
342    }
343  
344    return Code;
345  }
346  
347  Error BitstreamCursor::ReadAbbrevRecord() {
348    auto Abbv = std::make_shared<BitCodeAbbrev>();
349    Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
350    if (!MaybeNumOpInfo)
351      return MaybeNumOpInfo.takeError();
352    unsigned NumOpInfo = MaybeNumOpInfo.get();
353    for (unsigned i = 0; i != NumOpInfo; ++i) {
354      Expected<word_t> MaybeIsLiteral = Read(1);
355      if (!MaybeIsLiteral)
356        return MaybeIsLiteral.takeError();
357      bool IsLiteral = MaybeIsLiteral.get();
358      if (IsLiteral) {
359        Expected<uint64_t> MaybeOp = ReadVBR64(8);
360        if (!MaybeOp)
361          return MaybeOp.takeError();
362        Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
363        continue;
364      }
365  
366      Expected<word_t> MaybeEncoding = Read(3);
367      if (!MaybeEncoding)
368        return MaybeEncoding.takeError();
369      BitCodeAbbrevOp::Encoding E =
370          (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
371      if (BitCodeAbbrevOp::hasEncodingData(E)) {
372        Expected<uint64_t> MaybeData = ReadVBR64(5);
373        if (!MaybeData)
374          return MaybeData.takeError();
375        uint64_t Data = MaybeData.get();
376  
377        // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
378        // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
379        // a slow path in Read() to have to handle reading zero bits.
380        if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
381            Data == 0) {
382          Abbv->Add(BitCodeAbbrevOp(0));
383          continue;
384        }
385  
386        if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
387            Data > MaxChunkSize)
388          report_fatal_error(
389              "Fixed or VBR abbrev record with size > MaxChunkData");
390  
391        Abbv->Add(BitCodeAbbrevOp(E, Data));
392      } else
393        Abbv->Add(BitCodeAbbrevOp(E));
394    }
395  
396    if (Abbv->getNumOperandInfos() == 0)
397      report_fatal_error("Abbrev record with no operands");
398    CurAbbrevs.push_back(std::move(Abbv));
399  
400    return Error::success();
401  }
402  
403  Expected<Optional<BitstreamBlockInfo>>
404  BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
405    if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
406      return std::move(Err);
407  
408    BitstreamBlockInfo NewBlockInfo;
409  
410    SmallVector<uint64_t, 64> Record;
411    BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
412  
413    // Read all the records for this module.
414    while (true) {
415      Expected<BitstreamEntry> MaybeEntry =
416          advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
417      if (!MaybeEntry)
418        return MaybeEntry.takeError();
419      BitstreamEntry Entry = MaybeEntry.get();
420  
421      switch (Entry.Kind) {
422      case llvm::BitstreamEntry::SubBlock: // Handled for us already.
423      case llvm::BitstreamEntry::Error:
424        return None;
425      case llvm::BitstreamEntry::EndBlock:
426        return std::move(NewBlockInfo);
427      case llvm::BitstreamEntry::Record:
428        // The interesting case.
429        break;
430      }
431  
432      // Read abbrev records, associate them with CurBID.
433      if (Entry.ID == bitc::DEFINE_ABBREV) {
434        if (!CurBlockInfo) return None;
435        if (Error Err = ReadAbbrevRecord())
436          return std::move(Err);
437  
438        // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
439        // appropriate BlockInfo.
440        CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
441        CurAbbrevs.pop_back();
442        continue;
443      }
444  
445      // Read a record.
446      Record.clear();
447      Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
448      if (!MaybeBlockInfo)
449        return MaybeBlockInfo.takeError();
450      switch (MaybeBlockInfo.get()) {
451      default:
452        break; // Default behavior, ignore unknown content.
453      case bitc::BLOCKINFO_CODE_SETBID:
454        if (Record.size() < 1)
455          return None;
456        CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
457        break;
458      case bitc::BLOCKINFO_CODE_BLOCKNAME: {
459        if (!CurBlockInfo)
460          return None;
461        if (!ReadBlockInfoNames)
462          break; // Ignore name.
463        CurBlockInfo->Name = std::string(Record.begin(), Record.end());
464        break;
465      }
466        case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
467          if (!CurBlockInfo) return None;
468          if (!ReadBlockInfoNames)
469            break; // Ignore name.
470          CurBlockInfo->RecordNames.emplace_back(
471              (unsigned)Record[0], std::string(Record.begin() + 1, Record.end()));
472          break;
473        }
474        }
475    }
476  }
477