xref: /freebsd/contrib/llvm-project/llvm/utils/TableGen/DecoderEmitter.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // It contains the tablegen backend that emits the decoder functions for
10 // targets with fixed/variable length instruction set.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "Common/CodeGenHwModes.h"
15 #include "Common/CodeGenInstruction.h"
16 #include "Common/CodeGenTarget.h"
17 #include "Common/InfoByHwMode.h"
18 #include "Common/VarLenCodeEmitterGen.h"
19 #include "TableGenBackends.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/CachedHashString.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallBitVector.h"
26 #include "llvm/ADT/SmallString.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/ADT/StringExtras.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/MC/MCDecoderOps.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/FormatVariadic.h"
36 #include "llvm/Support/FormattedStream.h"
37 #include "llvm/Support/LEB128.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/TableGen/Error.h"
41 #include "llvm/TableGen/Record.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <cstddef>
45 #include <cstdint>
46 #include <map>
47 #include <memory>
48 #include <set>
49 #include <string>
50 #include <utility>
51 #include <vector>
52 
53 using namespace llvm;
54 
55 #define DEBUG_TYPE "decoder-emitter"
56 
57 extern cl::OptionCategory DisassemblerEmitterCat;
58 
59 enum SuppressLevel {
60   SUPPRESSION_DISABLE,
61   SUPPRESSION_LEVEL1,
62   SUPPRESSION_LEVEL2
63 };
64 
65 static cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates(
66     "suppress-per-hwmode-duplicates",
67     cl::desc("Suppress duplication of instrs into per-HwMode decoder tables"),
68     cl::values(
69         clEnumValN(
70             SUPPRESSION_DISABLE, "O0",
71             "Do not prevent DecoderTable duplications caused by HwModes"),
72         clEnumValN(
73             SUPPRESSION_LEVEL1, "O1",
74             "Remove duplicate DecoderTable entries generated due to HwModes"),
75         clEnumValN(
76             SUPPRESSION_LEVEL2, "O2",
77             "Extract HwModes-specific instructions into new DecoderTables, "
78             "significantly reducing Table Duplications")),
79     cl::init(SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat));
80 
81 static cl::opt<bool> LargeTable(
82     "large-decoder-table",
83     cl::desc("Use large decoder table format. This uses 24 bits for offset\n"
84              "in the table instead of the default 16 bits."),
85     cl::init(false), cl::cat(DisassemblerEmitterCat));
86 
87 static cl::opt<bool> UseFnTableInDecodeToMCInst(
88     "use-fn-table-in-decode-to-mcinst",
89     cl::desc(
90         "Use a table of function pointers instead of a switch case in the\n"
91         "generated `decodeToMCInst` function. Helps improve compile time\n"
92         "of the generated code."),
93     cl::init(false), cl::cat(DisassemblerEmitterCat));
94 
95 STATISTIC(NumEncodings, "Number of encodings considered");
96 STATISTIC(NumEncodingsLackingDisasm,
97           "Number of encodings without disassembler info");
98 STATISTIC(NumInstructions, "Number of instructions considered");
99 STATISTIC(NumEncodingsSupported, "Number of encodings supported");
100 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
101 
102 static unsigned getNumToSkipInBytes() { return LargeTable ? 3 : 2; }
103 
104 namespace {
105 
106 struct EncodingField {
107   unsigned Base, Width, Offset;
108   EncodingField(unsigned B, unsigned W, unsigned O)
109       : Base(B), Width(W), Offset(O) {}
110 };
111 
112 struct OperandInfo {
113   std::vector<EncodingField> Fields;
114   std::string Decoder;
115   bool HasCompleteDecoder;
116   uint64_t InitValue = 0;
117 
118   OperandInfo(std::string D, bool HCD) : Decoder(D), HasCompleteDecoder(HCD) {}
119 
120   void addField(unsigned Base, unsigned Width, unsigned Offset) {
121     Fields.push_back(EncodingField(Base, Width, Offset));
122   }
123 
124   unsigned numFields() const { return Fields.size(); }
125 
126   typedef std::vector<EncodingField>::const_iterator const_iterator;
127 
128   const_iterator begin() const { return Fields.begin(); }
129   const_iterator end() const { return Fields.end(); }
130 };
131 
132 typedef std::vector<uint32_t> FixupList;
133 typedef std::vector<FixupList> FixupScopeList;
134 typedef SmallSetVector<CachedHashString, 16> PredicateSet;
135 typedef SmallSetVector<CachedHashString, 16> DecoderSet;
136 
137 class DecoderTable {
138 public:
139   DecoderTable() { Data.reserve(16384); }
140 
141   void clear() { Data.clear(); }
142   void push_back(uint8_t Item) { Data.push_back(Item); }
143   size_t size() const { return Data.size(); }
144   const uint8_t *data() const { return Data.data(); }
145 
146   using const_iterator = std::vector<uint8_t>::const_iterator;
147   const_iterator begin() const { return Data.begin(); }
148   const_iterator end() const { return Data.end(); }
149 
150   // Insert a ULEB128 encoded value into the table.
151   void insertULEB128(uint64_t Value) {
152     // Encode and emit the value to filter against.
153     uint8_t Buffer[16];
154     unsigned Len = encodeULEB128(Value, Buffer);
155     Data.insert(Data.end(), Buffer, Buffer + Len);
156   }
157 
158   // Insert space for `NumToSkip` and return the position
159   // in the table for patching.
160   size_t insertNumToSkip() {
161     size_t Size = Data.size();
162     Data.insert(Data.end(), getNumToSkipInBytes(), 0);
163     return Size;
164   }
165 
166   void patchNumToSkip(size_t FixupIdx, uint32_t DestIdx) {
167     // Calculate the distance from the byte following the fixup entry byte
168     // to the destination. The Target is calculated from after the
169     // `getNumToSkipInBytes()`-byte NumToSkip entry itself, so subtract
170     // `getNumToSkipInBytes()` from the displacement here to account for that.
171     assert(DestIdx >= FixupIdx + getNumToSkipInBytes() &&
172            "Expecting a forward jump in the decoding table");
173     uint32_t Delta = DestIdx - FixupIdx - getNumToSkipInBytes();
174     if (!isUIntN(8 * getNumToSkipInBytes(), Delta))
175       PrintFatalError(
176           "disassembler decoding table too large, try --large-decoder-table");
177 
178     Data[FixupIdx] = static_cast<uint8_t>(Delta);
179     Data[FixupIdx + 1] = static_cast<uint8_t>(Delta >> 8);
180     if (getNumToSkipInBytes() == 3)
181       Data[FixupIdx + 2] = static_cast<uint8_t>(Delta >> 16);
182   }
183 
184 private:
185   std::vector<uint8_t> Data;
186 };
187 
188 struct DecoderTableInfo {
189   DecoderTable Table;
190   FixupScopeList FixupStack;
191   PredicateSet Predicates;
192   DecoderSet Decoders;
193 
194   bool isOutermostScope() const { return FixupStack.size() == 1; }
195 };
196 
197 struct EncodingAndInst {
198   const Record *EncodingDef;
199   const CodeGenInstruction *Inst;
200   StringRef HwModeName;
201 
202   EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst,
203                   StringRef HwModeName = "")
204       : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {}
205 };
206 
207 struct EncodingIDAndOpcode {
208   unsigned EncodingID;
209   unsigned Opcode;
210 
211   EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {}
212   EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
213       : EncodingID(EncodingID), Opcode(Opcode) {}
214 };
215 
216 using EncodingIDsVec = std::vector<EncodingIDAndOpcode>;
217 using NamespacesHwModesMap = std::map<std::string, std::set<StringRef>>;
218 
219 class DecoderEmitter {
220   const RecordKeeper &RK;
221   std::vector<EncodingAndInst> NumberedEncodings;
222 
223 public:
224   DecoderEmitter(const RecordKeeper &R, StringRef PredicateNamespace)
225       : RK(R), Target(R), PredicateNamespace(PredicateNamespace) {}
226 
227   // Emit the decoder state machine table. Returns a mask of MCD decoder ops
228   // that were emitted.
229   unsigned emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
230                      unsigned BitWidth, StringRef Namespace,
231                      const EncodingIDsVec &EncodingIDs) const;
232   void emitInstrLenTable(formatted_raw_ostream &OS,
233                          ArrayRef<unsigned> InstrLen) const;
234   void emitPredicateFunction(formatted_raw_ostream &OS,
235                              PredicateSet &Predicates) const;
236   void emitDecoderFunction(formatted_raw_ostream &OS,
237                            DecoderSet &Decoders) const;
238 
239   // run - Output the code emitter
240   void run(raw_ostream &o);
241 
242 private:
243   CodeGenTarget Target;
244 
245 public:
246   StringRef PredicateNamespace;
247 };
248 
249 // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
250 // for a bit value.
251 //
252 // BIT_UNFILTERED is used as the init value for a filter position.  It is used
253 // only for filter processings.
254 struct BitValue {
255   enum bit_value_t : uint8_t {
256     BIT_FALSE,     // '0'
257     BIT_TRUE,      // '1'
258     BIT_UNSET,     // '?', printed as '_'
259     BIT_UNFILTERED // unfiltered, printed as '.'
260   };
261 
262   BitValue(bit_value_t V) : V(V) {}
263   explicit BitValue(const Init *Init) {
264     if (const auto *Bit = dyn_cast<BitInit>(Init))
265       V = Bit->getValue() ? BIT_TRUE : BIT_FALSE;
266     else
267       V = BIT_UNSET;
268   }
269   BitValue(const BitsInit &Bits, unsigned Idx) : BitValue(Bits.getBit(Idx)) {}
270 
271   bool isSet() const { return V == BIT_TRUE || V == BIT_FALSE; }
272   bool isUnset() const { return V == BIT_UNSET; }
273   std::optional<uint64_t> getValue() const {
274     if (isSet())
275       return static_cast<uint64_t>(V);
276     return std::nullopt;
277   }
278 
279   // For printing a bit value.
280   operator StringRef() const {
281     switch (V) {
282     case BIT_FALSE:
283       return "0";
284     case BIT_TRUE:
285       return "1";
286     case BIT_UNSET:
287       return "_";
288     case BIT_UNFILTERED:
289       return ".";
290     }
291     llvm_unreachable("Unknow bit value");
292   }
293 
294   bool operator==(bit_value_t Other) const { return Other == V; }
295   bool operator!=(bit_value_t Other) const { return Other != V; }
296 
297 private:
298   bit_value_t V;
299 };
300 
301 } // end anonymous namespace
302 
303 static raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
304   if (Value.EncodingDef != Value.Inst->TheDef)
305     OS << Value.EncodingDef->getName() << ":";
306   OS << Value.Inst->TheDef->getName();
307   return OS;
308 }
309 
310 // Prints the bit value for each position.
311 static void dumpBits(raw_ostream &OS, const BitsInit &Bits) {
312   for (const Init *Bit : reverse(Bits.getBits()))
313     OS << BitValue(Bit);
314 }
315 
316 static const BitsInit &getBitsField(const Record &Def, StringRef FieldName) {
317   const RecordVal *RV = Def.getValue(FieldName);
318   if (const BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue()))
319     return *Bits;
320 
321   // Handle variable length instructions.
322   VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
323   SmallVector<const Init *, 16> Bits;
324 
325   for (const auto &SI : VLI) {
326     if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value))
327       llvm::append_range(Bits, BI->getBits());
328     else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value))
329       Bits.push_back(BI);
330     else
331       Bits.append(SI.BitWidth, UnsetInit::get(Def.getRecords()));
332   }
333 
334   return *BitsInit::get(Def.getRecords(), Bits);
335 }
336 
337 // Representation of the instruction to work on.
338 typedef std::vector<BitValue> insn_t;
339 
340 namespace {
341 
342 static constexpr uint64_t NO_FIXED_SEGMENTS_SENTINEL =
343     std::numeric_limits<uint64_t>::max();
344 
345 class FilterChooser;
346 
347 /// Filter - Filter works with FilterChooser to produce the decoding tree for
348 /// the ISA.
349 ///
350 /// It is useful to think of a Filter as governing the switch stmts of the
351 /// decoding tree in a certain level.  Each case stmt delegates to an inferior
352 /// FilterChooser to decide what further decoding logic to employ, or in another
353 /// words, what other remaining bits to look at.  The FilterChooser eventually
354 /// chooses a best Filter to do its job.
355 ///
356 /// This recursive scheme ends when the number of Opcodes assigned to the
357 /// FilterChooser becomes 1 or if there is a conflict.  A conflict happens when
358 /// the Filter/FilterChooser combo does not know how to distinguish among the
359 /// Opcodes assigned.
360 ///
361 /// An example of a conflict is
362 ///
363 /// Conflict:
364 ///                     111101000.00........00010000....
365 ///                     111101000.00........0001........
366 ///                     1111010...00........0001........
367 ///                     1111010...00....................
368 ///                     1111010.........................
369 ///                     1111............................
370 ///                     ................................
371 ///     VST4q8a         111101000_00________00010000____
372 ///     VST4q8b         111101000_00________00010000____
373 ///
374 /// The Debug output shows the path that the decoding tree follows to reach the
375 /// the conclusion that there is a conflict.  VST4q8a is a vst4 to double-spaced
376 /// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
377 ///
378 /// The encoding info in the .td files does not specify this meta information,
379 /// which could have been used by the decoder to resolve the conflict.  The
380 /// decoder could try to decode the even/odd register numbering and assign to
381 /// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
382 /// version and return the Opcode since the two have the same Asm format string.
383 class Filter {
384 protected:
385   const FilterChooser &Owner; // FilterChooser who owns this filter
386   unsigned StartBit; // the starting bit position
387   unsigned NumBits;  // number of bits to filter
388   bool Mixed;        // a mixed region contains both set and unset bits
389 
390   // Map of well-known segment value to the set of uid's with that value.
391   std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions;
392 
393   // Set of uid's with non-constant segment values.
394   std::vector<EncodingIDAndOpcode> VariableInstructions;
395 
396   // Map of well-known segment value to its delegate.
397   std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
398 
399   // Number of instructions which fall under FilteredInstructions category.
400   unsigned NumFiltered;
401 
402   // Keeps track of the last opcode in the filtered bucket.
403   EncodingIDAndOpcode LastOpcFiltered;
404 
405 public:
406   Filter(Filter &&f);
407   Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits,
408          bool mixed);
409 
410   ~Filter() = default;
411 
412   unsigned getNumFiltered() const { return NumFiltered; }
413 
414   EncodingIDAndOpcode getSingletonOpc() const {
415     assert(NumFiltered == 1);
416     return LastOpcFiltered;
417   }
418 
419   // Return the filter chooser for the group of instructions without constant
420   // segment values.
421   const FilterChooser &getVariableFC() const {
422     assert(NumFiltered == 1 && FilterChooserMap.size() == 1);
423     return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second);
424   }
425 
426   // Divides the decoding task into sub tasks and delegates them to the
427   // inferior FilterChooser's.
428   //
429   // A special case arises when there's only one entry in the filtered
430   // instructions.  In order to unambiguously decode the singleton, we need to
431   // match the remaining undecoded encoding bits against the singleton.
432   void recurse();
433 
434   // Emit table entries to decode instructions given a segment or segments of
435   // bits.
436   void emitTableEntry(DecoderTableInfo &TableInfo) const;
437 
438   // Returns the number of fanout produced by the filter.  More fanout implies
439   // the filter distinguishes more categories of instructions.
440   unsigned usefulness() const;
441 }; // end class Filter
442 
443 // These are states of our finite state machines used in FilterChooser's
444 // filterProcessor() which produces the filter candidates to use.
445 enum bitAttr_t {
446   ATTR_NONE,
447   ATTR_FILTERED,
448   ATTR_ALL_SET,
449   ATTR_ALL_UNSET,
450   ATTR_MIXED
451 };
452 
453 /// FilterChooser - FilterChooser chooses the best filter among a set of Filters
454 /// in order to perform the decoding of instructions at the current level.
455 ///
456 /// Decoding proceeds from the top down.  Based on the well-known encoding bits
457 /// of instructions available, FilterChooser builds up the possible Filters that
458 /// can further the task of decoding by distinguishing among the remaining
459 /// candidate instructions.
460 ///
461 /// Once a filter has been chosen, it is called upon to divide the decoding task
462 /// into sub-tasks and delegates them to its inferior FilterChoosers for further
463 /// processings.
464 ///
465 /// It is useful to think of a Filter as governing the switch stmts of the
466 /// decoding tree.  And each case is delegated to an inferior FilterChooser to
467 /// decide what further remaining bits to look at.
468 
469 class FilterChooser {
470 protected:
471   friend class Filter;
472 
473   // Vector of codegen instructions to choose our filter.
474   ArrayRef<EncodingAndInst> AllInstructions;
475 
476   // Vector of uid's for this filter chooser to work on.
477   // The first member of the pair is the opcode id being decoded, the second is
478   // the opcode id that should be emitted.
479   ArrayRef<EncodingIDAndOpcode> Opcodes;
480 
481   // Lookup table for the operand decoding of instructions.
482   const std::map<unsigned, std::vector<OperandInfo>> &Operands;
483 
484   // Vector of candidate filters.
485   std::vector<Filter> Filters;
486 
487   // Array of bit values passed down from our parent.
488   // Set to all BIT_UNFILTERED's for Parent == NULL.
489   std::vector<BitValue> FilterBitValues;
490 
491   // Links to the FilterChooser above us in the decoding tree.
492   const FilterChooser *Parent;
493 
494   // Index of the best filter from Filters.
495   int BestIndex;
496 
497   // Width of instructions
498   unsigned BitWidth;
499 
500   // Parent emitter
501   const DecoderEmitter *Emitter;
502 
503   struct Island {
504     unsigned StartBit;
505     unsigned NumBits;
506     uint64_t FieldVal;
507   };
508 
509 public:
510   FilterChooser(ArrayRef<EncodingAndInst> Insts,
511                 ArrayRef<EncodingIDAndOpcode> IDs,
512                 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
513                 unsigned BW, const DecoderEmitter *E)
514       : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
515         FilterBitValues(BW, BitValue::BIT_UNFILTERED), Parent(nullptr),
516         BestIndex(-1), BitWidth(BW), Emitter(E) {
517     doFilter();
518   }
519 
520   FilterChooser(ArrayRef<EncodingAndInst> Insts,
521                 ArrayRef<EncodingIDAndOpcode> IDs,
522                 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
523                 const std::vector<BitValue> &ParentFilterBitValues,
524                 const FilterChooser &parent)
525       : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
526         FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
527         BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
528     doFilter();
529   }
530 
531   FilterChooser(const FilterChooser &) = delete;
532   void operator=(const FilterChooser &) = delete;
533 
534   unsigned getBitWidth() const { return BitWidth; }
535 
536 protected:
537   // Populates the insn given the uid.
538   void insnWithID(insn_t &Insn, unsigned Opcode) const {
539     const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
540     const BitsInit &Bits = getBitsField(*EncodingDef, "Inst");
541     Insn.resize(std::max(BitWidth, Bits.getNumBits()), BitValue::BIT_UNSET);
542     // We may have a SoftFail bitmask, which specifies a mask where an encoding
543     // may differ from the value in "Inst" and yet still be valid, but the
544     // disassembler should return SoftFail instead of Success.
545     //
546     // This is used for marking UNPREDICTABLE instructions in the ARM world.
547     const RecordVal *RV = EncodingDef->getValue("SoftFail");
548     const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
549     for (unsigned i = 0; i < Bits.getNumBits(); ++i) {
550       if (SFBits && BitValue(*SFBits, i) == BitValue::BIT_TRUE)
551         Insn[i] = BitValue::BIT_UNSET;
552       else
553         Insn[i] = BitValue(Bits, i);
554     }
555   }
556 
557   // Populates the field of the insn given the start position and the number of
558   // consecutive bits to scan for.
559   //
560   // Returns a pair of values (indicator, field), where the indicator is false
561   // if there exists any uninitialized bit value in the range and true if all
562   // bits are well-known. The second value is the potentially populated field.
563   std::pair<bool, uint64_t> fieldFromInsn(const insn_t &Insn, unsigned StartBit,
564                                           unsigned NumBits) const;
565 
566   /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
567   /// filter array as a series of chars.
568   void dumpFilterArray(raw_ostream &OS, ArrayRef<BitValue> Filter) const;
569 
570   /// dumpStack - dumpStack traverses the filter chooser chain and calls
571   /// dumpFilterArray on each filter chooser up to the top level one.
572   void dumpStack(raw_ostream &OS, const char *prefix) const;
573 
574   Filter &bestFilter() {
575     assert(BestIndex != -1 && "BestIndex not set");
576     return Filters[BestIndex];
577   }
578 
579   bool PositionFiltered(unsigned Idx) const {
580     return FilterBitValues[Idx].isSet();
581   }
582 
583   // Calculates the island(s) needed to decode the instruction.
584   // This returns a list of undecoded bits of an instructions, for example,
585   // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
586   // decoded bits in order to verify that the instruction matches the Opcode.
587   unsigned getIslands(std::vector<Island> &Islands, const insn_t &Insn) const;
588 
589   // Emits code to check the Predicates member of an instruction are true.
590   // Returns true if predicate matches were emitted, false otherwise.
591   bool emitPredicateMatch(raw_ostream &OS, unsigned Opc) const;
592   bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
593                              raw_ostream &OS) const;
594 
595   bool doesOpcodeNeedPredicate(unsigned Opc) const;
596   unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const;
597   void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
598 
599   void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
600 
601   // Emits table entries to decode the singleton.
602   void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
603                                EncodingIDAndOpcode Opc) const;
604 
605   // Emits code to decode the singleton, and then to decode the rest.
606   void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
607                                const Filter &Best) const;
608 
609   bool emitBinaryParser(raw_ostream &OS, indent Indent,
610                         const OperandInfo &OpInfo) const;
611 
612   bool emitDecoder(raw_ostream &OS, indent Indent, unsigned Opc) const;
613   std::pair<unsigned, bool> getDecoderIndex(DecoderSet &Decoders,
614                                             unsigned Opc) const;
615 
616   // Assign a single filter and run with it.
617   void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
618 
619   // reportRegion is a helper function for filterProcessor to mark a region as
620   // eligible for use as a filter region.
621   void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
622                     bool AllowMixed);
623 
624   // FilterProcessor scans the well-known encoding bits of the instructions and
625   // builds up a list of candidate filters.  It chooses the best filter and
626   // recursively descends down the decoding tree.
627   bool filterProcessor(bool AllowMixed, bool Greedy = true);
628 
629   // Decides on the best configuration of filter(s) to use in order to decode
630   // the instructions.  A conflict of instructions may occur, in which case we
631   // dump the conflict set to the standard error.
632   void doFilter();
633 
634 public:
635   // emitTableEntries - Emit state machine entries to decode our share of
636   // instructions.
637   void emitTableEntries(DecoderTableInfo &TableInfo) const;
638 };
639 
640 } // end anonymous namespace
641 
642 ///////////////////////////
643 //                       //
644 // Filter Implementation //
645 //                       //
646 ///////////////////////////
647 
648 Filter::Filter(Filter &&f)
649     : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
650       FilteredInstructions(std::move(f.FilteredInstructions)),
651       VariableInstructions(std::move(f.VariableInstructions)),
652       FilterChooserMap(std::move(f.FilterChooserMap)),
653       NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {}
654 
655 Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits,
656                bool mixed)
657     : Owner(owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) {
658   assert(StartBit + NumBits - 1 < Owner.BitWidth);
659 
660   NumFiltered = 0;
661   LastOpcFiltered = {0, 0};
662 
663   for (const auto &OpcPair : Owner.Opcodes) {
664     insn_t Insn;
665 
666     // Populates the insn given the uid.
667     Owner.insnWithID(Insn, OpcPair.EncodingID);
668 
669     // Scans the segment for possibly well-specified encoding bits.
670     auto [Ok, Field] = Owner.fieldFromInsn(Insn, StartBit, NumBits);
671 
672     if (Ok) {
673       // The encoding bits are well-known.  Lets add the uid of the
674       // instruction into the bucket keyed off the constant field value.
675       LastOpcFiltered = OpcPair;
676       FilteredInstructions[Field].push_back(LastOpcFiltered);
677       ++NumFiltered;
678     } else {
679       // Some of the encoding bit(s) are unspecified.  This contributes to
680       // one additional member of "Variable" instructions.
681       VariableInstructions.push_back(OpcPair);
682     }
683   }
684 
685   assert((FilteredInstructions.size() + VariableInstructions.size() > 0) &&
686          "Filter returns no instruction categories");
687 }
688 
689 // Divides the decoding task into sub tasks and delegates them to the
690 // inferior FilterChooser's.
691 //
692 // A special case arises when there's only one entry in the filtered
693 // instructions.  In order to unambiguously decode the singleton, we need to
694 // match the remaining undecoded encoding bits against the singleton.
695 void Filter::recurse() {
696   // Starts by inheriting our parent filter chooser's filter bit values.
697   std::vector<BitValue> BitValueArray(Owner.FilterBitValues);
698 
699   if (!VariableInstructions.empty()) {
700     // Conservatively marks each segment position as BIT_UNSET.
701     for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
702       BitValueArray[StartBit + bitIndex] = BitValue::BIT_UNSET;
703 
704     // Delegates to an inferior filter chooser for further processing on this
705     // group of instructions whose segment values are variable.
706     FilterChooserMap.try_emplace(
707         NO_FIXED_SEGMENTS_SENTINEL,
708         std::make_unique<FilterChooser>(Owner.AllInstructions,
709                                         VariableInstructions, Owner.Operands,
710                                         BitValueArray, Owner));
711   }
712 
713   // No need to recurse for a singleton filtered instruction.
714   // See also Filter::emit*().
715   if (getNumFiltered() == 1) {
716     assert(FilterChooserMap.size() == 1);
717     return;
718   }
719 
720   // Otherwise, create sub choosers.
721   for (const auto &Inst : FilteredInstructions) {
722     // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
723     for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
724       BitValueArray[StartBit + bitIndex] = Inst.first & (1ULL << bitIndex)
725                                                ? BitValue::BIT_TRUE
726                                                : BitValue::BIT_FALSE;
727 
728     // Delegates to an inferior filter chooser for further processing on this
729     // category of instructions.
730     FilterChooserMap.try_emplace(
731         Inst.first,
732         std::make_unique<FilterChooser>(Owner.AllInstructions, Inst.second,
733                                         Owner.Operands, BitValueArray, Owner));
734   }
735 }
736 
737 static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
738                                uint32_t DestIdx) {
739   // Any NumToSkip fixups in the current scope can resolve to the
740   // current location.
741   for (uint32_t FixupIdx : Fixups)
742     Table.patchNumToSkip(FixupIdx, DestIdx);
743 }
744 
745 // Emit table entries to decode instructions given a segment or segments
746 // of bits.
747 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
748   assert(isUInt<8>(NumBits) && "NumBits overflowed uint8 table entry!");
749   TableInfo.Table.push_back(MCD::OPC_ExtractField);
750 
751   TableInfo.Table.insertULEB128(StartBit);
752   TableInfo.Table.push_back(NumBits);
753 
754   // If the NO_FIXED_SEGMENTS_SENTINEL is present, we need to add a new scope
755   // for this filter. Otherwise, we can skip adding a new scope and any
756   // patching added will automatically be added to the enclosing scope.
757 
758   // If NO_FIXED_SEGMENTS_SENTINEL is present, it will be last entry in
759   // FilterChooserMap.
760 
761   const uint64_t LastFilter = FilterChooserMap.rbegin()->first;
762   bool HasFallthrough = LastFilter == NO_FIXED_SEGMENTS_SENTINEL;
763   if (HasFallthrough)
764     TableInfo.FixupStack.emplace_back();
765 
766   DecoderTable &Table = TableInfo.Table;
767 
768   size_t PrevFilter = 0;
769   for (const auto &[FilterVal, Delegate] : FilterChooserMap) {
770     // Field value NO_FIXED_SEGMENTS_SENTINEL implies a non-empty set of
771     // variable instructions. See also recurse().
772     if (FilterVal == NO_FIXED_SEGMENTS_SENTINEL) {
773       // Each scope should always have at least one filter value to check
774       // for.
775       assert(PrevFilter != 0 && "empty filter set!");
776       FixupList &CurScope = TableInfo.FixupStack.back();
777       // Resolve any NumToSkip fixups in the current scope.
778       resolveTableFixups(Table, CurScope, Table.size());
779 
780       // Delete the scope we have added here.
781       TableInfo.FixupStack.pop_back();
782 
783       PrevFilter = 0; // Don't re-process the filter's fallthrough.
784     } else {
785       // The last filtervalue emitted can be OPC_FilterValue if we are at
786       // outermost scope.
787       const uint8_t DecoderOp =
788           FilterVal == LastFilter && TableInfo.isOutermostScope()
789               ? MCD::OPC_FilterValueOrFail
790               : MCD::OPC_FilterValue;
791       Table.push_back(DecoderOp);
792       Table.insertULEB128(FilterVal);
793       if (DecoderOp == MCD::OPC_FilterValue) {
794         // Reserve space for the NumToSkip entry. We'll backpatch the value
795         // later.
796         PrevFilter = Table.insertNumToSkip();
797       } else {
798         PrevFilter = 0;
799       }
800     }
801 
802     // We arrive at a category of instructions with the same segment value.
803     // Now delegate to the sub filter chooser for further decodings.
804     // The case may fallthrough, which happens if the remaining well-known
805     // encoding bits do not match exactly.
806     Delegate->emitTableEntries(TableInfo);
807 
808     // Now that we've emitted the body of the handler, update the NumToSkip
809     // of the filter itself to be able to skip forward when false.
810     if (PrevFilter)
811       Table.patchNumToSkip(PrevFilter, Table.size());
812   }
813 
814   // If there is no fallthrough and the final filter was not in the outermost
815   // scope, then it must be fixed up according to the enclosing scope rather
816   // than the current position.
817   if (PrevFilter)
818     TableInfo.FixupStack.back().push_back(PrevFilter);
819 }
820 
821 // Returns the number of fanout produced by the filter.  More fanout implies
822 // the filter distinguishes more categories of instructions.
823 unsigned Filter::usefulness() const {
824   return FilteredInstructions.size() + VariableInstructions.empty();
825 }
826 
827 //////////////////////////////////
828 //                              //
829 // Filterchooser Implementation //
830 //                              //
831 //////////////////////////////////
832 
833 // Emit the decoder state machine table. Returns a mask of MCD decoder ops
834 // that were emitted.
835 unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS,
836                                    DecoderTable &Table, unsigned BitWidth,
837                                    StringRef Namespace,
838                                    const EncodingIDsVec &EncodingIDs) const {
839   // We'll need to be able to map from a decoded opcode into the corresponding
840   // EncodingID for this specific combination of BitWidth and Namespace. This
841   // is used below to index into NumberedEncodings.
842   DenseMap<unsigned, unsigned> OpcodeToEncodingID;
843   OpcodeToEncodingID.reserve(EncodingIDs.size());
844   for (const auto &EI : EncodingIDs)
845     OpcodeToEncodingID[EI.Opcode] = EI.EncodingID;
846 
847   OS << "static const uint8_t DecoderTable" << Namespace << BitWidth
848      << "[] = {\n";
849 
850   // Emit ULEB128 encoded value to OS, returning the number of bytes emitted.
851   auto emitULEB128 = [](DecoderTable::const_iterator &I,
852                         formatted_raw_ostream &OS) {
853     while (*I >= 128)
854       OS << (unsigned)*I++ << ", ";
855     OS << (unsigned)*I++ << ", ";
856   };
857 
858   // Emit `getNumToSkipInBytes()`-byte numtoskip value to OS, returning the
859   // NumToSkip value.
860   auto emitNumToSkip = [](DecoderTable::const_iterator &I,
861                           formatted_raw_ostream &OS) {
862     uint8_t Byte = *I++;
863     uint32_t NumToSkip = Byte;
864     OS << (unsigned)Byte << ", ";
865     Byte = *I++;
866     OS << (unsigned)Byte << ", ";
867     NumToSkip |= Byte << 8;
868     if (getNumToSkipInBytes() == 3) {
869       Byte = *I++;
870       OS << (unsigned)(Byte) << ", ";
871       NumToSkip |= Byte << 16;
872     }
873     return NumToSkip;
874   };
875 
876   // FIXME: We may be able to use the NumToSkip values to recover
877   // appropriate indentation levels.
878   DecoderTable::const_iterator I = Table.begin();
879   DecoderTable::const_iterator E = Table.end();
880   const uint8_t *const EndPtr = Table.data() + Table.size();
881 
882   auto emitNumToSkipComment = [&](uint32_t NumToSkip, bool InComment = false) {
883     uint32_t Index = ((I - Table.begin()) + NumToSkip);
884     OS << (InComment ? ", " : "// ");
885     OS << "Skip to: " << Index;
886     if (*(I + NumToSkip) == MCD::OPC_Fail)
887       OS << " (Fail)";
888   };
889 
890   unsigned OpcodeMask = 0;
891 
892   while (I != E) {
893     assert(I < E && "incomplete decode table entry!");
894 
895     uint64_t Pos = I - Table.begin();
896     OS << "/* " << Pos << " */";
897     OS.PadToColumn(12);
898 
899     const uint8_t DecoderOp = *I++;
900     OpcodeMask |= (1 << DecoderOp);
901     switch (DecoderOp) {
902     default:
903       PrintFatalError("Invalid decode table opcode: " + Twine((int)DecoderOp) +
904                       " at index " + Twine(Pos));
905     case MCD::OPC_ExtractField: {
906       OS << "  MCD::OPC_ExtractField, ";
907 
908       // ULEB128 encoded start value.
909       const char *ErrMsg = nullptr;
910       unsigned Start = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
911       assert(ErrMsg == nullptr && "ULEB128 value too large!");
912       emitULEB128(I, OS);
913 
914       unsigned Len = *I++;
915       OS << Len << ",  // Inst{";
916       if (Len > 1)
917         OS << (Start + Len - 1) << "-";
918       OS << Start << "} ...\n";
919       break;
920     }
921     case MCD::OPC_FilterValue:
922     case MCD::OPC_FilterValueOrFail: {
923       bool IsFail = DecoderOp == MCD::OPC_FilterValueOrFail;
924       OS << "  MCD::OPC_FilterValue" << (IsFail ? "OrFail, " : ", ");
925       // The filter value is ULEB128 encoded.
926       emitULEB128(I, OS);
927 
928       if (!IsFail) {
929         uint32_t NumToSkip = emitNumToSkip(I, OS);
930         emitNumToSkipComment(NumToSkip);
931       }
932       OS << '\n';
933       break;
934     }
935     case MCD::OPC_CheckField:
936     case MCD::OPC_CheckFieldOrFail: {
937       bool IsFail = DecoderOp == MCD::OPC_CheckFieldOrFail;
938       OS << "  MCD::OPC_CheckField" << (IsFail ? "OrFail, " : ", ");
939       // ULEB128 encoded start value.
940       emitULEB128(I, OS);
941       // 8-bit length.
942       unsigned Len = *I++;
943       OS << Len << ", ";
944       // ULEB128 encoded field value.
945       emitULEB128(I, OS);
946 
947       if (!IsFail) {
948         uint32_t NumToSkip = emitNumToSkip(I, OS);
949         emitNumToSkipComment(NumToSkip);
950       }
951       OS << '\n';
952       break;
953     }
954     case MCD::OPC_CheckPredicate:
955     case MCD::OPC_CheckPredicateOrFail: {
956       bool IsFail = DecoderOp == MCD::OPC_CheckPredicateOrFail;
957 
958       OS << "  MCD::OPC_CheckPredicate" << (IsFail ? "OrFail, " : ", ");
959       emitULEB128(I, OS);
960 
961       if (!IsFail) {
962         uint32_t NumToSkip = emitNumToSkip(I, OS);
963         emitNumToSkipComment(NumToSkip);
964       }
965       OS << '\n';
966       break;
967     }
968     case MCD::OPC_Decode:
969     case MCD::OPC_TryDecode:
970     case MCD::OPC_TryDecodeOrFail: {
971       bool IsFail = DecoderOp == MCD::OPC_TryDecodeOrFail;
972       bool IsTry = DecoderOp == MCD::OPC_TryDecode || IsFail;
973       // Decode the Opcode value.
974       const char *ErrMsg = nullptr;
975       unsigned Opc = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
976       assert(ErrMsg == nullptr && "ULEB128 value too large!");
977 
978       OS << "  MCD::OPC_" << (IsTry ? "Try" : "") << "Decode"
979          << (IsFail ? "OrFail, " : ", ");
980       emitULEB128(I, OS);
981 
982       // Decoder index.
983       unsigned DecodeIdx = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
984       assert(ErrMsg == nullptr && "ULEB128 value too large!");
985       emitULEB128(I, OS);
986 
987       auto EncI = OpcodeToEncodingID.find(Opc);
988       assert(EncI != OpcodeToEncodingID.end() && "no encoding entry");
989       auto EncodingID = EncI->second;
990 
991       if (!IsTry) {
992         OS << "// Opcode: " << NumberedEncodings[EncodingID]
993            << ", DecodeIdx: " << DecodeIdx << '\n';
994         break;
995       }
996 
997       // Fallthrough for OPC_TryDecode.
998       if (!IsFail) {
999         uint32_t NumToSkip = emitNumToSkip(I, OS);
1000         OS << "// Opcode: " << NumberedEncodings[EncodingID]
1001            << ", DecodeIdx: " << DecodeIdx;
1002         emitNumToSkipComment(NumToSkip, /*InComment=*/true);
1003       }
1004       OS << '\n';
1005       break;
1006     }
1007     case MCD::OPC_SoftFail: {
1008       OS << "  MCD::OPC_SoftFail, ";
1009       // Decode the positive mask.
1010       const char *ErrMsg = nullptr;
1011       uint64_t PositiveMask = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
1012       assert(ErrMsg == nullptr && "ULEB128 value too large!");
1013       emitULEB128(I, OS);
1014 
1015       // Decode the negative mask.
1016       uint64_t NegativeMask = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
1017       assert(ErrMsg == nullptr && "ULEB128 value too large!");
1018       emitULEB128(I, OS);
1019       OS << "// +ve mask: 0x";
1020       OS.write_hex(PositiveMask);
1021       OS << ", -ve mask: 0x";
1022       OS.write_hex(NegativeMask);
1023       OS << '\n';
1024       break;
1025     }
1026     case MCD::OPC_Fail:
1027       OS << "  MCD::OPC_Fail,\n";
1028       break;
1029     }
1030   }
1031   OS << "  0\n";
1032   OS << "};\n\n";
1033 
1034   return OpcodeMask;
1035 }
1036 
1037 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
1038                                        ArrayRef<unsigned> InstrLen) const {
1039   OS << "static const uint8_t InstrLenTable[] = {\n";
1040   for (unsigned Len : InstrLen)
1041     OS << Len << ",\n";
1042   OS << "};\n\n";
1043 }
1044 
1045 void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
1046                                            PredicateSet &Predicates) const {
1047   // The predicate function is just a big switch statement based on the
1048   // input predicate index.
1049   OS << "static bool checkDecoderPredicate(unsigned Idx, const FeatureBitset "
1050         "&Bits) {\n";
1051   OS << "  switch (Idx) {\n";
1052   OS << "  default: llvm_unreachable(\"Invalid index!\");\n";
1053   for (const auto &[Index, Predicate] : enumerate(Predicates)) {
1054     OS << "  case " << Index << ":\n";
1055     OS << "    return (" << Predicate << ");\n";
1056   }
1057   OS << "  }\n";
1058   OS << "}\n\n";
1059 }
1060 
1061 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
1062                                          DecoderSet &Decoders) const {
1063   // The decoder function is just a big switch statement or a table of function
1064   // pointers based on the input decoder index.
1065 
1066   // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
1067   // It would be better for emitBinaryParser to use a 64-bit tmp whenever
1068   // possible but fall back to an InsnType-sized tmp for truly large fields.
1069   StringRef TmpTypeDecl =
1070       "using TmpType = std::conditional_t<std::is_integral<InsnType>::value, "
1071       "InsnType, uint64_t>;\n";
1072   StringRef DecodeParams =
1073       "DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const "
1074       "MCDisassembler *Decoder, bool &DecodeComplete";
1075 
1076   if (UseFnTableInDecodeToMCInst) {
1077     // Emit a function for each case first.
1078     for (const auto &[Index, Decoder] : enumerate(Decoders)) {
1079       OS << "template <typename InsnType>\n";
1080       OS << "DecodeStatus decodeFn" << Index << "(" << DecodeParams << ") {\n";
1081       OS << "  " << TmpTypeDecl;
1082       OS << "  [[maybe_unused]] TmpType tmp;\n";
1083       OS << Decoder;
1084       OS << "  return S;\n";
1085       OS << "}\n\n";
1086     }
1087   }
1088 
1089   OS << "// Handling " << Decoders.size() << " cases.\n";
1090   OS << "template <typename InsnType>\n";
1091   OS << "static DecodeStatus decodeToMCInst(unsigned Idx, " << DecodeParams
1092      << ") {\n";
1093   OS << "  DecodeComplete = true;\n";
1094 
1095   if (UseFnTableInDecodeToMCInst) {
1096     // Build a table of function pointers.
1097     OS << "  using DecodeFnTy = DecodeStatus (*)(" << DecodeParams << ");\n";
1098     OS << "  static constexpr DecodeFnTy decodeFnTable[] = {\n";
1099     for (size_t Index : llvm::seq(Decoders.size()))
1100       OS << "    decodeFn" << Index << ",\n";
1101     OS << "  };\n";
1102     OS << "  if (Idx >= " << Decoders.size() << ")\n";
1103     OS << "    llvm_unreachable(\"Invalid index!\");\n";
1104     OS << "  return decodeFnTable[Idx](S, insn, MI, Address, Decoder, "
1105           "DecodeComplete);\n";
1106   } else {
1107     OS << "  " << TmpTypeDecl;
1108     OS << "  TmpType tmp;\n";
1109     OS << "  switch (Idx) {\n";
1110     OS << "  default: llvm_unreachable(\"Invalid index!\");\n";
1111     for (const auto &[Index, Decoder] : enumerate(Decoders)) {
1112       OS << "  case " << Index << ":\n";
1113       OS << Decoder;
1114       OS << "    return S;\n";
1115     }
1116     OS << "  }\n";
1117   }
1118   OS << "}\n";
1119 }
1120 
1121 // Populates the field of the insn given the start position and the number of
1122 // consecutive bits to scan for.
1123 //
1124 // Returns a pair of values (indicator, field), where the indicator is false
1125 // if there exists any uninitialized bit value in the range and true if all
1126 // bits are well-known. The second value is the potentially populated field.
1127 std::pair<bool, uint64_t> FilterChooser::fieldFromInsn(const insn_t &Insn,
1128                                                        unsigned StartBit,
1129                                                        unsigned NumBits) const {
1130   uint64_t Field = 0;
1131 
1132   for (unsigned i = 0; i < NumBits; ++i) {
1133     if (Insn[StartBit + i] == BitValue::BIT_UNSET)
1134       return {false, Field};
1135 
1136     if (Insn[StartBit + i] == BitValue::BIT_TRUE)
1137       Field = Field | (1ULL << i);
1138   }
1139 
1140   return {true, Field};
1141 }
1142 
1143 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
1144 /// filter array as a series of chars.
1145 void FilterChooser::dumpFilterArray(raw_ostream &OS,
1146                                     ArrayRef<BitValue> Filter) const {
1147   for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--)
1148     OS << Filter[bitIndex - 1];
1149 }
1150 
1151 /// dumpStack - dumpStack traverses the filter chooser chain and calls
1152 /// dumpFilterArray on each filter chooser up to the top level one.
1153 void FilterChooser::dumpStack(raw_ostream &OS, const char *prefix) const {
1154   const FilterChooser *current = this;
1155 
1156   while (current) {
1157     OS << prefix;
1158     dumpFilterArray(OS, current->FilterBitValues);
1159     OS << '\n';
1160     current = current->Parent;
1161   }
1162 }
1163 
1164 // Calculates the island(s) needed to decode the instruction.
1165 // This returns a list of undecoded bits of an instructions, for example,
1166 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
1167 // decoded bits in order to verify that the instruction matches the Opcode.
1168 unsigned FilterChooser::getIslands(std::vector<Island> &Islands,
1169                                    const insn_t &Insn) const {
1170   uint64_t FieldVal;
1171   unsigned StartBit;
1172 
1173   // 0: Init
1174   // 1: Water (the bit value does not affect decoding)
1175   // 2: Island (well-known bit value needed for decoding)
1176   unsigned State = 0;
1177 
1178   for (unsigned i = 0; i < BitWidth; ++i) {
1179     std::optional<uint64_t> Val = Insn[i].getValue();
1180     bool Filtered = PositionFiltered(i);
1181     switch (State) {
1182     default:
1183       llvm_unreachable("Unreachable code!");
1184     case 0:
1185     case 1:
1186       if (Filtered || !Val) {
1187         State = 1; // Still in Water
1188       } else {
1189         State = 2; // Into the Island
1190         StartBit = i;
1191         FieldVal = *Val;
1192       }
1193       break;
1194     case 2:
1195       if (Filtered || !Val) {
1196         State = 1; // Into the Water
1197         Islands.push_back({StartBit, i - StartBit, FieldVal});
1198       } else {
1199         State = 2; // Still in Island
1200         FieldVal |= *Val << (i - StartBit);
1201       }
1202       break;
1203     }
1204   }
1205   // If we are still in Island after the loop, do some housekeeping.
1206   if (State == 2)
1207     Islands.push_back({StartBit, BitWidth - StartBit, FieldVal});
1208 
1209   return Islands.size();
1210 }
1211 
1212 bool FilterChooser::emitBinaryParser(raw_ostream &OS, indent Indent,
1213                                      const OperandInfo &OpInfo) const {
1214   const std::string &Decoder = OpInfo.Decoder;
1215 
1216   bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0;
1217 
1218   if (UseInsertBits) {
1219     OS << Indent << "tmp = 0x";
1220     OS.write_hex(OpInfo.InitValue);
1221     OS << ";\n";
1222   }
1223 
1224   for (const EncodingField &EF : OpInfo) {
1225     OS << Indent;
1226     if (UseInsertBits)
1227       OS << "insertBits(tmp, ";
1228     else
1229       OS << "tmp = ";
1230     OS << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')';
1231     if (UseInsertBits)
1232       OS << ", " << EF.Offset << ", " << EF.Width << ')';
1233     else if (EF.Offset != 0)
1234       OS << " << " << EF.Offset;
1235     OS << ";\n";
1236   }
1237 
1238   bool OpHasCompleteDecoder;
1239   if (!Decoder.empty()) {
1240     OpHasCompleteDecoder = OpInfo.HasCompleteDecoder;
1241     OS << Indent << "if (!Check(S, " << Decoder
1242        << "(MI, tmp, Address, Decoder))) { "
1243        << (OpHasCompleteDecoder ? "" : "DecodeComplete = false; ")
1244        << "return MCDisassembler::Fail; }\n";
1245   } else {
1246     OpHasCompleteDecoder = true;
1247     OS << Indent << "MI.addOperand(MCOperand::createImm(tmp));\n";
1248   }
1249   return OpHasCompleteDecoder;
1250 }
1251 
1252 bool FilterChooser::emitDecoder(raw_ostream &OS, indent Indent,
1253                                 unsigned Opc) const {
1254   bool HasCompleteDecoder = true;
1255 
1256   for (const auto &Op : Operands.find(Opc)->second) {
1257     // If a custom instruction decoder was specified, use that.
1258     if (Op.numFields() == 0 && !Op.Decoder.empty()) {
1259       HasCompleteDecoder = Op.HasCompleteDecoder;
1260       OS << Indent << "if (!Check(S, " << Op.Decoder
1261          << "(MI, insn, Address, Decoder))) { "
1262          << (HasCompleteDecoder ? "" : "DecodeComplete = false; ")
1263          << "return MCDisassembler::Fail; }\n";
1264       break;
1265     }
1266 
1267     HasCompleteDecoder &= emitBinaryParser(OS, Indent, Op);
1268   }
1269   return HasCompleteDecoder;
1270 }
1271 
1272 std::pair<unsigned, bool> FilterChooser::getDecoderIndex(DecoderSet &Decoders,
1273                                                          unsigned Opc) const {
1274   // Build up the predicate string.
1275   SmallString<256> Decoder;
1276   // FIXME: emitDecoder() function can take a buffer directly rather than
1277   // a stream.
1278   raw_svector_ostream S(Decoder);
1279   indent Indent(UseFnTableInDecodeToMCInst ? 2 : 4);
1280   bool HasCompleteDecoder = emitDecoder(S, Indent, Opc);
1281 
1282   // Using the full decoder string as the key value here is a bit
1283   // heavyweight, but is effective. If the string comparisons become a
1284   // performance concern, we can implement a mangling of the predicate
1285   // data easily enough with a map back to the actual string. That's
1286   // overkill for now, though.
1287 
1288   // Make sure the predicate is in the table.
1289   Decoders.insert(CachedHashString(Decoder));
1290   // Now figure out the index for when we write out the table.
1291   DecoderSet::const_iterator P = find(Decoders, Decoder.str());
1292   return {(unsigned)(P - Decoders.begin()), HasCompleteDecoder};
1293 }
1294 
1295 // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||.
1296 bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
1297                                           raw_ostream &OS) const {
1298   if (const auto *D = dyn_cast<DefInit>(&Val)) {
1299     if (!D->getDef()->isSubClassOf("SubtargetFeature"))
1300       return true;
1301     OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString()
1302        << "]";
1303     return false;
1304   }
1305   if (const auto *D = dyn_cast<DagInit>(&Val)) {
1306     std::string Op = D->getOperator()->getAsString();
1307     if (Op == "not" && D->getNumArgs() == 1) {
1308       OS << '!';
1309       return emitPredicateMatchAux(*D->getArg(0), true, OS);
1310     }
1311     if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) {
1312       bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true);
1313       if (Paren)
1314         OS << '(';
1315       ListSeparator LS(Op == "any_of" ? " || " : " && ");
1316       for (auto *Arg : D->getArgs()) {
1317         OS << LS;
1318         if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS))
1319           return true;
1320       }
1321       if (Paren)
1322         OS << ')';
1323       return false;
1324     }
1325   }
1326   return true;
1327 }
1328 
1329 bool FilterChooser::emitPredicateMatch(raw_ostream &OS, unsigned Opc) const {
1330   const ListInit *Predicates =
1331       AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
1332   bool IsFirstEmission = true;
1333   for (unsigned i = 0; i < Predicates->size(); ++i) {
1334     const Record *Pred = Predicates->getElementAsRecord(i);
1335     if (!Pred->getValue("AssemblerMatcherPredicate"))
1336       continue;
1337 
1338     if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
1339       continue;
1340 
1341     if (!IsFirstEmission)
1342       OS << " && ";
1343     if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"),
1344                               Predicates->size() > 1, OS))
1345       PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!");
1346     IsFirstEmission = false;
1347   }
1348   return !Predicates->empty();
1349 }
1350 
1351 bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
1352   const ListInit *Predicates =
1353       AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
1354   for (unsigned i = 0; i < Predicates->size(); ++i) {
1355     const Record *Pred = Predicates->getElementAsRecord(i);
1356     if (!Pred->getValue("AssemblerMatcherPredicate"))
1357       continue;
1358 
1359     if (isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
1360       return true;
1361   }
1362   return false;
1363 }
1364 
1365 unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
1366                                           StringRef Predicate) const {
1367   // Using the full predicate string as the key value here is a bit
1368   // heavyweight, but is effective. If the string comparisons become a
1369   // performance concern, we can implement a mangling of the predicate
1370   // data easily enough with a map back to the actual string. That's
1371   // overkill for now, though.
1372 
1373   // Make sure the predicate is in the table.
1374   TableInfo.Predicates.insert(CachedHashString(Predicate));
1375   // Now figure out the index for when we write out the table.
1376   PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate);
1377   return (unsigned)(P - TableInfo.Predicates.begin());
1378 }
1379 
1380 void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
1381                                             unsigned Opc) const {
1382   if (!doesOpcodeNeedPredicate(Opc))
1383     return;
1384 
1385   // Build up the predicate string.
1386   SmallString<256> Predicate;
1387   // FIXME: emitPredicateMatch() functions can take a buffer directly rather
1388   // than a stream.
1389   raw_svector_ostream PS(Predicate);
1390   emitPredicateMatch(PS, Opc);
1391 
1392   // Figure out the index into the predicate table for the predicate just
1393   // computed.
1394   unsigned PIdx = getPredicateIndex(TableInfo, PS.str());
1395 
1396   const uint8_t DecoderOp = TableInfo.isOutermostScope()
1397                                 ? MCD::OPC_CheckPredicateOrFail
1398                                 : MCD::OPC_CheckPredicate;
1399   TableInfo.Table.push_back(DecoderOp);
1400   TableInfo.Table.insertULEB128(PIdx);
1401 
1402   if (DecoderOp == MCD::OPC_CheckPredicate) {
1403     // Push location for NumToSkip backpatching.
1404     TableInfo.FixupStack.back().push_back(TableInfo.Table.insertNumToSkip());
1405   }
1406 }
1407 
1408 void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
1409                                            unsigned Opc) const {
1410   const Record *EncodingDef = AllInstructions[Opc].EncodingDef;
1411   const RecordVal *RV = EncodingDef->getValue("SoftFail");
1412   const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
1413 
1414   if (!SFBits)
1415     return;
1416   const BitsInit *InstBits = EncodingDef->getValueAsBitsInit("Inst");
1417 
1418   APInt PositiveMask(BitWidth, 0ULL);
1419   APInt NegativeMask(BitWidth, 0ULL);
1420   for (unsigned i = 0; i < BitWidth; ++i) {
1421     BitValue B(*SFBits, i);
1422     BitValue IB(*InstBits, i);
1423 
1424     if (B != BitValue::BIT_TRUE)
1425       continue;
1426 
1427     if (IB == BitValue::BIT_FALSE) {
1428       // The bit is meant to be false, so emit a check to see if it is true.
1429       PositiveMask.setBit(i);
1430     } else if (IB == BitValue::BIT_TRUE) {
1431       // The bit is meant to be true, so emit a check to see if it is false.
1432       NegativeMask.setBit(i);
1433     } else {
1434       // The bit is not set; this must be an error!
1435       errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
1436              << AllInstructions[Opc] << " is set but Inst{" << i
1437              << "} is unset!\n"
1438              << "  - You can only mark a bit as SoftFail if it is fully defined"
1439              << " (1/0 - not '?') in Inst\n";
1440       return;
1441     }
1442   }
1443 
1444   bool NeedPositiveMask = PositiveMask.getBoolValue();
1445   bool NeedNegativeMask = NegativeMask.getBoolValue();
1446 
1447   if (!NeedPositiveMask && !NeedNegativeMask)
1448     return;
1449 
1450   TableInfo.Table.push_back(MCD::OPC_SoftFail);
1451   TableInfo.Table.insertULEB128(PositiveMask.getZExtValue());
1452   TableInfo.Table.insertULEB128(NegativeMask.getZExtValue());
1453 }
1454 
1455 // Emits table entries to decode the singleton.
1456 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1457                                             EncodingIDAndOpcode Opc) const {
1458   std::vector<Island> Islands;
1459   insn_t Insn;
1460   insnWithID(Insn, Opc.EncodingID);
1461 
1462   // Look for islands of undecoded bits of the singleton.
1463   getIslands(Islands, Insn);
1464 
1465   // Emit the predicate table entry if one is needed.
1466   emitPredicateTableEntry(TableInfo, Opc.EncodingID);
1467 
1468   // Check any additional encoding fields needed.
1469   for (const Island &Ilnd : reverse(Islands)) {
1470     unsigned NumBits = Ilnd.NumBits;
1471     assert(isUInt<8>(NumBits) && "NumBits overflowed uint8 table entry!");
1472     const uint8_t DecoderOp = TableInfo.isOutermostScope()
1473                                   ? MCD::OPC_CheckFieldOrFail
1474                                   : MCD::OPC_CheckField;
1475     TableInfo.Table.push_back(DecoderOp);
1476 
1477     TableInfo.Table.insertULEB128(Ilnd.StartBit);
1478     TableInfo.Table.push_back(NumBits);
1479     TableInfo.Table.insertULEB128(Ilnd.FieldVal);
1480 
1481     if (DecoderOp == MCD::OPC_CheckField) {
1482       // Allocate space in the table for fixup so all our relative position
1483       // calculations work OK even before we fully resolve the real value here.
1484 
1485       // Push location for NumToSkip backpatching.
1486       TableInfo.FixupStack.back().push_back(TableInfo.Table.insertNumToSkip());
1487     }
1488   }
1489 
1490   // Check for soft failure of the match.
1491   emitSoftFailTableEntry(TableInfo, Opc.EncodingID);
1492 
1493   auto [DIdx, HasCompleteDecoder] =
1494       getDecoderIndex(TableInfo.Decoders, Opc.EncodingID);
1495 
1496   // Produce OPC_Decode or OPC_TryDecode opcode based on the information
1497   // whether the instruction decoder is complete or not. If it is complete
1498   // then it handles all possible values of remaining variable/unfiltered bits
1499   // and for any value can determine if the bitpattern is a valid instruction
1500   // or not. This means OPC_Decode will be the final step in the decoding
1501   // process. If it is not complete, then the Fail return code from the
1502   // decoder method indicates that additional processing should be done to see
1503   // if there is any other instruction that also matches the bitpattern and
1504   // can decode it.
1505   const uint8_t DecoderOp = HasCompleteDecoder ? MCD::OPC_Decode
1506                                                : (TableInfo.isOutermostScope()
1507                                                       ? MCD::OPC_TryDecodeOrFail
1508                                                       : MCD::OPC_TryDecode);
1509   TableInfo.Table.push_back(DecoderOp);
1510   NumEncodingsSupported++;
1511   TableInfo.Table.insertULEB128(Opc.Opcode);
1512   TableInfo.Table.insertULEB128(DIdx);
1513 
1514   if (DecoderOp == MCD::OPC_TryDecode) {
1515     // Push location for NumToSkip backpatching.
1516     TableInfo.FixupStack.back().push_back(TableInfo.Table.insertNumToSkip());
1517   }
1518 }
1519 
1520 // Emits table entries to decode the singleton, and then to decode the rest.
1521 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1522                                             const Filter &Best) const {
1523   EncodingIDAndOpcode Opc = Best.getSingletonOpc();
1524 
1525   // complex singletons need predicate checks from the first singleton
1526   // to refer forward to the variable filterchooser that follows.
1527   TableInfo.FixupStack.emplace_back();
1528 
1529   emitSingletonTableEntry(TableInfo, Opc);
1530 
1531   resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
1532                      TableInfo.Table.size());
1533   TableInfo.FixupStack.pop_back();
1534 
1535   Best.getVariableFC().emitTableEntries(TableInfo);
1536 }
1537 
1538 // Assign a single filter and run with it.  Top level API client can initialize
1539 // with a single filter to start the filtering process.
1540 void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
1541                                     bool mixed) {
1542   Filters.clear();
1543   Filters.emplace_back(*this, startBit, numBit, true);
1544   BestIndex = 0; // Sole Filter instance to choose from.
1545   bestFilter().recurse();
1546 }
1547 
1548 // reportRegion is a helper function for filterProcessor to mark a region as
1549 // eligible for use as a filter region.
1550 void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
1551                                  unsigned BitIndex, bool AllowMixed) {
1552   if (RA == ATTR_MIXED && AllowMixed)
1553     Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true);
1554   else if (RA == ATTR_ALL_SET && !AllowMixed)
1555     Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false);
1556 }
1557 
1558 // FilterProcessor scans the well-known encoding bits of the instructions and
1559 // builds up a list of candidate filters.  It chooses the best filter and
1560 // recursively descends down the decoding tree.
1561 bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
1562   Filters.clear();
1563   BestIndex = -1;
1564   unsigned numInstructions = Opcodes.size();
1565 
1566   assert(numInstructions && "Filter created with no instructions");
1567 
1568   // No further filtering is necessary.
1569   if (numInstructions == 1)
1570     return true;
1571 
1572   // Heuristics.  See also doFilter()'s "Heuristics" comment when num of
1573   // instructions is 3.
1574   if (AllowMixed && !Greedy) {
1575     assert(numInstructions == 3);
1576 
1577     for (const auto &Opcode : Opcodes) {
1578       std::vector<Island> Islands;
1579       insn_t Insn;
1580 
1581       insnWithID(Insn, Opcode.EncodingID);
1582 
1583       // Look for islands of undecoded bits of any instruction.
1584       if (getIslands(Islands, Insn) > 0) {
1585         // Found an instruction with island(s).  Now just assign a filter.
1586         runSingleFilter(Islands[0].StartBit, Islands[0].NumBits, true);
1587         return true;
1588       }
1589     }
1590   }
1591 
1592   unsigned BitIndex;
1593 
1594   // We maintain BIT_WIDTH copies of the bitAttrs automaton.
1595   // The automaton consumes the corresponding bit from each
1596   // instruction.
1597   //
1598   //   Input symbols: 0, 1, and _ (unset).
1599   //   States:        NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
1600   //   Initial state: NONE.
1601   //
1602   // (NONE) ------- [01] -> (ALL_SET)
1603   // (NONE) ------- _ ----> (ALL_UNSET)
1604   // (ALL_SET) ---- [01] -> (ALL_SET)
1605   // (ALL_SET) ---- _ ----> (MIXED)
1606   // (ALL_UNSET) -- [01] -> (MIXED)
1607   // (ALL_UNSET) -- _ ----> (ALL_UNSET)
1608   // (MIXED) ------ . ----> (MIXED)
1609   // (FILTERED)---- . ----> (FILTERED)
1610 
1611   std::vector<bitAttr_t> bitAttrs(BitWidth, ATTR_NONE);
1612 
1613   // FILTERED bit positions provide no entropy and are not worthy of pursuing.
1614   // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
1615   for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex)
1616     if (FilterBitValues[BitIndex].isSet())
1617       bitAttrs[BitIndex] = ATTR_FILTERED;
1618 
1619   for (const auto &OpcPair : Opcodes) {
1620     insn_t insn;
1621 
1622     insnWithID(insn, OpcPair.EncodingID);
1623 
1624     for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
1625       switch (bitAttrs[BitIndex]) {
1626       case ATTR_NONE:
1627         if (insn[BitIndex] == BitValue::BIT_UNSET)
1628           bitAttrs[BitIndex] = ATTR_ALL_UNSET;
1629         else
1630           bitAttrs[BitIndex] = ATTR_ALL_SET;
1631         break;
1632       case ATTR_ALL_SET:
1633         if (insn[BitIndex] == BitValue::BIT_UNSET)
1634           bitAttrs[BitIndex] = ATTR_MIXED;
1635         break;
1636       case ATTR_ALL_UNSET:
1637         if (insn[BitIndex] != BitValue::BIT_UNSET)
1638           bitAttrs[BitIndex] = ATTR_MIXED;
1639         break;
1640       case ATTR_MIXED:
1641       case ATTR_FILTERED:
1642         break;
1643       }
1644     }
1645   }
1646 
1647   // The regionAttr automaton consumes the bitAttrs automatons' state,
1648   // lowest-to-highest.
1649   //
1650   //   Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
1651   //   States:        NONE, ALL_SET, MIXED
1652   //   Initial state: NONE
1653   //
1654   // (NONE) ----- F --> (NONE)
1655   // (NONE) ----- S --> (ALL_SET)     ; and set region start
1656   // (NONE) ----- U --> (NONE)
1657   // (NONE) ----- M --> (MIXED)       ; and set region start
1658   // (ALL_SET) -- F --> (NONE)        ; and report an ALL_SET region
1659   // (ALL_SET) -- S --> (ALL_SET)
1660   // (ALL_SET) -- U --> (NONE)        ; and report an ALL_SET region
1661   // (ALL_SET) -- M --> (MIXED)       ; and report an ALL_SET region
1662   // (MIXED) ---- F --> (NONE)        ; and report a MIXED region
1663   // (MIXED) ---- S --> (ALL_SET)     ; and report a MIXED region
1664   // (MIXED) ---- U --> (NONE)        ; and report a MIXED region
1665   // (MIXED) ---- M --> (MIXED)
1666 
1667   bitAttr_t RA = ATTR_NONE;
1668   unsigned StartBit = 0;
1669 
1670   for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
1671     bitAttr_t bitAttr = bitAttrs[BitIndex];
1672 
1673     assert(bitAttr != ATTR_NONE && "Bit without attributes");
1674 
1675     switch (RA) {
1676     case ATTR_NONE:
1677       switch (bitAttr) {
1678       case ATTR_FILTERED:
1679         break;
1680       case ATTR_ALL_SET:
1681         StartBit = BitIndex;
1682         RA = ATTR_ALL_SET;
1683         break;
1684       case ATTR_ALL_UNSET:
1685         break;
1686       case ATTR_MIXED:
1687         StartBit = BitIndex;
1688         RA = ATTR_MIXED;
1689         break;
1690       default:
1691         llvm_unreachable("Unexpected bitAttr!");
1692       }
1693       break;
1694     case ATTR_ALL_SET:
1695       switch (bitAttr) {
1696       case ATTR_FILTERED:
1697         reportRegion(RA, StartBit, BitIndex, AllowMixed);
1698         RA = ATTR_NONE;
1699         break;
1700       case ATTR_ALL_SET:
1701         break;
1702       case ATTR_ALL_UNSET:
1703         reportRegion(RA, StartBit, BitIndex, AllowMixed);
1704         RA = ATTR_NONE;
1705         break;
1706       case ATTR_MIXED:
1707         reportRegion(RA, StartBit, BitIndex, AllowMixed);
1708         StartBit = BitIndex;
1709         RA = ATTR_MIXED;
1710         break;
1711       default:
1712         llvm_unreachable("Unexpected bitAttr!");
1713       }
1714       break;
1715     case ATTR_MIXED:
1716       switch (bitAttr) {
1717       case ATTR_FILTERED:
1718         reportRegion(RA, StartBit, BitIndex, AllowMixed);
1719         StartBit = BitIndex;
1720         RA = ATTR_NONE;
1721         break;
1722       case ATTR_ALL_SET:
1723         reportRegion(RA, StartBit, BitIndex, AllowMixed);
1724         StartBit = BitIndex;
1725         RA = ATTR_ALL_SET;
1726         break;
1727       case ATTR_ALL_UNSET:
1728         reportRegion(RA, StartBit, BitIndex, AllowMixed);
1729         RA = ATTR_NONE;
1730         break;
1731       case ATTR_MIXED:
1732         break;
1733       default:
1734         llvm_unreachable("Unexpected bitAttr!");
1735       }
1736       break;
1737     case ATTR_ALL_UNSET:
1738       llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
1739     case ATTR_FILTERED:
1740       llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
1741     }
1742   }
1743 
1744   // At the end, if we're still in ALL_SET or MIXED states, report a region
1745   switch (RA) {
1746   case ATTR_NONE:
1747     break;
1748   case ATTR_FILTERED:
1749     break;
1750   case ATTR_ALL_SET:
1751     reportRegion(RA, StartBit, BitIndex, AllowMixed);
1752     break;
1753   case ATTR_ALL_UNSET:
1754     break;
1755   case ATTR_MIXED:
1756     reportRegion(RA, StartBit, BitIndex, AllowMixed);
1757     break;
1758   }
1759 
1760   // We have finished with the filter processings.  Now it's time to choose
1761   // the best performing filter.
1762   BestIndex = 0;
1763   bool AllUseless = true;
1764   unsigned BestScore = 0;
1765 
1766   for (const auto &[Idx, Filter] : enumerate(Filters)) {
1767     unsigned Usefulness = Filter.usefulness();
1768 
1769     if (Usefulness)
1770       AllUseless = false;
1771 
1772     if (Usefulness > BestScore) {
1773       BestIndex = Idx;
1774       BestScore = Usefulness;
1775     }
1776   }
1777 
1778   if (!AllUseless)
1779     bestFilter().recurse();
1780 
1781   return !AllUseless;
1782 } // end of FilterChooser::filterProcessor(bool)
1783 
1784 // Decides on the best configuration of filter(s) to use in order to decode
1785 // the instructions.  A conflict of instructions may occur, in which case we
1786 // dump the conflict set to the standard error.
1787 void FilterChooser::doFilter() {
1788   unsigned Num = Opcodes.size();
1789   assert(Num && "FilterChooser created with no instructions");
1790 
1791   // Try regions of consecutive known bit values first.
1792   if (filterProcessor(false))
1793     return;
1794 
1795   // Then regions of mixed bits (both known and unitialized bit values allowed).
1796   if (filterProcessor(true))
1797     return;
1798 
1799   // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
1800   // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
1801   // well-known encoding pattern.  In such case, we backtrack and scan for the
1802   // the very first consecutive ATTR_ALL_SET region and assign a filter to it.
1803   if (Num == 3 && filterProcessor(true, false))
1804     return;
1805 
1806   // If we come to here, the instruction decoding has failed.
1807   // Set the BestIndex to -1 to indicate so.
1808   BestIndex = -1;
1809 }
1810 
1811 // emitTableEntries - Emit state machine entries to decode our share of
1812 // instructions.
1813 void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
1814   if (Opcodes.size() == 1) {
1815     // There is only one instruction in the set, which is great!
1816     // Call emitSingletonDecoder() to see whether there are any remaining
1817     // encodings bits.
1818     emitSingletonTableEntry(TableInfo, Opcodes[0]);
1819     return;
1820   }
1821 
1822   // Choose the best filter to do the decodings!
1823   if (BestIndex != -1) {
1824     const Filter &Best = Filters[BestIndex];
1825     if (Best.getNumFiltered() == 1)
1826       emitSingletonTableEntry(TableInfo, Best);
1827     else
1828       Best.emitTableEntry(TableInfo);
1829     return;
1830   }
1831 
1832   // We don't know how to decode these instructions! Dump the
1833   // conflict set and bail.
1834 
1835   // Print out useful conflict information for postmortem analysis.
1836   errs() << "Decoding Conflict:\n";
1837 
1838   dumpStack(errs(), "\t\t");
1839 
1840   for (auto Opcode : Opcodes) {
1841     const EncodingAndInst &Enc = AllInstructions[Opcode.EncodingID];
1842     errs() << '\t' << Enc << ' ';
1843     dumpBits(errs(), getBitsField(*Enc.EncodingDef, "Inst"));
1844     errs() << '\n';
1845   }
1846   PrintFatalError("Decoding conflict encountered");
1847 }
1848 
1849 static std::string findOperandDecoderMethod(const Record *Record) {
1850   std::string Decoder;
1851 
1852   const RecordVal *DecoderString = Record->getValue("DecoderMethod");
1853   const StringInit *String =
1854       DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
1855   if (String) {
1856     Decoder = String->getValue().str();
1857     if (!Decoder.empty())
1858       return Decoder;
1859   }
1860 
1861   if (Record->isSubClassOf("RegisterOperand"))
1862     // Allows use of a DecoderMethod in referenced RegisterClass if set.
1863     return findOperandDecoderMethod(Record->getValueAsDef("RegClass"));
1864 
1865   if (Record->isSubClassOf("RegisterClass")) {
1866     Decoder = "Decode" + Record->getName().str() + "RegisterClass";
1867   } else if (Record->isSubClassOf("PointerLikeRegClass")) {
1868     Decoder = "DecodePointerLikeRegClass" +
1869               utostr(Record->getValueAsInt("RegClassKind"));
1870   }
1871 
1872   return Decoder;
1873 }
1874 
1875 OperandInfo getOpInfo(const Record *TypeRecord) {
1876   const RecordVal *HasCompleteDecoderVal =
1877       TypeRecord->getValue("hasCompleteDecoder");
1878   const BitInit *HasCompleteDecoderBit =
1879       HasCompleteDecoderVal
1880           ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
1881           : nullptr;
1882   bool HasCompleteDecoder =
1883       HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
1884 
1885   return OperandInfo(findOperandDecoderMethod(TypeRecord), HasCompleteDecoder);
1886 }
1887 
1888 static void parseVarLenInstOperand(const Record &Def,
1889                                    std::vector<OperandInfo> &Operands,
1890                                    const CodeGenInstruction &CGI) {
1891 
1892   const RecordVal *RV = Def.getValue("Inst");
1893   VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
1894   SmallVector<int> TiedTo;
1895 
1896   for (const auto &[Idx, Op] : enumerate(CGI.Operands)) {
1897     if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
1898       for (auto *Arg : Op.MIOperandInfo->getArgs())
1899         Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef()));
1900     else
1901       Operands.push_back(getOpInfo(Op.Rec));
1902 
1903     int TiedReg = Op.getTiedRegister();
1904     TiedTo.push_back(-1);
1905     if (TiedReg != -1) {
1906       TiedTo[Idx] = TiedReg;
1907       TiedTo[TiedReg] = Idx;
1908     }
1909   }
1910 
1911   unsigned CurrBitPos = 0;
1912   for (const auto &EncodingSegment : VLI) {
1913     unsigned Offset = 0;
1914     StringRef OpName;
1915 
1916     if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) {
1917       OpName = SI->getValue();
1918     } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) {
1919       OpName = cast<StringInit>(DI->getArg(0))->getValue();
1920       Offset = cast<IntInit>(DI->getArg(2))->getValue();
1921     }
1922 
1923     if (!OpName.empty()) {
1924       auto OpSubOpPair =
1925           const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
1926               OpName);
1927       unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair);
1928       Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
1929       if (!EncodingSegment.CustomDecoder.empty())
1930         Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str();
1931 
1932       int TiedReg = TiedTo[OpSubOpPair.first];
1933       if (TiedReg != -1) {
1934         unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
1935             {TiedReg, OpSubOpPair.second});
1936         Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
1937       }
1938     }
1939 
1940     CurrBitPos += EncodingSegment.BitWidth;
1941   }
1942 }
1943 
1944 static void debugDumpRecord(const Record &Rec) {
1945   // Dump the record, so we can see what's going on.
1946   PrintNote([&Rec](raw_ostream &OS) {
1947     OS << "Dumping record for previous error:\n";
1948     OS << Rec;
1949   });
1950 }
1951 
1952 /// For an operand field named OpName: populate OpInfo.InitValue with the
1953 /// constant-valued bit values, and OpInfo.Fields with the ranges of bits to
1954 /// insert from the decoded instruction.
1955 static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits,
1956                                 std::map<StringRef, StringRef> &TiedNames,
1957                                 StringRef OpName, OperandInfo &OpInfo) {
1958   // Some bits of the operand may be required to be 1 depending on the
1959   // instruction's encoding. Collect those bits.
1960   if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName))
1961     if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue()))
1962       for (unsigned I = 0; I < OpBits->getNumBits(); ++I)
1963         if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I)))
1964           if (OpBit->getValue())
1965             OpInfo.InitValue |= 1ULL << I;
1966 
1967   for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) {
1968     const VarInit *Var;
1969     unsigned Offset = 0;
1970     for (; J != Bits.getNumBits(); ++J) {
1971       const VarBitInit *BJ = dyn_cast<VarBitInit>(Bits.getBit(J));
1972       if (BJ) {
1973         Var = dyn_cast<VarInit>(BJ->getBitVar());
1974         if (I == J)
1975           Offset = BJ->getBitNum();
1976         else if (BJ->getBitNum() != Offset + J - I)
1977           break;
1978       } else {
1979         Var = dyn_cast<VarInit>(Bits.getBit(J));
1980       }
1981       if (!Var ||
1982           (Var->getName() != OpName && Var->getName() != TiedNames[OpName]))
1983         break;
1984     }
1985     if (I == J)
1986       ++J;
1987     else
1988       OpInfo.addField(I, J - I, Offset);
1989   }
1990 }
1991 
1992 static unsigned
1993 populateInstruction(const CodeGenTarget &Target, const Record &EncodingDef,
1994                     const CodeGenInstruction &CGI, unsigned Opc,
1995                     std::map<unsigned, std::vector<OperandInfo>> &Operands,
1996                     bool IsVarLenInst) {
1997   const Record &Def = *CGI.TheDef;
1998   // If all the bit positions are not specified; do not decode this instruction.
1999   // We are bound to fail!  For proper disassembly, the well-known encoding bits
2000   // of the instruction must be fully specified.
2001 
2002   const BitsInit &Bits = getBitsField(EncodingDef, "Inst");
2003   if (Bits.allInComplete())
2004     return 0;
2005 
2006   std::vector<OperandInfo> InsnOperands;
2007 
2008   // If the instruction has specified a custom decoding hook, use that instead
2009   // of trying to auto-generate the decoder.
2010   StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod");
2011   if (!InstDecoder.empty()) {
2012     bool HasCompleteInstDecoder =
2013         EncodingDef.getValueAsBit("hasCompleteDecoder");
2014     InsnOperands.push_back(
2015         OperandInfo(InstDecoder.str(), HasCompleteInstDecoder));
2016     Operands[Opc] = std::move(InsnOperands);
2017     return Bits.getNumBits();
2018   }
2019 
2020   // Generate a description of the operand of the instruction that we know
2021   // how to decode automatically.
2022   // FIXME: We'll need to have a way to manually override this as needed.
2023 
2024   // Gather the outputs/inputs of the instruction, so we can find their
2025   // positions in the encoding.  This assumes for now that they appear in the
2026   // MCInst in the order that they're listed.
2027   std::vector<std::pair<const Init *, StringRef>> InOutOperands;
2028   const DagInit *Out = Def.getValueAsDag("OutOperandList");
2029   const DagInit *In = Def.getValueAsDag("InOperandList");
2030   for (const auto &[Idx, Arg] : enumerate(Out->getArgs()))
2031     InOutOperands.emplace_back(Arg, Out->getArgNameStr(Idx));
2032   for (const auto &[Idx, Arg] : enumerate(In->getArgs()))
2033     InOutOperands.emplace_back(Arg, In->getArgNameStr(Idx));
2034 
2035   // Search for tied operands, so that we can correctly instantiate
2036   // operands that are not explicitly represented in the encoding.
2037   std::map<StringRef, StringRef> TiedNames;
2038   for (const auto &Op : CGI.Operands) {
2039     for (const auto &[J, CI] : enumerate(Op.Constraints)) {
2040       if (!CI.isTied())
2041         continue;
2042       std::pair<unsigned, unsigned> SO =
2043           CGI.Operands.getSubOperandNumber(CI.getTiedOperand());
2044       StringRef TiedName = CGI.Operands[SO.first].SubOpNames[SO.second];
2045       if (TiedName.empty())
2046         TiedName = CGI.Operands[SO.first].Name;
2047       StringRef MyName = Op.SubOpNames[J];
2048       if (MyName.empty())
2049         MyName = Op.Name;
2050 
2051       TiedNames[MyName] = TiedName;
2052       TiedNames[TiedName] = MyName;
2053     }
2054   }
2055 
2056   if (IsVarLenInst) {
2057     parseVarLenInstOperand(EncodingDef, InsnOperands, CGI);
2058   } else {
2059     // For each operand, see if we can figure out where it is encoded.
2060     for (const auto &Op : InOutOperands) {
2061       const Init *OpInit = Op.first;
2062       StringRef OpName = Op.second;
2063 
2064       // We're ready to find the instruction encoding locations for this
2065       // operand.
2066 
2067       // First, find the operand type ("OpInit"), and sub-op names
2068       // ("SubArgDag") if present.
2069       const DagInit *SubArgDag = dyn_cast<DagInit>(OpInit);
2070       if (SubArgDag)
2071         OpInit = SubArgDag->getOperator();
2072       const Record *OpTypeRec = cast<DefInit>(OpInit)->getDef();
2073       // Lookup the sub-operands from the operand type record (note that only
2074       // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp).
2075       const DagInit *SubOps = OpTypeRec->isSubClassOf("Operand")
2076                                   ? OpTypeRec->getValueAsDag("MIOperandInfo")
2077                                   : nullptr;
2078 
2079       // Lookup the decoder method and construct a new OperandInfo to hold our
2080       // result.
2081       OperandInfo OpInfo = getOpInfo(OpTypeRec);
2082 
2083       // If we have named sub-operands...
2084       if (SubArgDag) {
2085         // Then there should not be a custom decoder specified on the top-level
2086         // type.
2087         if (!OpInfo.Decoder.empty()) {
2088           PrintError(EncodingDef.getLoc(),
2089                      "DecoderEmitter: operand \"" + OpName + "\" has type \"" +
2090                          OpInit->getAsString() +
2091                          "\" with a custom DecoderMethod, but also named "
2092                          "sub-operands.");
2093           continue;
2094         }
2095 
2096         // Decode each of the sub-ops separately.
2097         assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs());
2098         for (const auto &[I, Arg] : enumerate(SubOps->getArgs())) {
2099           StringRef SubOpName = SubArgDag->getArgNameStr(I);
2100           OperandInfo SubOpInfo = getOpInfo(cast<DefInit>(Arg)->getDef());
2101 
2102           addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpName,
2103                               SubOpInfo);
2104           InsnOperands.push_back(std::move(SubOpInfo));
2105         }
2106         continue;
2107       }
2108 
2109       // Otherwise, if we have an operand with sub-operands, but they aren't
2110       // named...
2111       if (SubOps && OpInfo.Decoder.empty()) {
2112         // If it's a single sub-operand, and no custom decoder, use the decoder
2113         // from the one sub-operand.
2114         if (SubOps->getNumArgs() == 1)
2115           OpInfo = getOpInfo(cast<DefInit>(SubOps->getArg(0))->getDef());
2116 
2117         // If we have multiple sub-ops, there'd better have a custom
2118         // decoder. (Otherwise we don't know how to populate them properly...)
2119         if (SubOps->getNumArgs() > 1) {
2120           PrintError(EncodingDef.getLoc(),
2121                      "DecoderEmitter: operand \"" + OpName +
2122                          "\" uses MIOperandInfo with multiple ops, but doesn't "
2123                          "have a custom decoder!");
2124           debugDumpRecord(EncodingDef);
2125           continue;
2126         }
2127       }
2128 
2129       addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo);
2130       // FIXME: it should be an error not to find a definition for a given
2131       // operand, rather than just failing to add it to the resulting
2132       // instruction! (This is a longstanding bug, which will be addressed in an
2133       // upcoming change.)
2134       if (OpInfo.numFields() > 0)
2135         InsnOperands.push_back(std::move(OpInfo));
2136     }
2137   }
2138   Operands[Opc] = std::move(InsnOperands);
2139 
2140 #if 0
2141   LLVM_DEBUG({
2142       // Dumps the instruction encoding bits.
2143       dumpBits(errs(), Bits);
2144 
2145       errs() << '\n';
2146 
2147       // Dumps the list of operand info.
2148       for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
2149         const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
2150         const std::string &OperandName = Info.Name;
2151         const Record &OperandDef = *Info.Rec;
2152 
2153         errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n";
2154       }
2155     });
2156 #endif
2157 
2158   return Bits.getNumBits();
2159 }
2160 
2161 // emitFieldFromInstruction - Emit the templated helper function
2162 // fieldFromInstruction().
2163 // On Windows we make sure that this function is not inlined when
2164 // using the VS compiler. It has a bug which causes the function
2165 // to be optimized out in some circumstances. See llvm.org/pr38292
2166 static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
2167   OS << R"(
2168 // Helper functions for extracting fields from encoded instructions.
2169 // InsnType must either be integral or an APInt-like object that must:
2170 // * be default-constructible and copy-constructible
2171 // * Support extractBitsAsZExtValue(numBits, startBit)
2172 // * Support the ~, &, ==, and != operators with other objects of the same type
2173 // * Support the != and bitwise & with uint64_t
2174 template <typename InsnType>
2175 #if defined(_MSC_VER) && !defined(__clang__)
2176 __declspec(noinline)
2177 #endif
2178 static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>
2179 fieldFromInstruction(const InsnType &insn, unsigned startBit,
2180                      unsigned numBits) {
2181   assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!");
2182   assert(startBit + numBits <= (sizeof(InsnType) * 8) &&
2183          "Instruction field out of bounds!");
2184   InsnType fieldMask;
2185   if (numBits == sizeof(InsnType) * 8)
2186     fieldMask = (InsnType)(-1LL);
2187   else
2188     fieldMask = (((InsnType)1 << numBits) - 1) << startBit;
2189   return (insn & fieldMask) >> startBit;
2190 }
2191 
2192 template <typename InsnType>
2193 static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t>
2194 fieldFromInstruction(const InsnType &insn, unsigned startBit,
2195                      unsigned numBits) {
2196   return insn.extractBitsAsZExtValue(numBits, startBit);
2197 }
2198 )";
2199 }
2200 
2201 // emitInsertBits - Emit the templated helper function insertBits().
2202 static void emitInsertBits(formatted_raw_ostream &OS) {
2203   OS << R"(
2204 // Helper function for inserting bits extracted from an encoded instruction into
2205 // an integer-typed field.
2206 template <typename IntType>
2207 static std::enable_if_t<std::is_integral_v<IntType>, void>
2208 insertBits(IntType &field, IntType bits, unsigned startBit, unsigned numBits) {
2209   // Check that no bit beyond numBits is set, so that a simple bitwise |
2210   // is sufficient.
2211   assert((~(((IntType)1 << numBits) - 1) & bits) == 0 &&
2212            "bits has more than numBits bits set");
2213   assert(startBit + numBits <= sizeof(IntType) * 8);
2214   (void)numBits;
2215   field |= bits << startBit;
2216 }
2217 )";
2218 }
2219 
2220 // emitDecodeInstruction - Emit the templated helper function
2221 // decodeInstruction().
2222 static void emitDecodeInstruction(formatted_raw_ostream &OS, bool IsVarLenInst,
2223                                   unsigned OpcodeMask) {
2224   const bool HasTryDecode = OpcodeMask & ((1 << MCD::OPC_TryDecode) |
2225                                           (1 << MCD::OPC_TryDecodeOrFail));
2226   const bool HasCheckPredicate =
2227       OpcodeMask &
2228       ((1 << MCD::OPC_CheckPredicate) | (1 << MCD::OPC_CheckPredicateOrFail));
2229   const bool HasSoftFail = OpcodeMask & (1 << MCD::OPC_SoftFail);
2230 
2231   OS << R"(
2232 static unsigned decodeNumToSkip(const uint8_t *&Ptr) {
2233   unsigned NumToSkip = *Ptr++;
2234   NumToSkip |= (*Ptr++) << 8;
2235 )";
2236   if (getNumToSkipInBytes() == 3)
2237     OS << "  NumToSkip |= (*Ptr++) << 16;\n";
2238   OS << R"(  return NumToSkip;
2239 }
2240 
2241 template <typename InsnType>
2242 static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
2243                                       InsnType insn, uint64_t Address,
2244                                       const MCDisassembler *DisAsm,
2245                                       const MCSubtargetInfo &STI)";
2246   if (IsVarLenInst) {
2247     OS << ",\n                                      "
2248           "llvm::function_ref<void(APInt &, uint64_t)> makeUp";
2249   }
2250   OS << ") {\n";
2251   if (HasCheckPredicate)
2252     OS << "  const FeatureBitset &Bits = STI.getFeatureBits();\n";
2253 
2254   OS << R"(
2255   const uint8_t *Ptr = DecodeTable;
2256   uint64_t CurFieldValue = 0;
2257   DecodeStatus S = MCDisassembler::Success;
2258   while (true) {
2259     ptrdiff_t Loc = Ptr - DecodeTable;
2260     const uint8_t DecoderOp = *Ptr++;
2261     switch (DecoderOp) {
2262     default:
2263       errs() << Loc << ": Unexpected decode table opcode: "
2264              << (int)DecoderOp << '\n';
2265       return MCDisassembler::Fail;
2266     case MCD::OPC_ExtractField: {
2267       // Decode the start value.
2268       unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
2269       unsigned Len = *Ptr++;)";
2270   if (IsVarLenInst)
2271     OS << "\n      makeUp(insn, Start + Len);";
2272   OS << R"(
2273       CurFieldValue = fieldFromInstruction(insn, Start, Len);
2274       LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", "
2275                    << Len << "): " << CurFieldValue << "\n");
2276       break;
2277     }
2278     case MCD::OPC_FilterValue:
2279     case MCD::OPC_FilterValueOrFail: {
2280       bool IsFail = DecoderOp == MCD::OPC_FilterValueOrFail;
2281       // Decode the field value.
2282       uint64_t Val = decodeULEB128AndIncUnsafe(Ptr);
2283       bool Failed = Val != CurFieldValue;
2284       unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
2285 
2286       // Note: Print NumToSkip even for OPC_FilterValueOrFail to simplify debug
2287       // prints.
2288       LLVM_DEBUG({
2289         StringRef OpName = IsFail ? "OPC_FilterValueOrFail" : "OPC_FilterValue";
2290         dbgs() << Loc << ": " << OpName << '(' << Val << ", " << NumToSkip
2291                 << ") " << (Failed ? "FAIL:" : "PASS:")
2292                 << " continuing at " << (Ptr - DecodeTable) << '\n';
2293       });
2294 
2295       // Perform the filter operation.
2296       if (Failed) {
2297         if (IsFail)
2298           return MCDisassembler::Fail;
2299         Ptr += NumToSkip;
2300       }
2301       break;
2302     }
2303     case MCD::OPC_CheckField:
2304     case MCD::OPC_CheckFieldOrFail: {
2305       bool IsFail = DecoderOp == MCD::OPC_CheckFieldOrFail;
2306       // Decode the start value.
2307       unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
2308       unsigned Len = *Ptr;)";
2309   if (IsVarLenInst)
2310     OS << "\n      makeUp(insn, Start + Len);";
2311   OS << R"(
2312       uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
2313       // Decode the field value.
2314       unsigned PtrLen = 0;
2315       uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
2316       Ptr += PtrLen;
2317       bool Failed = ExpectedValue != FieldValue;
2318       unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
2319 
2320       LLVM_DEBUG({
2321         StringRef OpName = IsFail ? "OPC_CheckFieldOrFail" : "OPC_CheckField";
2322         dbgs() << Loc << ": " << OpName << '(' << Start << ", " << Len << ", "
2323                 << ExpectedValue << ", " << NumToSkip << "): FieldValue = "
2324                 << FieldValue << ", ExpectedValue = " << ExpectedValue << ": "
2325                 << (Failed ? "FAIL\n" : "PASS\n");
2326       });
2327 
2328       // If the actual and expected values don't match, skip or fail.
2329       if (Failed) {
2330         if (IsFail)
2331           return MCDisassembler::Fail;
2332         Ptr += NumToSkip;
2333       }
2334       break;
2335     })";
2336   if (HasCheckPredicate) {
2337     OS << R"(
2338     case MCD::OPC_CheckPredicate:
2339     case MCD::OPC_CheckPredicateOrFail: {
2340       bool IsFail = DecoderOp == MCD::OPC_CheckPredicateOrFail;
2341       // Decode the Predicate Index value.
2342       unsigned PIdx = decodeULEB128AndIncUnsafe(Ptr);
2343       unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
2344       // Check the predicate.
2345       bool Failed = !checkDecoderPredicate(PIdx, Bits);
2346 
2347       LLVM_DEBUG({
2348         StringRef OpName = IsFail ? "OPC_CheckPredicateOrFail" : "OPC_CheckPredicate";
2349         dbgs() << Loc << ": " << OpName << '(' << PIdx << ", " << NumToSkip
2350                << "): " << (Failed ? "FAIL\n" : "PASS\n");
2351       });
2352 
2353       if (Failed) {
2354         if (IsFail)
2355           return MCDisassembler::Fail;
2356         Ptr += NumToSkip;
2357       }
2358       break;
2359     })";
2360   }
2361   OS << R"(
2362     case MCD::OPC_Decode: {
2363       // Decode the Opcode value.
2364       unsigned Opc = decodeULEB128AndIncUnsafe(Ptr);
2365       unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
2366 
2367       MI.clear();
2368       MI.setOpcode(Opc);
2369       bool DecodeComplete;)";
2370   if (IsVarLenInst) {
2371     OS << "\n      unsigned Len = InstrLenTable[Opc];\n"
2372        << "      makeUp(insn, Len);";
2373   }
2374   OS << R"(
2375       S = decodeToMCInst(DecodeIdx, S, insn, MI, Address, DisAsm, DecodeComplete);
2376       assert(DecodeComplete);
2377 
2378       LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
2379                    << ", using decoder " << DecodeIdx << ": "
2380                    << (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n"));
2381       return S;
2382     })";
2383   if (HasTryDecode) {
2384     OS << R"(
2385     case MCD::OPC_TryDecode:
2386     case MCD::OPC_TryDecodeOrFail: {
2387       bool IsFail = DecoderOp == MCD::OPC_TryDecodeOrFail;
2388       // Decode the Opcode value.
2389       unsigned Opc = decodeULEB128AndIncUnsafe(Ptr);
2390       unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
2391       unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
2392 
2393       // Perform the decode operation.
2394       MCInst TmpMI;
2395       TmpMI.setOpcode(Opc);
2396       bool DecodeComplete;
2397       S = decodeToMCInst(DecodeIdx, S, insn, TmpMI, Address, DisAsm, DecodeComplete);
2398       LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc
2399                    << ", using decoder " << DecodeIdx << ": ");
2400 
2401       if (DecodeComplete) {
2402         // Decoding complete.
2403         LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n"));
2404         MI = TmpMI;
2405         return S;
2406       }
2407       assert(S == MCDisassembler::Fail);
2408       if (IsFail) {
2409         LLVM_DEBUG(dbgs() << "FAIL: returning FAIL\n");
2410         return MCDisassembler::Fail;
2411       }
2412       // If the decoding was incomplete, skip.
2413       Ptr += NumToSkip;
2414       LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n");
2415       // Reset decode status. This also drops a SoftFail status that could be
2416       // set before the decode attempt.
2417       S = MCDisassembler::Success;
2418       break;
2419     })";
2420   }
2421   if (HasSoftFail) {
2422     OS << R"(
2423       case MCD::OPC_SoftFail: {
2424         // Decode the mask values.
2425         uint64_t PositiveMask = decodeULEB128AndIncUnsafe(Ptr);
2426         uint64_t NegativeMask = decodeULEB128AndIncUnsafe(Ptr);
2427         bool Failed = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0;
2428         if (Failed)
2429           S = MCDisassembler::SoftFail;
2430         LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Failed ? "FAIL\n" : "PASS\n"));
2431         break;
2432   })";
2433   }
2434   OS << R"(
2435     case MCD::OPC_Fail: {
2436       LLVM_DEBUG(dbgs() << Loc << ": OPC_Fail\n");
2437       return MCDisassembler::Fail;
2438     }
2439     }
2440   }
2441   llvm_unreachable("bogosity detected in disassembler state machine!");
2442 }
2443 
2444 )";
2445 }
2446 
2447 // Helper to propagate SoftFail status. Returns false if the status is Fail;
2448 // callers are expected to early-exit in that condition. (Note, the '&' operator
2449 // is correct to propagate the values of this enum; see comment on 'enum
2450 // DecodeStatus'.)
2451 static void emitCheck(formatted_raw_ostream &OS) {
2452   OS << R"(
2453 static bool Check(DecodeStatus &Out, DecodeStatus In) {
2454   Out = static_cast<DecodeStatus>(Out & In);
2455   return Out != MCDisassembler::Fail;
2456 }
2457 
2458 )";
2459 }
2460 
2461 // Collect all HwModes referenced by the target for encoding purposes,
2462 // returning a vector of corresponding names.
2463 static void collectHwModesReferencedForEncodings(
2464     const CodeGenHwModes &HWM, std::vector<StringRef> &Names,
2465     NamespacesHwModesMap &NamespacesWithHwModes) {
2466   SmallBitVector BV(HWM.getNumModeIds());
2467   for (const auto &MS : HWM.getHwModeSelects()) {
2468     for (const HwModeSelect::PairType &P : MS.second.Items) {
2469       if (P.second->isSubClassOf("InstructionEncoding")) {
2470         std::string DecoderNamespace =
2471             P.second->getValueAsString("DecoderNamespace").str();
2472         if (P.first == DefaultMode) {
2473           NamespacesWithHwModes[DecoderNamespace].insert("");
2474         } else {
2475           NamespacesWithHwModes[DecoderNamespace].insert(
2476               HWM.getMode(P.first).Name);
2477         }
2478         BV.set(P.first);
2479       }
2480     }
2481   }
2482   transform(BV.set_bits(), std::back_inserter(Names), [&HWM](const int &M) {
2483     if (M == DefaultMode)
2484       return StringRef("");
2485     return HWM.getModeName(M, /*IncludeDefault=*/true);
2486   });
2487 }
2488 
2489 static void
2490 handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr,
2491                                 ArrayRef<StringRef> HwModeNames,
2492                                 NamespacesHwModesMap &NamespacesWithHwModes,
2493                                 std::vector<EncodingAndInst> &GlobalEncodings) {
2494   const Record *InstDef = Instr->TheDef;
2495 
2496   switch (DecoderEmitterSuppressDuplicates) {
2497   case SUPPRESSION_DISABLE: {
2498     for (StringRef HwModeName : HwModeNames)
2499       GlobalEncodings.emplace_back(InstDef, Instr, HwModeName);
2500     break;
2501   }
2502   case SUPPRESSION_LEVEL1: {
2503     std::string DecoderNamespace =
2504         InstDef->getValueAsString("DecoderNamespace").str();
2505     auto It = NamespacesWithHwModes.find(DecoderNamespace);
2506     if (It != NamespacesWithHwModes.end()) {
2507       for (StringRef HwModeName : It->second)
2508         GlobalEncodings.emplace_back(InstDef, Instr, HwModeName);
2509     } else {
2510       // Only emit the encoding once, as it's DecoderNamespace doesn't
2511       // contain any HwModes.
2512       GlobalEncodings.emplace_back(InstDef, Instr, "");
2513     }
2514     break;
2515   }
2516   case SUPPRESSION_LEVEL2:
2517     GlobalEncodings.emplace_back(InstDef, Instr, "");
2518     break;
2519   }
2520 }
2521 
2522 // Emits disassembler code for instruction decoding.
2523 void DecoderEmitter::run(raw_ostream &o) {
2524   formatted_raw_ostream OS(o);
2525   OS << R"(
2526 #include "llvm/MC/MCInst.h"
2527 #include "llvm/MC/MCSubtargetInfo.h"
2528 #include "llvm/Support/DataTypes.h"
2529 #include "llvm/Support/Debug.h"
2530 #include "llvm/Support/LEB128.h"
2531 #include "llvm/Support/raw_ostream.h"
2532 #include "llvm/TargetParser/SubtargetFeature.h"
2533 #include <assert.h>
2534 
2535 namespace {
2536 )";
2537 
2538   emitFieldFromInstruction(OS);
2539   emitInsertBits(OS);
2540   emitCheck(OS);
2541 
2542   Target.reverseBitsForLittleEndianEncoding();
2543 
2544   // Parameterize the decoders based on namespace and instruction width.
2545 
2546   // First, collect all encoding-related HwModes referenced by the target.
2547   // And establish a mapping table between DecoderNamespace and HwMode.
2548   // If HwModeNames is empty, add the empty string so we always have one HwMode.
2549   const CodeGenHwModes &HWM = Target.getHwModes();
2550   std::vector<StringRef> HwModeNames;
2551   NamespacesHwModesMap NamespacesWithHwModes;
2552   collectHwModesReferencedForEncodings(HWM, HwModeNames, NamespacesWithHwModes);
2553   if (HwModeNames.empty())
2554     HwModeNames.push_back("");
2555 
2556   const auto &NumberedInstructions = Target.getInstructions();
2557   NumberedEncodings.reserve(NumberedInstructions.size());
2558   for (const auto &NumberedInstruction : NumberedInstructions) {
2559     const Record *InstDef = NumberedInstruction->TheDef;
2560     if (const RecordVal *RV = InstDef->getValue("EncodingInfos")) {
2561       if (const DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
2562         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
2563         for (auto &[ModeId, Encoding] : EBM) {
2564           // DecoderTables with DefaultMode should not have any suffix.
2565           if (ModeId == DefaultMode) {
2566             NumberedEncodings.emplace_back(Encoding, NumberedInstruction, "");
2567           } else {
2568             NumberedEncodings.emplace_back(Encoding, NumberedInstruction,
2569                                            HWM.getMode(ModeId).Name);
2570           }
2571         }
2572         continue;
2573       }
2574     }
2575     // This instruction is encoded the same on all HwModes.
2576     // According to user needs, provide varying degrees of suppression.
2577     handleHwModesUnrelatedEncodings(NumberedInstruction, HwModeNames,
2578                                     NamespacesWithHwModes, NumberedEncodings);
2579   }
2580   for (const Record *NumberedAlias :
2581        RK.getAllDerivedDefinitions("AdditionalEncoding"))
2582     NumberedEncodings.emplace_back(
2583         NumberedAlias,
2584         &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf")));
2585 
2586   std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
2587       OpcMap;
2588   std::map<unsigned, std::vector<OperandInfo>> Operands;
2589   std::vector<unsigned> InstrLen;
2590   bool IsVarLenInst = Target.hasVariableLengthEncodings();
2591   unsigned MaxInstLen = 0;
2592 
2593   for (const auto &[NEI, NumberedEncoding] : enumerate(NumberedEncodings)) {
2594     const Record *EncodingDef = NumberedEncoding.EncodingDef;
2595     const CodeGenInstruction *Inst = NumberedEncoding.Inst;
2596     const Record *Def = Inst->TheDef;
2597     unsigned Size = EncodingDef->getValueAsInt("Size");
2598     if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
2599         Def->getValueAsBit("isPseudo") ||
2600         Def->getValueAsBit("isAsmParserOnly") ||
2601         Def->getValueAsBit("isCodeGenOnly")) {
2602       NumEncodingsLackingDisasm++;
2603       continue;
2604     }
2605 
2606     if (NEI < NumberedInstructions.size())
2607       NumInstructions++;
2608     NumEncodings++;
2609 
2610     if (!Size && !IsVarLenInst)
2611       continue;
2612 
2613     if (IsVarLenInst)
2614       InstrLen.resize(NumberedInstructions.size(), 0);
2615 
2616     if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, NEI,
2617                                            Operands, IsVarLenInst)) {
2618       if (IsVarLenInst) {
2619         MaxInstLen = std::max(MaxInstLen, Len);
2620         InstrLen[NEI] = Len;
2621       }
2622       std::string DecoderNamespace =
2623           EncodingDef->getValueAsString("DecoderNamespace").str();
2624       if (!NumberedEncoding.HwModeName.empty())
2625         DecoderNamespace += "_" + NumberedEncoding.HwModeName.str();
2626       OpcMap[{DecoderNamespace, Size}].emplace_back(
2627           NEI, Target.getInstrIntValue(Def));
2628     } else {
2629       NumEncodingsOmitted++;
2630     }
2631   }
2632 
2633   DecoderTableInfo TableInfo;
2634   unsigned OpcodeMask = 0;
2635   for (const auto &[NSAndByteSize, EncodingIDs] : OpcMap) {
2636     const std::string &DecoderNamespace = NSAndByteSize.first;
2637     const unsigned BitWidth = 8 * NSAndByteSize.second;
2638     // Emit the decoder for this namespace+width combination.
2639     FilterChooser FC(NumberedEncodings, EncodingIDs, Operands,
2640                      IsVarLenInst ? MaxInstLen : BitWidth, this);
2641 
2642     // The decode table is cleared for each top level decoder function. The
2643     // predicates and decoders themselves, however, are shared across all
2644     // decoders to give more opportunities for uniqueing.
2645     TableInfo.Table.clear();
2646     TableInfo.FixupStack.clear();
2647     TableInfo.FixupStack.emplace_back();
2648     FC.emitTableEntries(TableInfo);
2649     // Any NumToSkip fixups in the top level scope can resolve to the
2650     // OPC_Fail at the end of the table.
2651     assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!");
2652     // Resolve any NumToSkip fixups in the current scope.
2653     resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
2654                        TableInfo.Table.size());
2655     TableInfo.FixupStack.clear();
2656 
2657     TableInfo.Table.push_back(MCD::OPC_Fail);
2658 
2659     // Print the table to the output stream.
2660     OpcodeMask |= emitTable(OS, TableInfo.Table, FC.getBitWidth(),
2661                             DecoderNamespace, EncodingIDs);
2662   }
2663 
2664   // For variable instruction, we emit a instruction length table
2665   // to let the decoder know how long the instructions are.
2666   // You can see example usage in M68k's disassembler.
2667   if (IsVarLenInst)
2668     emitInstrLenTable(OS, InstrLen);
2669 
2670   const bool HasCheckPredicate =
2671       OpcodeMask &
2672       ((1 << MCD::OPC_CheckPredicate) | (1 << MCD::OPC_CheckPredicateOrFail));
2673 
2674   // Emit the predicate function.
2675   if (HasCheckPredicate)
2676     emitPredicateFunction(OS, TableInfo.Predicates);
2677 
2678   // Emit the decoder function.
2679   emitDecoderFunction(OS, TableInfo.Decoders);
2680 
2681   // Emit the main entry point for the decoder, decodeInstruction().
2682   emitDecodeInstruction(OS, IsVarLenInst, OpcodeMask);
2683 
2684   OS << "\n} // namespace\n";
2685 }
2686 
2687 void llvm::EmitDecoder(const RecordKeeper &RK, raw_ostream &OS,
2688                        StringRef PredicateNamespace) {
2689   DecoderEmitter(RK, PredicateNamespace).run(OS);
2690 }
2691