xref: /freebsd/contrib/llvm-project/llvm/utils/TableGen/DecoderEmitter.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // It contains the tablegen backend that emits the decoder functions for
10 // targets with fixed/variable length instruction set.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "Common/CodeGenHwModes.h"
15 #include "Common/CodeGenInstruction.h"
16 #include "Common/CodeGenTarget.h"
17 #include "Common/InfoByHwMode.h"
18 #include "Common/VarLenCodeEmitterGen.h"
19 #include "TableGenBackends.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/CachedHashString.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallBitVector.h"
26 #include "llvm/ADT/SmallString.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/ADT/StringExtras.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/MC/MCDecoderOps.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/FormattedStream.h"
36 #include "llvm/Support/LEB128.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/TableGen/Error.h"
39 #include "llvm/TableGen/Record.h"
40 #include <algorithm>
41 #include <cassert>
42 #include <cstddef>
43 #include <cstdint>
44 #include <map>
45 #include <memory>
46 #include <set>
47 #include <string>
48 #include <utility>
49 #include <vector>
50 
51 using namespace llvm;
52 
53 #define DEBUG_TYPE "decoder-emitter"
54 
55 extern cl::OptionCategory DisassemblerEmitterCat;
56 
57 enum SuppressLevel {
58   SUPPRESSION_DISABLE,
59   SUPPRESSION_LEVEL1,
60   SUPPRESSION_LEVEL2
61 };
62 
63 cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates(
64     "suppress-per-hwmode-duplicates",
65     cl::desc("Suppress duplication of instrs into per-HwMode decoder tables"),
66     cl::values(
67         clEnumValN(
68             SUPPRESSION_DISABLE, "O0",
69             "Do not prevent DecoderTable duplications caused by HwModes"),
70         clEnumValN(
71             SUPPRESSION_LEVEL1, "O1",
72             "Remove duplicate DecoderTable entries generated due to HwModes"),
73         clEnumValN(
74             SUPPRESSION_LEVEL2, "O2",
75             "Extract HwModes-specific instructions into new DecoderTables, "
76             "significantly reducing Table Duplications")),
77     cl::init(SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat));
78 
79 namespace {
80 
81 STATISTIC(NumEncodings, "Number of encodings considered");
82 STATISTIC(NumEncodingsLackingDisasm,
83           "Number of encodings without disassembler info");
84 STATISTIC(NumInstructions, "Number of instructions considered");
85 STATISTIC(NumEncodingsSupported, "Number of encodings supported");
86 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
87 
88 struct EncodingField {
89   unsigned Base, Width, Offset;
EncodingField__anonfc09cd8b0111::EncodingField90   EncodingField(unsigned B, unsigned W, unsigned O)
91       : Base(B), Width(W), Offset(O) {}
92 };
93 
94 struct OperandInfo {
95   std::vector<EncodingField> Fields;
96   std::string Decoder;
97   bool HasCompleteDecoder;
98   uint64_t InitValue;
99 
OperandInfo__anonfc09cd8b0111::OperandInfo100   OperandInfo(std::string D, bool HCD)
101       : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {}
102 
addField__anonfc09cd8b0111::OperandInfo103   void addField(unsigned Base, unsigned Width, unsigned Offset) {
104     Fields.push_back(EncodingField(Base, Width, Offset));
105   }
106 
numFields__anonfc09cd8b0111::OperandInfo107   unsigned numFields() const { return Fields.size(); }
108 
109   typedef std::vector<EncodingField>::const_iterator const_iterator;
110 
begin__anonfc09cd8b0111::OperandInfo111   const_iterator begin() const { return Fields.begin(); }
end__anonfc09cd8b0111::OperandInfo112   const_iterator end() const { return Fields.end(); }
113 };
114 
115 typedef std::vector<uint8_t> DecoderTable;
116 typedef uint32_t DecoderFixup;
117 typedef std::vector<DecoderFixup> FixupList;
118 typedef std::vector<FixupList> FixupScopeList;
119 typedef SmallSetVector<CachedHashString, 16> PredicateSet;
120 typedef SmallSetVector<CachedHashString, 16> DecoderSet;
121 struct DecoderTableInfo {
122   DecoderTable Table;
123   FixupScopeList FixupStack;
124   PredicateSet Predicates;
125   DecoderSet Decoders;
126 };
127 
128 struct EncodingAndInst {
129   const Record *EncodingDef;
130   const CodeGenInstruction *Inst;
131   StringRef HwModeName;
132 
EncodingAndInst__anonfc09cd8b0111::EncodingAndInst133   EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst,
134                   StringRef HwModeName = "")
135       : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {}
136 };
137 
138 struct EncodingIDAndOpcode {
139   unsigned EncodingID;
140   unsigned Opcode;
141 
EncodingIDAndOpcode__anonfc09cd8b0111::EncodingIDAndOpcode142   EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {}
EncodingIDAndOpcode__anonfc09cd8b0111::EncodingIDAndOpcode143   EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
144       : EncodingID(EncodingID), Opcode(Opcode) {}
145 };
146 
147 using EncodingIDsVec = std::vector<EncodingIDAndOpcode>;
148 using NamespacesHwModesMap = std::map<std::string, std::set<StringRef>>;
149 
operator <<(raw_ostream & OS,const EncodingAndInst & Value)150 raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
151   if (Value.EncodingDef != Value.Inst->TheDef)
152     OS << Value.EncodingDef->getName() << ":";
153   OS << Value.Inst->TheDef->getName();
154   return OS;
155 }
156 
157 class DecoderEmitter {
158   RecordKeeper &RK;
159   std::vector<EncodingAndInst> NumberedEncodings;
160 
161 public:
DecoderEmitter(RecordKeeper & R,std::string PredicateNamespace)162   DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace)
163       : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)) {}
164 
165   // Emit the decoder state machine table.
166   void emitTable(formatted_raw_ostream &o, DecoderTable &Table,
167                  unsigned Indentation, unsigned BitWidth, StringRef Namespace,
168                  const EncodingIDsVec &EncodingIDs) const;
169   void emitInstrLenTable(formatted_raw_ostream &OS,
170                          std::vector<unsigned> &InstrLen) const;
171   void emitPredicateFunction(formatted_raw_ostream &OS,
172                              PredicateSet &Predicates,
173                              unsigned Indentation) const;
174   void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
175                            unsigned Indentation) const;
176 
177   // run - Output the code emitter
178   void run(raw_ostream &o);
179 
180 private:
181   CodeGenTarget Target;
182 
183 public:
184   std::string PredicateNamespace;
185 };
186 
187 } // end anonymous namespace
188 
189 // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
190 // for a bit value.
191 //
192 // BIT_UNFILTERED is used as the init value for a filter position.  It is used
193 // only for filter processings.
194 typedef enum {
195   BIT_TRUE,      // '1'
196   BIT_FALSE,     // '0'
197   BIT_UNSET,     // '?'
198   BIT_UNFILTERED // unfiltered
199 } bit_value_t;
200 
ValueSet(bit_value_t V)201 static bool ValueSet(bit_value_t V) {
202   return (V == BIT_TRUE || V == BIT_FALSE);
203 }
204 
ValueNotSet(bit_value_t V)205 static bool ValueNotSet(bit_value_t V) { return (V == BIT_UNSET); }
206 
Value(bit_value_t V)207 static int Value(bit_value_t V) {
208   return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1);
209 }
210 
bitFromBits(const BitsInit & bits,unsigned index)211 static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) {
212   if (BitInit *bit = dyn_cast<BitInit>(bits.getBit(index)))
213     return bit->getValue() ? BIT_TRUE : BIT_FALSE;
214 
215   // The bit is uninitialized.
216   return BIT_UNSET;
217 }
218 
219 // Prints the bit value for each position.
dumpBits(raw_ostream & o,const BitsInit & bits)220 static void dumpBits(raw_ostream &o, const BitsInit &bits) {
221   for (unsigned index = bits.getNumBits(); index > 0; --index) {
222     switch (bitFromBits(bits, index - 1)) {
223     case BIT_TRUE:
224       o << "1";
225       break;
226     case BIT_FALSE:
227       o << "0";
228       break;
229     case BIT_UNSET:
230       o << "_";
231       break;
232     default:
233       llvm_unreachable("unexpected return value from bitFromBits");
234     }
235   }
236 }
237 
getBitsField(const Record & def,StringRef str)238 static BitsInit &getBitsField(const Record &def, StringRef str) {
239   const RecordVal *RV = def.getValue(str);
240   if (BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue()))
241     return *Bits;
242 
243   // variable length instruction
244   VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV);
245   SmallVector<Init *, 16> Bits;
246 
247   for (const auto &SI : VLI) {
248     if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) {
249       for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) {
250         Bits.push_back(BI->getBit(Idx));
251       }
252     } else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value)) {
253       Bits.push_back(const_cast<BitInit *>(BI));
254     } else {
255       for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx)
256         Bits.push_back(UnsetInit::get(def.getRecords()));
257     }
258   }
259 
260   return *BitsInit::get(def.getRecords(), Bits);
261 }
262 
263 // Representation of the instruction to work on.
264 typedef std::vector<bit_value_t> insn_t;
265 
266 namespace {
267 
268 static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -1ULL;
269 
270 class FilterChooser;
271 
272 /// Filter - Filter works with FilterChooser to produce the decoding tree for
273 /// the ISA.
274 ///
275 /// It is useful to think of a Filter as governing the switch stmts of the
276 /// decoding tree in a certain level.  Each case stmt delegates to an inferior
277 /// FilterChooser to decide what further decoding logic to employ, or in another
278 /// words, what other remaining bits to look at.  The FilterChooser eventually
279 /// chooses a best Filter to do its job.
280 ///
281 /// This recursive scheme ends when the number of Opcodes assigned to the
282 /// FilterChooser becomes 1 or if there is a conflict.  A conflict happens when
283 /// the Filter/FilterChooser combo does not know how to distinguish among the
284 /// Opcodes assigned.
285 ///
286 /// An example of a conflict is
287 ///
288 /// Conflict:
289 ///                     111101000.00........00010000....
290 ///                     111101000.00........0001........
291 ///                     1111010...00........0001........
292 ///                     1111010...00....................
293 ///                     1111010.........................
294 ///                     1111............................
295 ///                     ................................
296 ///     VST4q8a         111101000_00________00010000____
297 ///     VST4q8b         111101000_00________00010000____
298 ///
299 /// The Debug output shows the path that the decoding tree follows to reach the
300 /// the conclusion that there is a conflict.  VST4q8a is a vst4 to double-spaced
301 /// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
302 ///
303 /// The encoding info in the .td files does not specify this meta information,
304 /// which could have been used by the decoder to resolve the conflict.  The
305 /// decoder could try to decode the even/odd register numbering and assign to
306 /// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
307 /// version and return the Opcode since the two have the same Asm format string.
308 class Filter {
309 protected:
310   const FilterChooser
311       *Owner;        // points to the FilterChooser who owns this filter
312   unsigned StartBit; // the starting bit position
313   unsigned NumBits;  // number of bits to filter
314   bool Mixed;        // a mixed region contains both set and unset bits
315 
316   // Map of well-known segment value to the set of uid's with that value.
317   std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions;
318 
319   // Set of uid's with non-constant segment values.
320   std::vector<EncodingIDAndOpcode> VariableInstructions;
321 
322   // Map of well-known segment value to its delegate.
323   std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
324 
325   // Number of instructions which fall under FilteredInstructions category.
326   unsigned NumFiltered;
327 
328   // Keeps track of the last opcode in the filtered bucket.
329   EncodingIDAndOpcode LastOpcFiltered;
330 
331 public:
332   Filter(Filter &&f);
333   Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed);
334 
335   ~Filter() = default;
336 
getNumFiltered() const337   unsigned getNumFiltered() const { return NumFiltered; }
338 
getSingletonOpc() const339   EncodingIDAndOpcode getSingletonOpc() const {
340     assert(NumFiltered == 1);
341     return LastOpcFiltered;
342   }
343 
344   // Return the filter chooser for the group of instructions without constant
345   // segment values.
getVariableFC() const346   const FilterChooser &getVariableFC() const {
347     assert(NumFiltered == 1);
348     assert(FilterChooserMap.size() == 1);
349     return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second);
350   }
351 
352   // Divides the decoding task into sub tasks and delegates them to the
353   // inferior FilterChooser's.
354   //
355   // A special case arises when there's only one entry in the filtered
356   // instructions.  In order to unambiguously decode the singleton, we need to
357   // match the remaining undecoded encoding bits against the singleton.
358   void recurse();
359 
360   // Emit table entries to decode instructions given a segment or segments of
361   // bits.
362   void emitTableEntry(DecoderTableInfo &TableInfo) const;
363 
364   // Returns the number of fanout produced by the filter.  More fanout implies
365   // the filter distinguishes more categories of instructions.
366   unsigned usefulness() const;
367 }; // end class Filter
368 
369 } // end anonymous namespace
370 
371 // These are states of our finite state machines used in FilterChooser's
372 // filterProcessor() which produces the filter candidates to use.
373 typedef enum {
374   ATTR_NONE,
375   ATTR_FILTERED,
376   ATTR_ALL_SET,
377   ATTR_ALL_UNSET,
378   ATTR_MIXED
379 } bitAttr_t;
380 
381 /// FilterChooser - FilterChooser chooses the best filter among a set of Filters
382 /// in order to perform the decoding of instructions at the current level.
383 ///
384 /// Decoding proceeds from the top down.  Based on the well-known encoding bits
385 /// of instructions available, FilterChooser builds up the possible Filters that
386 /// can further the task of decoding by distinguishing among the remaining
387 /// candidate instructions.
388 ///
389 /// Once a filter has been chosen, it is called upon to divide the decoding task
390 /// into sub-tasks and delegates them to its inferior FilterChoosers for further
391 /// processings.
392 ///
393 /// It is useful to think of a Filter as governing the switch stmts of the
394 /// decoding tree.  And each case is delegated to an inferior FilterChooser to
395 /// decide what further remaining bits to look at.
396 namespace {
397 
398 class FilterChooser {
399 protected:
400   friend class Filter;
401 
402   // Vector of codegen instructions to choose our filter.
403   ArrayRef<EncodingAndInst> AllInstructions;
404 
405   // Vector of uid's for this filter chooser to work on.
406   // The first member of the pair is the opcode id being decoded, the second is
407   // the opcode id that should be emitted.
408   const std::vector<EncodingIDAndOpcode> &Opcodes;
409 
410   // Lookup table for the operand decoding of instructions.
411   const std::map<unsigned, std::vector<OperandInfo>> &Operands;
412 
413   // Vector of candidate filters.
414   std::vector<Filter> Filters;
415 
416   // Array of bit values passed down from our parent.
417   // Set to all BIT_UNFILTERED's for Parent == NULL.
418   std::vector<bit_value_t> FilterBitValues;
419 
420   // Links to the FilterChooser above us in the decoding tree.
421   const FilterChooser *Parent;
422 
423   // Index of the best filter from Filters.
424   int BestIndex;
425 
426   // Width of instructions
427   unsigned BitWidth;
428 
429   // Parent emitter
430   const DecoderEmitter *Emitter;
431 
432 public:
FilterChooser(ArrayRef<EncodingAndInst> Insts,const std::vector<EncodingIDAndOpcode> & IDs,const std::map<unsigned,std::vector<OperandInfo>> & Ops,unsigned BW,const DecoderEmitter * E)433   FilterChooser(ArrayRef<EncodingAndInst> Insts,
434                 const std::vector<EncodingIDAndOpcode> &IDs,
435                 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
436                 unsigned BW, const DecoderEmitter *E)
437       : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
438         FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1),
439         BitWidth(BW), Emitter(E) {
440     doFilter();
441   }
442 
FilterChooser(ArrayRef<EncodingAndInst> Insts,const std::vector<EncodingIDAndOpcode> & IDs,const std::map<unsigned,std::vector<OperandInfo>> & Ops,const std::vector<bit_value_t> & ParentFilterBitValues,const FilterChooser & parent)443   FilterChooser(ArrayRef<EncodingAndInst> Insts,
444                 const std::vector<EncodingIDAndOpcode> &IDs,
445                 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
446                 const std::vector<bit_value_t> &ParentFilterBitValues,
447                 const FilterChooser &parent)
448       : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
449         FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
450         BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
451     doFilter();
452   }
453 
454   FilterChooser(const FilterChooser &) = delete;
455   void operator=(const FilterChooser &) = delete;
456 
getBitWidth() const457   unsigned getBitWidth() const { return BitWidth; }
458 
459 protected:
460   // Populates the insn given the uid.
insnWithID(insn_t & Insn,unsigned Opcode) const461   void insnWithID(insn_t &Insn, unsigned Opcode) const {
462     const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
463     BitsInit &Bits = getBitsField(*EncodingDef, "Inst");
464     Insn.resize(std::max(BitWidth, Bits.getNumBits()), BIT_UNSET);
465     // We may have a SoftFail bitmask, which specifies a mask where an encoding
466     // may differ from the value in "Inst" and yet still be valid, but the
467     // disassembler should return SoftFail instead of Success.
468     //
469     // This is used for marking UNPREDICTABLE instructions in the ARM world.
470     const RecordVal *RV = EncodingDef->getValue("SoftFail");
471     const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
472     for (unsigned i = 0; i < Bits.getNumBits(); ++i) {
473       if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE)
474         Insn[i] = BIT_UNSET;
475       else
476         Insn[i] = bitFromBits(Bits, i);
477     }
478   }
479 
480   // Emit the name of the encoding/instruction pair.
emitNameWithID(raw_ostream & OS,unsigned Opcode) const481   void emitNameWithID(raw_ostream &OS, unsigned Opcode) const {
482     const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
483     const Record *InstDef = AllInstructions[Opcode].Inst->TheDef;
484     if (EncodingDef != InstDef)
485       OS << EncodingDef->getName() << ":";
486     OS << InstDef->getName();
487   }
488 
489   // Populates the field of the insn given the start position and the number of
490   // consecutive bits to scan for.
491   //
492   // Returns a pair of values (indicator, field), where the indicator is false
493   // if there exists any uninitialized bit value in the range and true if all
494   // bits are well-known. The second value is the potentially populated field.
495   std::pair<bool, uint64_t> fieldFromInsn(const insn_t &Insn, unsigned StartBit,
496                                           unsigned NumBits) const;
497 
498   /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
499   /// filter array as a series of chars.
500   void dumpFilterArray(raw_ostream &o,
501                        const std::vector<bit_value_t> &filter) const;
502 
503   /// dumpStack - dumpStack traverses the filter chooser chain and calls
504   /// dumpFilterArray on each filter chooser up to the top level one.
505   void dumpStack(raw_ostream &o, const char *prefix) const;
506 
bestFilter()507   Filter &bestFilter() {
508     assert(BestIndex != -1 && "BestIndex not set");
509     return Filters[BestIndex];
510   }
511 
PositionFiltered(unsigned i) const512   bool PositionFiltered(unsigned i) const {
513     return ValueSet(FilterBitValues[i]);
514   }
515 
516   // Calculates the island(s) needed to decode the instruction.
517   // This returns a lit of undecoded bits of an instructions, for example,
518   // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
519   // decoded bits in order to verify that the instruction matches the Opcode.
520   unsigned getIslands(std::vector<unsigned> &StartBits,
521                       std::vector<unsigned> &EndBits,
522                       std::vector<uint64_t> &FieldVals,
523                       const insn_t &Insn) const;
524 
525   // Emits code to check the Predicates member of an instruction are true.
526   // Returns true if predicate matches were emitted, false otherwise.
527   bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
528                           unsigned Opc) const;
529   bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
530                              raw_ostream &OS) const;
531 
532   bool doesOpcodeNeedPredicate(unsigned Opc) const;
533   unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const;
534   void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
535 
536   void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
537 
538   // Emits table entries to decode the singleton.
539   void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
540                                EncodingIDAndOpcode Opc) const;
541 
542   // Emits code to decode the singleton, and then to decode the rest.
543   void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
544                                const Filter &Best) const;
545 
546   void emitBinaryParser(raw_ostream &o, unsigned &Indentation,
547                         const OperandInfo &OpInfo,
548                         bool &OpHasCompleteDecoder) const;
549 
550   void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc,
551                    bool &HasCompleteDecoder) const;
552   unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc,
553                            bool &HasCompleteDecoder) const;
554 
555   // Assign a single filter and run with it.
556   void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
557 
558   // reportRegion is a helper function for filterProcessor to mark a region as
559   // eligible for use as a filter region.
560   void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
561                     bool AllowMixed);
562 
563   // FilterProcessor scans the well-known encoding bits of the instructions and
564   // builds up a list of candidate filters.  It chooses the best filter and
565   // recursively descends down the decoding tree.
566   bool filterProcessor(bool AllowMixed, bool Greedy = true);
567 
568   // Decides on the best configuration of filter(s) to use in order to decode
569   // the instructions.  A conflict of instructions may occur, in which case we
570   // dump the conflict set to the standard error.
571   void doFilter();
572 
573 public:
574   // emitTableEntries - Emit state machine entries to decode our share of
575   // instructions.
576   void emitTableEntries(DecoderTableInfo &TableInfo) const;
577 };
578 
579 } // end anonymous namespace
580 
581 ///////////////////////////
582 //                       //
583 // Filter Implementation //
584 //                       //
585 ///////////////////////////
586 
Filter(Filter && f)587 Filter::Filter(Filter &&f)
588     : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
589       FilteredInstructions(std::move(f.FilteredInstructions)),
590       VariableInstructions(std::move(f.VariableInstructions)),
591       FilterChooserMap(std::move(f.FilterChooserMap)),
592       NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {}
593 
Filter(FilterChooser & owner,unsigned startBit,unsigned numBits,bool mixed)594 Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
595                bool mixed)
596     : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) {
597   assert(StartBit + NumBits - 1 < Owner->BitWidth);
598 
599   NumFiltered = 0;
600   LastOpcFiltered = {0, 0};
601 
602   for (const auto &OpcPair : Owner->Opcodes) {
603     insn_t Insn;
604 
605     // Populates the insn given the uid.
606     Owner->insnWithID(Insn, OpcPair.EncodingID);
607 
608     // Scans the segment for possibly well-specified encoding bits.
609     auto [Ok, Field] = Owner->fieldFromInsn(Insn, StartBit, NumBits);
610 
611     if (Ok) {
612       // The encoding bits are well-known.  Lets add the uid of the
613       // instruction into the bucket keyed off the constant field value.
614       LastOpcFiltered = OpcPair;
615       FilteredInstructions[Field].push_back(LastOpcFiltered);
616       ++NumFiltered;
617     } else {
618       // Some of the encoding bit(s) are unspecified.  This contributes to
619       // one additional member of "Variable" instructions.
620       VariableInstructions.push_back(OpcPair);
621     }
622   }
623 
624   assert((FilteredInstructions.size() + VariableInstructions.size() > 0) &&
625          "Filter returns no instruction categories");
626 }
627 
628 // Divides the decoding task into sub tasks and delegates them to the
629 // inferior FilterChooser's.
630 //
631 // A special case arises when there's only one entry in the filtered
632 // instructions.  In order to unambiguously decode the singleton, we need to
633 // match the remaining undecoded encoding bits against the singleton.
recurse()634 void Filter::recurse() {
635   // Starts by inheriting our parent filter chooser's filter bit values.
636   std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues);
637 
638   if (!VariableInstructions.empty()) {
639     // Conservatively marks each segment position as BIT_UNSET.
640     for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
641       BitValueArray[StartBit + bitIndex] = BIT_UNSET;
642 
643     // Delegates to an inferior filter chooser for further processing on this
644     // group of instructions whose segment values are variable.
645     FilterChooserMap.insert(std::pair(
646         NO_FIXED_SEGMENTS_SENTINEL,
647         std::make_unique<FilterChooser>(Owner->AllInstructions,
648                                         VariableInstructions, Owner->Operands,
649                                         BitValueArray, *Owner)));
650   }
651 
652   // No need to recurse for a singleton filtered instruction.
653   // See also Filter::emit*().
654   if (getNumFiltered() == 1) {
655     assert(FilterChooserMap.size() == 1);
656     return;
657   }
658 
659   // Otherwise, create sub choosers.
660   for (const auto &Inst : FilteredInstructions) {
661 
662     // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
663     for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) {
664       if (Inst.first & (1ULL << bitIndex))
665         BitValueArray[StartBit + bitIndex] = BIT_TRUE;
666       else
667         BitValueArray[StartBit + bitIndex] = BIT_FALSE;
668     }
669 
670     // Delegates to an inferior filter chooser for further processing on this
671     // category of instructions.
672     FilterChooserMap.insert(
673         std::pair(Inst.first, std::make_unique<FilterChooser>(
674                                   Owner->AllInstructions, Inst.second,
675                                   Owner->Operands, BitValueArray, *Owner)));
676   }
677 }
678 
resolveTableFixups(DecoderTable & Table,const FixupList & Fixups,uint32_t DestIdx)679 static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
680                                uint32_t DestIdx) {
681   // Any NumToSkip fixups in the current scope can resolve to the
682   // current location.
683   for (FixupList::const_reverse_iterator I = Fixups.rbegin(), E = Fixups.rend();
684        I != E; ++I) {
685     // Calculate the distance from the byte following the fixup entry byte
686     // to the destination. The Target is calculated from after the 16-bit
687     // NumToSkip entry itself, so subtract two  from the displacement here
688     // to account for that.
689     uint32_t FixupIdx = *I;
690     uint32_t Delta = DestIdx - FixupIdx - 3;
691     // Our NumToSkip entries are 24-bits. Make sure our table isn't too
692     // big.
693     assert(Delta < (1u << 24));
694     Table[FixupIdx] = (uint8_t)Delta;
695     Table[FixupIdx + 1] = (uint8_t)(Delta >> 8);
696     Table[FixupIdx + 2] = (uint8_t)(Delta >> 16);
697   }
698 }
699 
700 // Emit table entries to decode instructions given a segment or segments
701 // of bits.
emitTableEntry(DecoderTableInfo & TableInfo) const702 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
703   assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!");
704   TableInfo.Table.push_back(MCD::OPC_ExtractField);
705 
706   SmallString<16> SBytes;
707   raw_svector_ostream S(SBytes);
708   encodeULEB128(StartBit, S);
709   TableInfo.Table.insert(TableInfo.Table.end(), SBytes.begin(), SBytes.end());
710   TableInfo.Table.push_back(NumBits);
711 
712   // A new filter entry begins a new scope for fixup resolution.
713   TableInfo.FixupStack.emplace_back();
714 
715   DecoderTable &Table = TableInfo.Table;
716 
717   size_t PrevFilter = 0;
718   bool HasFallthrough = false;
719   for (const auto &Filter : FilterChooserMap) {
720     // Field value -1 implies a non-empty set of variable instructions.
721     // See also recurse().
722     if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) {
723       HasFallthrough = true;
724 
725       // Each scope should always have at least one filter value to check
726       // for.
727       assert(PrevFilter != 0 && "empty filter set!");
728       FixupList &CurScope = TableInfo.FixupStack.back();
729       // Resolve any NumToSkip fixups in the current scope.
730       resolveTableFixups(Table, CurScope, Table.size());
731       CurScope.clear();
732       PrevFilter = 0; // Don't re-process the filter's fallthrough.
733     } else {
734       Table.push_back(MCD::OPC_FilterValue);
735       // Encode and emit the value to filter against.
736       uint8_t Buffer[16];
737       unsigned Len = encodeULEB128(Filter.first, Buffer);
738       Table.insert(Table.end(), Buffer, Buffer + Len);
739       // Reserve space for the NumToSkip entry. We'll backpatch the value
740       // later.
741       PrevFilter = Table.size();
742       Table.push_back(0);
743       Table.push_back(0);
744       Table.push_back(0);
745     }
746 
747     // We arrive at a category of instructions with the same segment value.
748     // Now delegate to the sub filter chooser for further decodings.
749     // The case may fallthrough, which happens if the remaining well-known
750     // encoding bits do not match exactly.
751     Filter.second->emitTableEntries(TableInfo);
752 
753     // Now that we've emitted the body of the handler, update the NumToSkip
754     // of the filter itself to be able to skip forward when false. Subtract
755     // two as to account for the width of the NumToSkip field itself.
756     if (PrevFilter) {
757       uint32_t NumToSkip = Table.size() - PrevFilter - 3;
758       assert(NumToSkip < (1u << 24) &&
759              "disassembler decoding table too large!");
760       Table[PrevFilter] = (uint8_t)NumToSkip;
761       Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8);
762       Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16);
763     }
764   }
765 
766   // Any remaining unresolved fixups bubble up to the parent fixup scope.
767   assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!");
768   FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1;
769   FixupScopeList::iterator Dest = Source - 1;
770   llvm::append_range(*Dest, *Source);
771   TableInfo.FixupStack.pop_back();
772 
773   // If there is no fallthrough, then the final filter should get fixed
774   // up according to the enclosing scope rather than the current position.
775   if (!HasFallthrough)
776     TableInfo.FixupStack.back().push_back(PrevFilter);
777 }
778 
779 // Returns the number of fanout produced by the filter.  More fanout implies
780 // the filter distinguishes more categories of instructions.
usefulness() const781 unsigned Filter::usefulness() const {
782   if (!VariableInstructions.empty())
783     return FilteredInstructions.size();
784   else
785     return FilteredInstructions.size() + 1;
786 }
787 
788 //////////////////////////////////
789 //                              //
790 // Filterchooser Implementation //
791 //                              //
792 //////////////////////////////////
793 
794 // Emit the decoder state machine table.
emitTable(formatted_raw_ostream & OS,DecoderTable & Table,unsigned Indentation,unsigned BitWidth,StringRef Namespace,const EncodingIDsVec & EncodingIDs) const795 void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
796                                unsigned Indentation, unsigned BitWidth,
797                                StringRef Namespace,
798                                const EncodingIDsVec &EncodingIDs) const {
799   // We'll need to be able to map from a decoded opcode into the corresponding
800   // EncodingID for this specific combination of BitWidth and Namespace. This
801   // is used below to index into NumberedEncodings.
802   DenseMap<unsigned, unsigned> OpcodeToEncodingID;
803   OpcodeToEncodingID.reserve(EncodingIDs.size());
804   for (const auto &EI : EncodingIDs)
805     OpcodeToEncodingID[EI.Opcode] = EI.EncodingID;
806 
807   OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace
808                          << BitWidth << "[] = {\n";
809 
810   Indentation += 2;
811 
812   // Emit ULEB128 encoded value to OS, returning the number of bytes emitted.
813   auto emitULEB128 = [](DecoderTable::const_iterator I,
814                         formatted_raw_ostream &OS) {
815     unsigned Len = 0;
816     while (*I >= 128) {
817       OS << (unsigned)*I++ << ", ";
818       Len++;
819     }
820     OS << (unsigned)*I++ << ", ";
821     return Len + 1;
822   };
823 
824   // Emit 24-bit numtoskip value to OS, returning the NumToSkip value.
825   auto emitNumToSkip = [](DecoderTable::const_iterator I,
826                           formatted_raw_ostream &OS) {
827     uint8_t Byte = *I++;
828     uint32_t NumToSkip = Byte;
829     OS << (unsigned)Byte << ", ";
830     Byte = *I++;
831     OS << (unsigned)Byte << ", ";
832     NumToSkip |= Byte << 8;
833     Byte = *I++;
834     OS << utostr(Byte) << ", ";
835     NumToSkip |= Byte << 16;
836     return NumToSkip;
837   };
838 
839   // FIXME: We may be able to use the NumToSkip values to recover
840   // appropriate indentation levels.
841   DecoderTable::const_iterator I = Table.begin();
842   DecoderTable::const_iterator E = Table.end();
843   while (I != E) {
844     assert(I < E && "incomplete decode table entry!");
845 
846     uint64_t Pos = I - Table.begin();
847     OS << "/* " << Pos << " */";
848     OS.PadToColumn(12);
849 
850     switch (*I) {
851     default:
852       PrintFatalError("invalid decode table opcode");
853     case MCD::OPC_ExtractField: {
854       ++I;
855       OS.indent(Indentation) << "MCD::OPC_ExtractField, ";
856 
857       // ULEB128 encoded start value.
858       const char *ErrMsg = nullptr;
859       unsigned Start = decodeULEB128(Table.data() + Pos + 1, nullptr,
860                                      Table.data() + Table.size(), &ErrMsg);
861       assert(ErrMsg == nullptr && "ULEB128 value too large!");
862       I += emitULEB128(I, OS);
863 
864       unsigned Len = *I++;
865       OS << Len << ",  // Inst{";
866       if (Len > 1)
867         OS << (Start + Len - 1) << "-";
868       OS << Start << "} ...\n";
869       break;
870     }
871     case MCD::OPC_FilterValue: {
872       ++I;
873       OS.indent(Indentation) << "MCD::OPC_FilterValue, ";
874       // The filter value is ULEB128 encoded.
875       I += emitULEB128(I, OS);
876 
877       // 24-bit numtoskip value.
878       uint32_t NumToSkip = emitNumToSkip(I, OS);
879       I += 3;
880       OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
881       break;
882     }
883     case MCD::OPC_CheckField: {
884       ++I;
885       OS.indent(Indentation) << "MCD::OPC_CheckField, ";
886       // ULEB128 encoded start value.
887       I += emitULEB128(I, OS);
888       // 8-bit length.
889       unsigned Len = *I++;
890       OS << Len << ", ";
891       // ULEB128 encoded field value.
892       I += emitULEB128(I, OS);
893 
894       // 24-bit numtoskip value.
895       uint32_t NumToSkip = emitNumToSkip(I, OS);
896       I += 3;
897       OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
898       break;
899     }
900     case MCD::OPC_CheckPredicate: {
901       ++I;
902       OS.indent(Indentation) << "MCD::OPC_CheckPredicate, ";
903       I += emitULEB128(I, OS);
904 
905       // 24-bit numtoskip value.
906       uint32_t NumToSkip = emitNumToSkip(I, OS);
907       I += 3;
908       OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
909       break;
910     }
911     case MCD::OPC_Decode:
912     case MCD::OPC_TryDecode: {
913       bool IsTry = *I == MCD::OPC_TryDecode;
914       ++I;
915       // Decode the Opcode value.
916       const char *ErrMsg = nullptr;
917       unsigned Opc = decodeULEB128(Table.data() + Pos + 1, nullptr,
918                                    Table.data() + Table.size(), &ErrMsg);
919       assert(ErrMsg == nullptr && "ULEB128 value too large!");
920 
921       OS.indent(Indentation)
922           << "MCD::OPC_" << (IsTry ? "Try" : "") << "Decode, ";
923       I += emitULEB128(I, OS);
924 
925       // Decoder index.
926       I += emitULEB128(I, OS);
927 
928       auto EncI = OpcodeToEncodingID.find(Opc);
929       assert(EncI != OpcodeToEncodingID.end() && "no encoding entry");
930       auto EncodingID = EncI->second;
931 
932       if (!IsTry) {
933         OS << "// Opcode: " << NumberedEncodings[EncodingID] << "\n";
934         break;
935       }
936 
937       // Fallthrough for OPC_TryDecode.
938 
939       // 24-bit numtoskip value.
940       uint32_t NumToSkip = emitNumToSkip(I, OS);
941       I += 3;
942 
943       OS << "// Opcode: " << NumberedEncodings[EncodingID]
944          << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
945       break;
946     }
947     case MCD::OPC_SoftFail: {
948       ++I;
949       OS.indent(Indentation) << "MCD::OPC_SoftFail";
950       // Positive mask
951       uint64_t Value = 0;
952       unsigned Shift = 0;
953       do {
954         OS << ", " << (unsigned)*I;
955         Value += ((uint64_t)(*I & 0x7f)) << Shift;
956         Shift += 7;
957       } while (*I++ >= 128);
958       if (Value > 127) {
959         OS << " /* 0x";
960         OS.write_hex(Value);
961         OS << " */";
962       }
963       // Negative mask
964       Value = 0;
965       Shift = 0;
966       do {
967         OS << ", " << (unsigned)*I;
968         Value += ((uint64_t)(*I & 0x7f)) << Shift;
969         Shift += 7;
970       } while (*I++ >= 128);
971       if (Value > 127) {
972         OS << " /* 0x";
973         OS.write_hex(Value);
974         OS << " */";
975       }
976       OS << ",\n";
977       break;
978     }
979     case MCD::OPC_Fail: {
980       ++I;
981       OS.indent(Indentation) << "MCD::OPC_Fail,\n";
982       break;
983     }
984     }
985   }
986   OS.indent(Indentation) << "0\n";
987 
988   Indentation -= 2;
989 
990   OS.indent(Indentation) << "};\n\n";
991 }
992 
emitInstrLenTable(formatted_raw_ostream & OS,std::vector<unsigned> & InstrLen) const993 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
994                                        std::vector<unsigned> &InstrLen) const {
995   OS << "static const uint8_t InstrLenTable[] = {\n";
996   for (unsigned &Len : InstrLen) {
997     OS << Len << ",\n";
998   }
999   OS << "};\n\n";
1000 }
1001 
emitPredicateFunction(formatted_raw_ostream & OS,PredicateSet & Predicates,unsigned Indentation) const1002 void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
1003                                            PredicateSet &Predicates,
1004                                            unsigned Indentation) const {
1005   // The predicate function is just a big switch statement based on the
1006   // input predicate index.
1007   OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, "
1008                          << "const FeatureBitset &Bits) {\n";
1009   Indentation += 2;
1010   if (!Predicates.empty()) {
1011     OS.indent(Indentation) << "switch (Idx) {\n";
1012     OS.indent(Indentation)
1013         << "default: llvm_unreachable(\"Invalid index!\");\n";
1014     unsigned Index = 0;
1015     for (const auto &Predicate : Predicates) {
1016       OS.indent(Indentation) << "case " << Index++ << ":\n";
1017       OS.indent(Indentation + 2) << "return (" << Predicate << ");\n";
1018     }
1019     OS.indent(Indentation) << "}\n";
1020   } else {
1021     // No case statement to emit
1022     OS.indent(Indentation) << "llvm_unreachable(\"Invalid index!\");\n";
1023   }
1024   Indentation -= 2;
1025   OS.indent(Indentation) << "}\n\n";
1026 }
1027 
emitDecoderFunction(formatted_raw_ostream & OS,DecoderSet & Decoders,unsigned Indentation) const1028 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
1029                                          DecoderSet &Decoders,
1030                                          unsigned Indentation) const {
1031   // The decoder function is just a big switch statement based on the
1032   // input decoder index.
1033   OS.indent(Indentation) << "template <typename InsnType>\n";
1034   OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S,"
1035                          << " unsigned Idx, InsnType insn, MCInst &MI,\n";
1036   OS.indent(Indentation)
1037       << "                                   uint64_t "
1038       << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n";
1039   Indentation += 2;
1040   OS.indent(Indentation) << "DecodeComplete = true;\n";
1041   // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
1042   // It would be better for emitBinaryParser to use a 64-bit tmp whenever
1043   // possible but fall back to an InsnType-sized tmp for truly large fields.
1044   OS.indent(Indentation) << "using TmpType = "
1045                             "std::conditional_t<std::is_integral<InsnType>::"
1046                             "value, InsnType, uint64_t>;\n";
1047   OS.indent(Indentation) << "TmpType tmp;\n";
1048   OS.indent(Indentation) << "switch (Idx) {\n";
1049   OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n";
1050   unsigned Index = 0;
1051   for (const auto &Decoder : Decoders) {
1052     OS.indent(Indentation) << "case " << Index++ << ":\n";
1053     OS << Decoder;
1054     OS.indent(Indentation + 2) << "return S;\n";
1055   }
1056   OS.indent(Indentation) << "}\n";
1057   Indentation -= 2;
1058   OS.indent(Indentation) << "}\n";
1059 }
1060 
1061 // Populates the field of the insn given the start position and the number of
1062 // consecutive bits to scan for.
1063 //
1064 // Returns a pair of values (indicator, field), where the indicator is false
1065 // if there exists any uninitialized bit value in the range and true if all
1066 // bits are well-known. The second value is the potentially populated field.
fieldFromInsn(const insn_t & Insn,unsigned StartBit,unsigned NumBits) const1067 std::pair<bool, uint64_t> FilterChooser::fieldFromInsn(const insn_t &Insn,
1068                                                        unsigned StartBit,
1069                                                        unsigned NumBits) const {
1070   uint64_t Field = 0;
1071 
1072   for (unsigned i = 0; i < NumBits; ++i) {
1073     if (Insn[StartBit + i] == BIT_UNSET)
1074       return {false, Field};
1075 
1076     if (Insn[StartBit + i] == BIT_TRUE)
1077       Field = Field | (1ULL << i);
1078   }
1079 
1080   return {true, Field};
1081 }
1082 
1083 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
1084 /// filter array as a series of chars.
dumpFilterArray(raw_ostream & o,const std::vector<bit_value_t> & filter) const1085 void FilterChooser::dumpFilterArray(
1086     raw_ostream &o, const std::vector<bit_value_t> &filter) const {
1087   for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) {
1088     switch (filter[bitIndex - 1]) {
1089     case BIT_UNFILTERED:
1090       o << ".";
1091       break;
1092     case BIT_UNSET:
1093       o << "_";
1094       break;
1095     case BIT_TRUE:
1096       o << "1";
1097       break;
1098     case BIT_FALSE:
1099       o << "0";
1100       break;
1101     }
1102   }
1103 }
1104 
1105 /// dumpStack - dumpStack traverses the filter chooser chain and calls
1106 /// dumpFilterArray on each filter chooser up to the top level one.
dumpStack(raw_ostream & o,const char * prefix) const1107 void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) const {
1108   const FilterChooser *current = this;
1109 
1110   while (current) {
1111     o << prefix;
1112     dumpFilterArray(o, current->FilterBitValues);
1113     o << '\n';
1114     current = current->Parent;
1115   }
1116 }
1117 
1118 // Calculates the island(s) needed to decode the instruction.
1119 // This returns a list of undecoded bits of an instructions, for example,
1120 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
1121 // decoded bits in order to verify that the instruction matches the Opcode.
getIslands(std::vector<unsigned> & StartBits,std::vector<unsigned> & EndBits,std::vector<uint64_t> & FieldVals,const insn_t & Insn) const1122 unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
1123                                    std::vector<unsigned> &EndBits,
1124                                    std::vector<uint64_t> &FieldVals,
1125                                    const insn_t &Insn) const {
1126   unsigned Num, BitNo;
1127   Num = BitNo = 0;
1128 
1129   uint64_t FieldVal = 0;
1130 
1131   // 0: Init
1132   // 1: Water (the bit value does not affect decoding)
1133   // 2: Island (well-known bit value needed for decoding)
1134   int State = 0;
1135 
1136   for (unsigned i = 0; i < BitWidth; ++i) {
1137     int64_t Val = Value(Insn[i]);
1138     bool Filtered = PositionFiltered(i);
1139     switch (State) {
1140     default:
1141       llvm_unreachable("Unreachable code!");
1142     case 0:
1143     case 1:
1144       if (Filtered || Val == -1)
1145         State = 1; // Still in Water
1146       else {
1147         State = 2; // Into the Island
1148         BitNo = 0;
1149         StartBits.push_back(i);
1150         FieldVal = Val;
1151       }
1152       break;
1153     case 2:
1154       if (Filtered || Val == -1) {
1155         State = 1; // Into the Water
1156         EndBits.push_back(i - 1);
1157         FieldVals.push_back(FieldVal);
1158         ++Num;
1159       } else {
1160         State = 2; // Still in Island
1161         ++BitNo;
1162         FieldVal = FieldVal | Val << BitNo;
1163       }
1164       break;
1165     }
1166   }
1167   // If we are still in Island after the loop, do some housekeeping.
1168   if (State == 2) {
1169     EndBits.push_back(BitWidth - 1);
1170     FieldVals.push_back(FieldVal);
1171     ++Num;
1172   }
1173 
1174   assert(StartBits.size() == Num && EndBits.size() == Num &&
1175          FieldVals.size() == Num);
1176   return Num;
1177 }
1178 
emitBinaryParser(raw_ostream & o,unsigned & Indentation,const OperandInfo & OpInfo,bool & OpHasCompleteDecoder) const1179 void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
1180                                      const OperandInfo &OpInfo,
1181                                      bool &OpHasCompleteDecoder) const {
1182   const std::string &Decoder = OpInfo.Decoder;
1183 
1184   bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0;
1185 
1186   if (UseInsertBits) {
1187     o.indent(Indentation) << "tmp = 0x";
1188     o.write_hex(OpInfo.InitValue);
1189     o << ";\n";
1190   }
1191 
1192   for (const EncodingField &EF : OpInfo) {
1193     o.indent(Indentation);
1194     if (UseInsertBits)
1195       o << "insertBits(tmp, ";
1196     else
1197       o << "tmp = ";
1198     o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')';
1199     if (UseInsertBits)
1200       o << ", " << EF.Offset << ", " << EF.Width << ')';
1201     else if (EF.Offset != 0)
1202       o << " << " << EF.Offset;
1203     o << ";\n";
1204   }
1205 
1206   if (Decoder != "") {
1207     OpHasCompleteDecoder = OpInfo.HasCompleteDecoder;
1208     o.indent(Indentation) << "if (!Check(S, " << Decoder
1209                           << "(MI, tmp, Address, Decoder))) { "
1210                           << (OpHasCompleteDecoder ? ""
1211                                                    : "DecodeComplete = false; ")
1212                           << "return MCDisassembler::Fail; }\n";
1213   } else {
1214     OpHasCompleteDecoder = true;
1215     o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n";
1216   }
1217 }
1218 
emitDecoder(raw_ostream & OS,unsigned Indentation,unsigned Opc,bool & HasCompleteDecoder) const1219 void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation,
1220                                 unsigned Opc, bool &HasCompleteDecoder) const {
1221   HasCompleteDecoder = true;
1222 
1223   for (const auto &Op : Operands.find(Opc)->second) {
1224     // If a custom instruction decoder was specified, use that.
1225     if (Op.numFields() == 0 && !Op.Decoder.empty()) {
1226       HasCompleteDecoder = Op.HasCompleteDecoder;
1227       OS.indent(Indentation)
1228           << "if (!Check(S, " << Op.Decoder
1229           << "(MI, insn, Address, Decoder))) { "
1230           << (HasCompleteDecoder ? "" : "DecodeComplete = false; ")
1231           << "return MCDisassembler::Fail; }\n";
1232       break;
1233     }
1234 
1235     bool OpHasCompleteDecoder;
1236     emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder);
1237     if (!OpHasCompleteDecoder)
1238       HasCompleteDecoder = false;
1239   }
1240 }
1241 
getDecoderIndex(DecoderSet & Decoders,unsigned Opc,bool & HasCompleteDecoder) const1242 unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, unsigned Opc,
1243                                         bool &HasCompleteDecoder) const {
1244   // Build up the predicate string.
1245   SmallString<256> Decoder;
1246   // FIXME: emitDecoder() function can take a buffer directly rather than
1247   // a stream.
1248   raw_svector_ostream S(Decoder);
1249   unsigned I = 4;
1250   emitDecoder(S, I, Opc, HasCompleteDecoder);
1251 
1252   // Using the full decoder string as the key value here is a bit
1253   // heavyweight, but is effective. If the string comparisons become a
1254   // performance concern, we can implement a mangling of the predicate
1255   // data easily enough with a map back to the actual string. That's
1256   // overkill for now, though.
1257 
1258   // Make sure the predicate is in the table.
1259   Decoders.insert(CachedHashString(Decoder));
1260   // Now figure out the index for when we write out the table.
1261   DecoderSet::const_iterator P = find(Decoders, Decoder.str());
1262   return (unsigned)(P - Decoders.begin());
1263 }
1264 
1265 // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||.
emitPredicateMatchAux(const Init & Val,bool ParenIfBinOp,raw_ostream & OS) const1266 bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
1267                                           raw_ostream &OS) const {
1268   if (const auto *D = dyn_cast<DefInit>(&Val)) {
1269     if (!D->getDef()->isSubClassOf("SubtargetFeature"))
1270       return true;
1271     OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString()
1272        << "]";
1273     return false;
1274   }
1275   if (const auto *D = dyn_cast<DagInit>(&Val)) {
1276     std::string Op = D->getOperator()->getAsString();
1277     if (Op == "not" && D->getNumArgs() == 1) {
1278       OS << '!';
1279       return emitPredicateMatchAux(*D->getArg(0), true, OS);
1280     }
1281     if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) {
1282       bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true);
1283       if (Paren)
1284         OS << '(';
1285       ListSeparator LS(Op == "any_of" ? " || " : " && ");
1286       for (auto *Arg : D->getArgs()) {
1287         OS << LS;
1288         if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS))
1289           return true;
1290       }
1291       if (Paren)
1292         OS << ')';
1293       return false;
1294     }
1295   }
1296   return true;
1297 }
1298 
emitPredicateMatch(raw_ostream & o,unsigned & Indentation,unsigned Opc) const1299 bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
1300                                        unsigned Opc) const {
1301   ListInit *Predicates =
1302       AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
1303   bool IsFirstEmission = true;
1304   for (unsigned i = 0; i < Predicates->size(); ++i) {
1305     Record *Pred = Predicates->getElementAsRecord(i);
1306     if (!Pred->getValue("AssemblerMatcherPredicate"))
1307       continue;
1308 
1309     if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
1310       continue;
1311 
1312     if (!IsFirstEmission)
1313       o << " && ";
1314     if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"),
1315                               Predicates->size() > 1, o))
1316       PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!");
1317     IsFirstEmission = false;
1318   }
1319   return !Predicates->empty();
1320 }
1321 
doesOpcodeNeedPredicate(unsigned Opc) const1322 bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
1323   ListInit *Predicates =
1324       AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
1325   for (unsigned i = 0; i < Predicates->size(); ++i) {
1326     Record *Pred = Predicates->getElementAsRecord(i);
1327     if (!Pred->getValue("AssemblerMatcherPredicate"))
1328       continue;
1329 
1330     if (isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
1331       return true;
1332   }
1333   return false;
1334 }
1335 
getPredicateIndex(DecoderTableInfo & TableInfo,StringRef Predicate) const1336 unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
1337                                           StringRef Predicate) const {
1338   // Using the full predicate string as the key value here is a bit
1339   // heavyweight, but is effective. If the string comparisons become a
1340   // performance concern, we can implement a mangling of the predicate
1341   // data easily enough with a map back to the actual string. That's
1342   // overkill for now, though.
1343 
1344   // Make sure the predicate is in the table.
1345   TableInfo.Predicates.insert(CachedHashString(Predicate));
1346   // Now figure out the index for when we write out the table.
1347   PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate);
1348   return (unsigned)(P - TableInfo.Predicates.begin());
1349 }
1350 
emitPredicateTableEntry(DecoderTableInfo & TableInfo,unsigned Opc) const1351 void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
1352                                             unsigned Opc) const {
1353   if (!doesOpcodeNeedPredicate(Opc))
1354     return;
1355 
1356   // Build up the predicate string.
1357   SmallString<256> Predicate;
1358   // FIXME: emitPredicateMatch() functions can take a buffer directly rather
1359   // than a stream.
1360   raw_svector_ostream PS(Predicate);
1361   unsigned I = 0;
1362   emitPredicateMatch(PS, I, Opc);
1363 
1364   // Figure out the index into the predicate table for the predicate just
1365   // computed.
1366   unsigned PIdx = getPredicateIndex(TableInfo, PS.str());
1367   SmallString<16> PBytes;
1368   raw_svector_ostream S(PBytes);
1369   encodeULEB128(PIdx, S);
1370 
1371   TableInfo.Table.push_back(MCD::OPC_CheckPredicate);
1372   // Predicate index.
1373   for (const auto PB : PBytes)
1374     TableInfo.Table.push_back(PB);
1375   // Push location for NumToSkip backpatching.
1376   TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
1377   TableInfo.Table.push_back(0);
1378   TableInfo.Table.push_back(0);
1379   TableInfo.Table.push_back(0);
1380 }
1381 
emitSoftFailTableEntry(DecoderTableInfo & TableInfo,unsigned Opc) const1382 void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
1383                                            unsigned Opc) const {
1384   const Record *EncodingDef = AllInstructions[Opc].EncodingDef;
1385   const RecordVal *RV = EncodingDef->getValue("SoftFail");
1386   BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
1387 
1388   if (!SFBits)
1389     return;
1390   BitsInit *InstBits = EncodingDef->getValueAsBitsInit("Inst");
1391 
1392   APInt PositiveMask(BitWidth, 0ULL);
1393   APInt NegativeMask(BitWidth, 0ULL);
1394   for (unsigned i = 0; i < BitWidth; ++i) {
1395     bit_value_t B = bitFromBits(*SFBits, i);
1396     bit_value_t IB = bitFromBits(*InstBits, i);
1397 
1398     if (B != BIT_TRUE)
1399       continue;
1400 
1401     switch (IB) {
1402     case BIT_FALSE:
1403       // The bit is meant to be false, so emit a check to see if it is true.
1404       PositiveMask.setBit(i);
1405       break;
1406     case BIT_TRUE:
1407       // The bit is meant to be true, so emit a check to see if it is false.
1408       NegativeMask.setBit(i);
1409       break;
1410     default:
1411       // The bit is not set; this must be an error!
1412       errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
1413              << AllInstructions[Opc] << " is set but Inst{" << i
1414              << "} is unset!\n"
1415              << "  - You can only mark a bit as SoftFail if it is fully defined"
1416              << " (1/0 - not '?') in Inst\n";
1417       return;
1418     }
1419   }
1420 
1421   bool NeedPositiveMask = PositiveMask.getBoolValue();
1422   bool NeedNegativeMask = NegativeMask.getBoolValue();
1423 
1424   if (!NeedPositiveMask && !NeedNegativeMask)
1425     return;
1426 
1427   TableInfo.Table.push_back(MCD::OPC_SoftFail);
1428 
1429   SmallString<16> MaskBytes;
1430   raw_svector_ostream S(MaskBytes);
1431   if (NeedPositiveMask) {
1432     encodeULEB128(PositiveMask.getZExtValue(), S);
1433     for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
1434       TableInfo.Table.push_back(MaskBytes[i]);
1435   } else
1436     TableInfo.Table.push_back(0);
1437   if (NeedNegativeMask) {
1438     MaskBytes.clear();
1439     encodeULEB128(NegativeMask.getZExtValue(), S);
1440     for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
1441       TableInfo.Table.push_back(MaskBytes[i]);
1442   } else
1443     TableInfo.Table.push_back(0);
1444 }
1445 
1446 // Emits table entries to decode the singleton.
emitSingletonTableEntry(DecoderTableInfo & TableInfo,EncodingIDAndOpcode Opc) const1447 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1448                                             EncodingIDAndOpcode Opc) const {
1449   std::vector<unsigned> StartBits;
1450   std::vector<unsigned> EndBits;
1451   std::vector<uint64_t> FieldVals;
1452   insn_t Insn;
1453   insnWithID(Insn, Opc.EncodingID);
1454 
1455   // Look for islands of undecoded bits of the singleton.
1456   getIslands(StartBits, EndBits, FieldVals, Insn);
1457 
1458   unsigned Size = StartBits.size();
1459 
1460   // Emit the predicate table entry if one is needed.
1461   emitPredicateTableEntry(TableInfo, Opc.EncodingID);
1462 
1463   // Check any additional encoding fields needed.
1464   for (unsigned I = Size; I != 0; --I) {
1465     unsigned NumBits = EndBits[I - 1] - StartBits[I - 1] + 1;
1466     assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!");
1467     TableInfo.Table.push_back(MCD::OPC_CheckField);
1468     uint8_t Buffer[16], *P;
1469     encodeULEB128(StartBits[I - 1], Buffer);
1470     for (P = Buffer; *P >= 128; ++P)
1471       TableInfo.Table.push_back(*P);
1472     TableInfo.Table.push_back(*P);
1473     TableInfo.Table.push_back(NumBits);
1474     encodeULEB128(FieldVals[I - 1], Buffer);
1475     for (P = Buffer; *P >= 128; ++P)
1476       TableInfo.Table.push_back(*P);
1477     TableInfo.Table.push_back(*P);
1478     // Push location for NumToSkip backpatching.
1479     TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
1480     // The fixup is always 24-bits, so go ahead and allocate the space
1481     // in the table so all our relative position calculations work OK even
1482     // before we fully resolve the real value here.
1483     TableInfo.Table.push_back(0);
1484     TableInfo.Table.push_back(0);
1485     TableInfo.Table.push_back(0);
1486   }
1487 
1488   // Check for soft failure of the match.
1489   emitSoftFailTableEntry(TableInfo, Opc.EncodingID);
1490 
1491   bool HasCompleteDecoder;
1492   unsigned DIdx =
1493       getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder);
1494 
1495   // Produce OPC_Decode or OPC_TryDecode opcode based on the information
1496   // whether the instruction decoder is complete or not. If it is complete
1497   // then it handles all possible values of remaining variable/unfiltered bits
1498   // and for any value can determine if the bitpattern is a valid instruction
1499   // or not. This means OPC_Decode will be the final step in the decoding
1500   // process. If it is not complete, then the Fail return code from the
1501   // decoder method indicates that additional processing should be done to see
1502   // if there is any other instruction that also matches the bitpattern and
1503   // can decode it.
1504   TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode
1505                                                : MCD::OPC_TryDecode);
1506   NumEncodingsSupported++;
1507   uint8_t Buffer[16], *p;
1508   encodeULEB128(Opc.Opcode, Buffer);
1509   for (p = Buffer; *p >= 128; ++p)
1510     TableInfo.Table.push_back(*p);
1511   TableInfo.Table.push_back(*p);
1512 
1513   SmallString<16> Bytes;
1514   raw_svector_ostream S(Bytes);
1515   encodeULEB128(DIdx, S);
1516 
1517   // Decoder index.
1518   for (const auto B : Bytes)
1519     TableInfo.Table.push_back(B);
1520 
1521   if (!HasCompleteDecoder) {
1522     // Push location for NumToSkip backpatching.
1523     TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
1524     // Allocate the space for the fixup.
1525     TableInfo.Table.push_back(0);
1526     TableInfo.Table.push_back(0);
1527     TableInfo.Table.push_back(0);
1528   }
1529 }
1530 
1531 // Emits table entries to decode the singleton, and then to decode the rest.
emitSingletonTableEntry(DecoderTableInfo & TableInfo,const Filter & Best) const1532 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1533                                             const Filter &Best) const {
1534   EncodingIDAndOpcode Opc = Best.getSingletonOpc();
1535 
1536   // complex singletons need predicate checks from the first singleton
1537   // to refer forward to the variable filterchooser that follows.
1538   TableInfo.FixupStack.emplace_back();
1539 
1540   emitSingletonTableEntry(TableInfo, Opc);
1541 
1542   resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
1543                      TableInfo.Table.size());
1544   TableInfo.FixupStack.pop_back();
1545 
1546   Best.getVariableFC().emitTableEntries(TableInfo);
1547 }
1548 
1549 // Assign a single filter and run with it.  Top level API client can initialize
1550 // with a single filter to start the filtering process.
runSingleFilter(unsigned startBit,unsigned numBit,bool mixed)1551 void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
1552                                     bool mixed) {
1553   Filters.clear();
1554   Filters.emplace_back(*this, startBit, numBit, true);
1555   BestIndex = 0; // Sole Filter instance to choose from.
1556   bestFilter().recurse();
1557 }
1558 
1559 // reportRegion is a helper function for filterProcessor to mark a region as
1560 // eligible for use as a filter region.
reportRegion(bitAttr_t RA,unsigned StartBit,unsigned BitIndex,bool AllowMixed)1561 void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
1562                                  unsigned BitIndex, bool AllowMixed) {
1563   if (RA == ATTR_MIXED && AllowMixed)
1564     Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true);
1565   else if (RA == ATTR_ALL_SET && !AllowMixed)
1566     Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false);
1567 }
1568 
1569 // FilterProcessor scans the well-known encoding bits of the instructions and
1570 // builds up a list of candidate filters.  It chooses the best filter and
1571 // recursively descends down the decoding tree.
filterProcessor(bool AllowMixed,bool Greedy)1572 bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
1573   Filters.clear();
1574   BestIndex = -1;
1575   unsigned numInstructions = Opcodes.size();
1576 
1577   assert(numInstructions && "Filter created with no instructions");
1578 
1579   // No further filtering is necessary.
1580   if (numInstructions == 1)
1581     return true;
1582 
1583   // Heuristics.  See also doFilter()'s "Heuristics" comment when num of
1584   // instructions is 3.
1585   if (AllowMixed && !Greedy) {
1586     assert(numInstructions == 3);
1587 
1588     for (const auto &Opcode : Opcodes) {
1589       std::vector<unsigned> StartBits;
1590       std::vector<unsigned> EndBits;
1591       std::vector<uint64_t> FieldVals;
1592       insn_t Insn;
1593 
1594       insnWithID(Insn, Opcode.EncodingID);
1595 
1596       // Look for islands of undecoded bits of any instruction.
1597       if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
1598         // Found an instruction with island(s).  Now just assign a filter.
1599         runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true);
1600         return true;
1601       }
1602     }
1603   }
1604 
1605   unsigned BitIndex;
1606 
1607   // We maintain BIT_WIDTH copies of the bitAttrs automaton.
1608   // The automaton consumes the corresponding bit from each
1609   // instruction.
1610   //
1611   //   Input symbols: 0, 1, and _ (unset).
1612   //   States:        NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
1613   //   Initial state: NONE.
1614   //
1615   // (NONE) ------- [01] -> (ALL_SET)
1616   // (NONE) ------- _ ----> (ALL_UNSET)
1617   // (ALL_SET) ---- [01] -> (ALL_SET)
1618   // (ALL_SET) ---- _ ----> (MIXED)
1619   // (ALL_UNSET) -- [01] -> (MIXED)
1620   // (ALL_UNSET) -- _ ----> (ALL_UNSET)
1621   // (MIXED) ------ . ----> (MIXED)
1622   // (FILTERED)---- . ----> (FILTERED)
1623 
1624   std::vector<bitAttr_t> bitAttrs;
1625 
1626   // FILTERED bit positions provide no entropy and are not worthy of pursuing.
1627   // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
1628   for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex)
1629     if (FilterBitValues[BitIndex] == BIT_TRUE ||
1630         FilterBitValues[BitIndex] == BIT_FALSE)
1631       bitAttrs.push_back(ATTR_FILTERED);
1632     else
1633       bitAttrs.push_back(ATTR_NONE);
1634 
1635   for (const auto &OpcPair : Opcodes) {
1636     insn_t insn;
1637 
1638     insnWithID(insn, OpcPair.EncodingID);
1639 
1640     for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
1641       switch (bitAttrs[BitIndex]) {
1642       case ATTR_NONE:
1643         if (insn[BitIndex] == BIT_UNSET)
1644           bitAttrs[BitIndex] = ATTR_ALL_UNSET;
1645         else
1646           bitAttrs[BitIndex] = ATTR_ALL_SET;
1647         break;
1648       case ATTR_ALL_SET:
1649         if (insn[BitIndex] == BIT_UNSET)
1650           bitAttrs[BitIndex] = ATTR_MIXED;
1651         break;
1652       case ATTR_ALL_UNSET:
1653         if (insn[BitIndex] != BIT_UNSET)
1654           bitAttrs[BitIndex] = ATTR_MIXED;
1655         break;
1656       case ATTR_MIXED:
1657       case ATTR_FILTERED:
1658         break;
1659       }
1660     }
1661   }
1662 
1663   // The regionAttr automaton consumes the bitAttrs automatons' state,
1664   // lowest-to-highest.
1665   //
1666   //   Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
1667   //   States:        NONE, ALL_SET, MIXED
1668   //   Initial state: NONE
1669   //
1670   // (NONE) ----- F --> (NONE)
1671   // (NONE) ----- S --> (ALL_SET)     ; and set region start
1672   // (NONE) ----- U --> (NONE)
1673   // (NONE) ----- M --> (MIXED)       ; and set region start
1674   // (ALL_SET) -- F --> (NONE)        ; and report an ALL_SET region
1675   // (ALL_SET) -- S --> (ALL_SET)
1676   // (ALL_SET) -- U --> (NONE)        ; and report an ALL_SET region
1677   // (ALL_SET) -- M --> (MIXED)       ; and report an ALL_SET region
1678   // (MIXED) ---- F --> (NONE)        ; and report a MIXED region
1679   // (MIXED) ---- S --> (ALL_SET)     ; and report a MIXED region
1680   // (MIXED) ---- U --> (NONE)        ; and report a MIXED region
1681   // (MIXED) ---- M --> (MIXED)
1682 
1683   bitAttr_t RA = ATTR_NONE;
1684   unsigned StartBit = 0;
1685 
1686   for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
1687     bitAttr_t bitAttr = bitAttrs[BitIndex];
1688 
1689     assert(bitAttr != ATTR_NONE && "Bit without attributes");
1690 
1691     switch (RA) {
1692     case ATTR_NONE:
1693       switch (bitAttr) {
1694       case ATTR_FILTERED:
1695         break;
1696       case ATTR_ALL_SET:
1697         StartBit = BitIndex;
1698         RA = ATTR_ALL_SET;
1699         break;
1700       case ATTR_ALL_UNSET:
1701         break;
1702       case ATTR_MIXED:
1703         StartBit = BitIndex;
1704         RA = ATTR_MIXED;
1705         break;
1706       default:
1707         llvm_unreachable("Unexpected bitAttr!");
1708       }
1709       break;
1710     case ATTR_ALL_SET:
1711       switch (bitAttr) {
1712       case ATTR_FILTERED:
1713         reportRegion(RA, StartBit, BitIndex, AllowMixed);
1714         RA = ATTR_NONE;
1715         break;
1716       case ATTR_ALL_SET:
1717         break;
1718       case ATTR_ALL_UNSET:
1719         reportRegion(RA, StartBit, BitIndex, AllowMixed);
1720         RA = ATTR_NONE;
1721         break;
1722       case ATTR_MIXED:
1723         reportRegion(RA, StartBit, BitIndex, AllowMixed);
1724         StartBit = BitIndex;
1725         RA = ATTR_MIXED;
1726         break;
1727       default:
1728         llvm_unreachable("Unexpected bitAttr!");
1729       }
1730       break;
1731     case ATTR_MIXED:
1732       switch (bitAttr) {
1733       case ATTR_FILTERED:
1734         reportRegion(RA, StartBit, BitIndex, AllowMixed);
1735         StartBit = BitIndex;
1736         RA = ATTR_NONE;
1737         break;
1738       case ATTR_ALL_SET:
1739         reportRegion(RA, StartBit, BitIndex, AllowMixed);
1740         StartBit = BitIndex;
1741         RA = ATTR_ALL_SET;
1742         break;
1743       case ATTR_ALL_UNSET:
1744         reportRegion(RA, StartBit, BitIndex, AllowMixed);
1745         RA = ATTR_NONE;
1746         break;
1747       case ATTR_MIXED:
1748         break;
1749       default:
1750         llvm_unreachable("Unexpected bitAttr!");
1751       }
1752       break;
1753     case ATTR_ALL_UNSET:
1754       llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
1755     case ATTR_FILTERED:
1756       llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
1757     }
1758   }
1759 
1760   // At the end, if we're still in ALL_SET or MIXED states, report a region
1761   switch (RA) {
1762   case ATTR_NONE:
1763     break;
1764   case ATTR_FILTERED:
1765     break;
1766   case ATTR_ALL_SET:
1767     reportRegion(RA, StartBit, BitIndex, AllowMixed);
1768     break;
1769   case ATTR_ALL_UNSET:
1770     break;
1771   case ATTR_MIXED:
1772     reportRegion(RA, StartBit, BitIndex, AllowMixed);
1773     break;
1774   }
1775 
1776   // We have finished with the filter processings.  Now it's time to choose
1777   // the best performing filter.
1778   BestIndex = 0;
1779   bool AllUseless = true;
1780   unsigned BestScore = 0;
1781 
1782   for (const auto &[Idx, Filter] : enumerate(Filters)) {
1783     unsigned Usefulness = Filter.usefulness();
1784 
1785     if (Usefulness)
1786       AllUseless = false;
1787 
1788     if (Usefulness > BestScore) {
1789       BestIndex = Idx;
1790       BestScore = Usefulness;
1791     }
1792   }
1793 
1794   if (!AllUseless)
1795     bestFilter().recurse();
1796 
1797   return !AllUseless;
1798 } // end of FilterChooser::filterProcessor(bool)
1799 
1800 // Decides on the best configuration of filter(s) to use in order to decode
1801 // the instructions.  A conflict of instructions may occur, in which case we
1802 // dump the conflict set to the standard error.
doFilter()1803 void FilterChooser::doFilter() {
1804   unsigned Num = Opcodes.size();
1805   assert(Num && "FilterChooser created with no instructions");
1806 
1807   // Try regions of consecutive known bit values first.
1808   if (filterProcessor(false))
1809     return;
1810 
1811   // Then regions of mixed bits (both known and unitialized bit values allowed).
1812   if (filterProcessor(true))
1813     return;
1814 
1815   // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
1816   // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
1817   // well-known encoding pattern.  In such case, we backtrack and scan for the
1818   // the very first consecutive ATTR_ALL_SET region and assign a filter to it.
1819   if (Num == 3 && filterProcessor(true, false))
1820     return;
1821 
1822   // If we come to here, the instruction decoding has failed.
1823   // Set the BestIndex to -1 to indicate so.
1824   BestIndex = -1;
1825 }
1826 
1827 // emitTableEntries - Emit state machine entries to decode our share of
1828 // instructions.
emitTableEntries(DecoderTableInfo & TableInfo) const1829 void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
1830   if (Opcodes.size() == 1) {
1831     // There is only one instruction in the set, which is great!
1832     // Call emitSingletonDecoder() to see whether there are any remaining
1833     // encodings bits.
1834     emitSingletonTableEntry(TableInfo, Opcodes[0]);
1835     return;
1836   }
1837 
1838   // Choose the best filter to do the decodings!
1839   if (BestIndex != -1) {
1840     const Filter &Best = Filters[BestIndex];
1841     if (Best.getNumFiltered() == 1)
1842       emitSingletonTableEntry(TableInfo, Best);
1843     else
1844       Best.emitTableEntry(TableInfo);
1845     return;
1846   }
1847 
1848   // We don't know how to decode these instructions!  Dump the
1849   // conflict set and bail.
1850 
1851   // Print out useful conflict information for postmortem analysis.
1852   errs() << "Decoding Conflict:\n";
1853 
1854   dumpStack(errs(), "\t\t");
1855 
1856   for (auto Opcode : Opcodes) {
1857     errs() << '\t';
1858     emitNameWithID(errs(), Opcode.EncodingID);
1859     errs() << " ";
1860     dumpBits(
1861         errs(),
1862         getBitsField(*AllInstructions[Opcode.EncodingID].EncodingDef, "Inst"));
1863     errs() << '\n';
1864   }
1865 }
1866 
findOperandDecoderMethod(Record * Record)1867 static std::string findOperandDecoderMethod(Record *Record) {
1868   std::string Decoder;
1869 
1870   RecordVal *DecoderString = Record->getValue("DecoderMethod");
1871   StringInit *String =
1872       DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
1873   if (String) {
1874     Decoder = std::string(String->getValue());
1875     if (!Decoder.empty())
1876       return Decoder;
1877   }
1878 
1879   if (Record->isSubClassOf("RegisterOperand"))
1880     // Allows use of a DecoderMethod in referenced RegisterClass if set.
1881     return findOperandDecoderMethod(Record->getValueAsDef("RegClass"));
1882 
1883   if (Record->isSubClassOf("RegisterClass")) {
1884     Decoder = "Decode" + Record->getName().str() + "RegisterClass";
1885   } else if (Record->isSubClassOf("PointerLikeRegClass")) {
1886     Decoder = "DecodePointerLikeRegClass" +
1887               utostr(Record->getValueAsInt("RegClassKind"));
1888   }
1889 
1890   return Decoder;
1891 }
1892 
getOpInfo(Record * TypeRecord)1893 OperandInfo getOpInfo(Record *TypeRecord) {
1894   std::string Decoder = findOperandDecoderMethod(TypeRecord);
1895 
1896   RecordVal *HasCompleteDecoderVal = TypeRecord->getValue("hasCompleteDecoder");
1897   BitInit *HasCompleteDecoderBit =
1898       HasCompleteDecoderVal
1899           ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
1900           : nullptr;
1901   bool HasCompleteDecoder =
1902       HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
1903 
1904   return OperandInfo(Decoder, HasCompleteDecoder);
1905 }
1906 
parseVarLenInstOperand(const Record & Def,std::vector<OperandInfo> & Operands,const CodeGenInstruction & CGI)1907 void parseVarLenInstOperand(const Record &Def,
1908                             std::vector<OperandInfo> &Operands,
1909                             const CodeGenInstruction &CGI) {
1910 
1911   const RecordVal *RV = Def.getValue("Inst");
1912   VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
1913   SmallVector<int> TiedTo;
1914 
1915   for (const auto &[Idx, Op] : enumerate(CGI.Operands)) {
1916     if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
1917       for (auto *Arg : Op.MIOperandInfo->getArgs())
1918         Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef()));
1919     else
1920       Operands.push_back(getOpInfo(Op.Rec));
1921 
1922     int TiedReg = Op.getTiedRegister();
1923     TiedTo.push_back(-1);
1924     if (TiedReg != -1) {
1925       TiedTo[Idx] = TiedReg;
1926       TiedTo[TiedReg] = Idx;
1927     }
1928   }
1929 
1930   unsigned CurrBitPos = 0;
1931   for (const auto &EncodingSegment : VLI) {
1932     unsigned Offset = 0;
1933     StringRef OpName;
1934 
1935     if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) {
1936       OpName = SI->getValue();
1937     } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) {
1938       OpName = cast<StringInit>(DI->getArg(0))->getValue();
1939       Offset = cast<IntInit>(DI->getArg(2))->getValue();
1940     }
1941 
1942     if (!OpName.empty()) {
1943       auto OpSubOpPair =
1944           const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
1945               OpName);
1946       unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair);
1947       Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
1948       if (!EncodingSegment.CustomDecoder.empty())
1949         Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str();
1950 
1951       int TiedReg = TiedTo[OpSubOpPair.first];
1952       if (TiedReg != -1) {
1953         unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
1954             std::pair(TiedReg, OpSubOpPair.second));
1955         Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
1956       }
1957     }
1958 
1959     CurrBitPos += EncodingSegment.BitWidth;
1960   }
1961 }
1962 
debugDumpRecord(const Record & Rec)1963 static void debugDumpRecord(const Record &Rec) {
1964   // Dump the record, so we can see what's going on...
1965   std::string E;
1966   raw_string_ostream S(E);
1967   S << "Dumping record for previous error:\n";
1968   S << Rec;
1969   PrintNote(E);
1970 }
1971 
1972 /// For an operand field named OpName: populate OpInfo.InitValue with the
1973 /// constant-valued bit values, and OpInfo.Fields with the ranges of bits to
1974 /// insert from the decoded instruction.
addOneOperandFields(const Record & EncodingDef,const BitsInit & Bits,std::map<std::string,std::string> & TiedNames,StringRef OpName,OperandInfo & OpInfo)1975 static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits,
1976                                 std::map<std::string, std::string> &TiedNames,
1977                                 StringRef OpName, OperandInfo &OpInfo) {
1978   // Some bits of the operand may be required to be 1 depending on the
1979   // instruction's encoding. Collect those bits.
1980   if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName))
1981     if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue()))
1982       for (unsigned I = 0; I < OpBits->getNumBits(); ++I)
1983         if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I)))
1984           if (OpBit->getValue())
1985             OpInfo.InitValue |= 1ULL << I;
1986 
1987   for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) {
1988     VarInit *Var;
1989     unsigned Offset = 0;
1990     for (; J != Bits.getNumBits(); ++J) {
1991       VarBitInit *BJ = dyn_cast<VarBitInit>(Bits.getBit(J));
1992       if (BJ) {
1993         Var = dyn_cast<VarInit>(BJ->getBitVar());
1994         if (I == J)
1995           Offset = BJ->getBitNum();
1996         else if (BJ->getBitNum() != Offset + J - I)
1997           break;
1998       } else {
1999         Var = dyn_cast<VarInit>(Bits.getBit(J));
2000       }
2001       if (!Var || (Var->getName() != OpName &&
2002                    Var->getName() != TiedNames[std::string(OpName)]))
2003         break;
2004     }
2005     if (I == J)
2006       ++J;
2007     else
2008       OpInfo.addField(I, J - I, Offset);
2009   }
2010 }
2011 
2012 static unsigned
populateInstruction(CodeGenTarget & Target,const Record & EncodingDef,const CodeGenInstruction & CGI,unsigned Opc,std::map<unsigned,std::vector<OperandInfo>> & Operands,bool IsVarLenInst)2013 populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
2014                     const CodeGenInstruction &CGI, unsigned Opc,
2015                     std::map<unsigned, std::vector<OperandInfo>> &Operands,
2016                     bool IsVarLenInst) {
2017   const Record &Def = *CGI.TheDef;
2018   // If all the bit positions are not specified; do not decode this instruction.
2019   // We are bound to fail!  For proper disassembly, the well-known encoding bits
2020   // of the instruction must be fully specified.
2021 
2022   BitsInit &Bits = getBitsField(EncodingDef, "Inst");
2023   if (Bits.allInComplete())
2024     return 0;
2025 
2026   std::vector<OperandInfo> InsnOperands;
2027 
2028   // If the instruction has specified a custom decoding hook, use that instead
2029   // of trying to auto-generate the decoder.
2030   StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod");
2031   if (InstDecoder != "") {
2032     bool HasCompleteInstDecoder =
2033         EncodingDef.getValueAsBit("hasCompleteDecoder");
2034     InsnOperands.push_back(
2035         OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder));
2036     Operands[Opc] = InsnOperands;
2037     return Bits.getNumBits();
2038   }
2039 
2040   // Generate a description of the operand of the instruction that we know
2041   // how to decode automatically.
2042   // FIXME: We'll need to have a way to manually override this as needed.
2043 
2044   // Gather the outputs/inputs of the instruction, so we can find their
2045   // positions in the encoding.  This assumes for now that they appear in the
2046   // MCInst in the order that they're listed.
2047   std::vector<std::pair<Init *, StringRef>> InOutOperands;
2048   DagInit *Out = Def.getValueAsDag("OutOperandList");
2049   DagInit *In = Def.getValueAsDag("InOperandList");
2050   for (const auto &[Idx, Arg] : enumerate(Out->getArgs()))
2051     InOutOperands.push_back(std::pair(Arg, Out->getArgNameStr(Idx)));
2052   for (const auto &[Idx, Arg] : enumerate(In->getArgs()))
2053     InOutOperands.push_back(std::pair(Arg, In->getArgNameStr(Idx)));
2054 
2055   // Search for tied operands, so that we can correctly instantiate
2056   // operands that are not explicitly represented in the encoding.
2057   std::map<std::string, std::string> TiedNames;
2058   for (const auto &[I, Op] : enumerate(CGI.Operands)) {
2059     for (const auto &[J, CI] : enumerate(Op.Constraints)) {
2060       if (CI.isTied()) {
2061         std::pair<unsigned, unsigned> SO =
2062             CGI.Operands.getSubOperandNumber(CI.getTiedOperand());
2063         std::string TiedName = CGI.Operands[SO.first].SubOpNames[SO.second];
2064         if (TiedName.empty())
2065           TiedName = CGI.Operands[SO.first].Name;
2066         std::string MyName = Op.SubOpNames[J];
2067         if (MyName.empty())
2068           MyName = Op.Name;
2069 
2070         TiedNames[MyName] = TiedName;
2071         TiedNames[TiedName] = MyName;
2072       }
2073     }
2074   }
2075 
2076   if (IsVarLenInst) {
2077     parseVarLenInstOperand(EncodingDef, InsnOperands, CGI);
2078   } else {
2079     // For each operand, see if we can figure out where it is encoded.
2080     for (const auto &Op : InOutOperands) {
2081       Init *OpInit = Op.first;
2082       StringRef OpName = Op.second;
2083 
2084       // We're ready to find the instruction encoding locations for this
2085       // operand.
2086 
2087       // First, find the operand type ("OpInit"), and sub-op names
2088       // ("SubArgDag") if present.
2089       DagInit *SubArgDag = dyn_cast<DagInit>(OpInit);
2090       if (SubArgDag)
2091         OpInit = SubArgDag->getOperator();
2092       Record *OpTypeRec = cast<DefInit>(OpInit)->getDef();
2093       // Lookup the sub-operands from the operand type record (note that only
2094       // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp).
2095       DagInit *SubOps = OpTypeRec->isSubClassOf("Operand")
2096                             ? OpTypeRec->getValueAsDag("MIOperandInfo")
2097                             : nullptr;
2098 
2099       // Lookup the decoder method and construct a new OperandInfo to hold our
2100       // result.
2101       OperandInfo OpInfo = getOpInfo(OpTypeRec);
2102 
2103       // If we have named sub-operands...
2104       if (SubArgDag) {
2105         // Then there should not be a custom decoder specified on the top-level
2106         // type.
2107         if (!OpInfo.Decoder.empty()) {
2108           PrintError(EncodingDef.getLoc(),
2109                      "DecoderEmitter: operand \"" + OpName + "\" has type \"" +
2110                          OpInit->getAsString() +
2111                          "\" with a custom DecoderMethod, but also named "
2112                          "sub-operands.");
2113           continue;
2114         }
2115 
2116         // Decode each of the sub-ops separately.
2117         assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs());
2118         for (const auto &[I, Arg] : enumerate(SubOps->getArgs())) {
2119           StringRef SubOpName = SubArgDag->getArgNameStr(I);
2120           OperandInfo SubOpInfo = getOpInfo(cast<DefInit>(Arg)->getDef());
2121 
2122           addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpName,
2123                               SubOpInfo);
2124           InsnOperands.push_back(SubOpInfo);
2125         }
2126         continue;
2127       }
2128 
2129       // Otherwise, if we have an operand with sub-operands, but they aren't
2130       // named...
2131       if (SubOps && OpInfo.Decoder.empty()) {
2132         // If it's a single sub-operand, and no custom decoder, use the decoder
2133         // from the one sub-operand.
2134         if (SubOps->getNumArgs() == 1)
2135           OpInfo = getOpInfo(cast<DefInit>(SubOps->getArg(0))->getDef());
2136 
2137         // If we have multiple sub-ops, there'd better have a custom
2138         // decoder. (Otherwise we don't know how to populate them properly...)
2139         if (SubOps->getNumArgs() > 1) {
2140           PrintError(EncodingDef.getLoc(),
2141                      "DecoderEmitter: operand \"" + OpName +
2142                          "\" uses MIOperandInfo with multiple ops, but doesn't "
2143                          "have a custom decoder!");
2144           debugDumpRecord(EncodingDef);
2145           continue;
2146         }
2147       }
2148 
2149       addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo);
2150       // FIXME: it should be an error not to find a definition for a given
2151       // operand, rather than just failing to add it to the resulting
2152       // instruction! (This is a longstanding bug, which will be addressed in an
2153       // upcoming change.)
2154       if (OpInfo.numFields() > 0)
2155         InsnOperands.push_back(OpInfo);
2156     }
2157   }
2158   Operands[Opc] = InsnOperands;
2159 
2160 #if 0
2161   LLVM_DEBUG({
2162       // Dumps the instruction encoding bits.
2163       dumpBits(errs(), Bits);
2164 
2165       errs() << '\n';
2166 
2167       // Dumps the list of operand info.
2168       for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
2169         const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
2170         const std::string &OperandName = Info.Name;
2171         const Record &OperandDef = *Info.Rec;
2172 
2173         errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n";
2174       }
2175     });
2176 #endif
2177 
2178   return Bits.getNumBits();
2179 }
2180 
2181 // emitFieldFromInstruction - Emit the templated helper function
2182 // fieldFromInstruction().
2183 // On Windows we make sure that this function is not inlined when
2184 // using the VS compiler. It has a bug which causes the function
2185 // to be optimized out in some circumstances. See llvm.org/pr38292
emitFieldFromInstruction(formatted_raw_ostream & OS)2186 static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
2187   OS << R"(
2188 // Helper functions for extracting fields from encoded instructions.
2189 // InsnType must either be integral or an APInt-like object that must:
2190 // * be default-constructible and copy-constructible
2191 // * be constructible from an APInt (this can be private)
2192 // * Support insertBits(bits, startBit, numBits)
2193 // * Support extractBitsAsZExtValue(numBits, startBit)
2194 // * Support the ~, &, ==, and != operators with other objects of the same type
2195 // * Support the != and bitwise & with uint64_t
2196 // * Support put (<<) to raw_ostream&
2197 template <typename InsnType>
2198 #if defined(_MSC_VER) && !defined(__clang__)
2199 __declspec(noinline)
2200 #endif
2201 static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>
2202 fieldFromInstruction(const InsnType &insn, unsigned startBit,
2203                      unsigned numBits) {
2204   assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!");
2205   assert(startBit + numBits <= (sizeof(InsnType) * 8) &&
2206          "Instruction field out of bounds!");
2207   InsnType fieldMask;
2208   if (numBits == sizeof(InsnType) * 8)
2209     fieldMask = (InsnType)(-1LL);
2210   else
2211     fieldMask = (((InsnType)1 << numBits) - 1) << startBit;
2212   return (insn & fieldMask) >> startBit;
2213 }
2214 
2215 template <typename InsnType>
2216 static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t>
2217 fieldFromInstruction(const InsnType &insn, unsigned startBit,
2218                      unsigned numBits) {
2219   return insn.extractBitsAsZExtValue(numBits, startBit);
2220 }
2221 )";
2222 }
2223 
2224 // emitInsertBits - Emit the templated helper function insertBits().
emitInsertBits(formatted_raw_ostream & OS)2225 static void emitInsertBits(formatted_raw_ostream &OS) {
2226   OS << R"(
2227 // Helper function for inserting bits extracted from an encoded instruction into
2228 // a field.
2229 template <typename InsnType>
2230 static std::enable_if_t<std::is_integral<InsnType>::value>
2231 insertBits(InsnType &field, InsnType bits, unsigned startBit, unsigned numBits) {
2232   assert(startBit + numBits <= sizeof field * 8);
2233   field |= (InsnType)bits << startBit;
2234 }
2235 
2236 template <typename InsnType>
2237 static std::enable_if_t<!std::is_integral<InsnType>::value>
2238 insertBits(InsnType &field, uint64_t bits, unsigned startBit, unsigned numBits) {
2239   field.insertBits(bits, startBit, numBits);
2240 }
2241 )";
2242 }
2243 
2244 // emitDecodeInstruction - Emit the templated helper function
2245 // decodeInstruction().
emitDecodeInstruction(formatted_raw_ostream & OS,bool IsVarLenInst)2246 static void emitDecodeInstruction(formatted_raw_ostream &OS,
2247                                   bool IsVarLenInst) {
2248   OS << R"(
2249 template <typename InsnType>
2250 static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
2251                                       InsnType insn, uint64_t Address,
2252                                       const MCDisassembler *DisAsm,
2253                                       const MCSubtargetInfo &STI)";
2254   if (IsVarLenInst) {
2255     OS << ",\n                                      "
2256           "llvm::function_ref<void(APInt &, uint64_t)> makeUp";
2257   }
2258   OS << R"() {
2259   const FeatureBitset &Bits = STI.getFeatureBits();
2260 
2261   const uint8_t *Ptr = DecodeTable;
2262   uint64_t CurFieldValue = 0;
2263   DecodeStatus S = MCDisassembler::Success;
2264   while (true) {
2265     ptrdiff_t Loc = Ptr - DecodeTable;
2266     switch (*Ptr) {
2267     default:
2268       errs() << Loc << ": Unexpected decode table opcode!\n";
2269       return MCDisassembler::Fail;
2270     case MCD::OPC_ExtractField: {
2271       // Decode the start value.
2272       unsigned Start = decodeULEB128AndIncUnsafe(++Ptr);
2273       unsigned Len = *Ptr++;)";
2274   if (IsVarLenInst)
2275     OS << "\n      makeUp(insn, Start + Len);";
2276   OS << R"(
2277       CurFieldValue = fieldFromInstruction(insn, Start, Len);
2278       LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", "
2279                    << Len << "): " << CurFieldValue << "\n");
2280       break;
2281     }
2282     case MCD::OPC_FilterValue: {
2283       // Decode the field value.
2284       uint64_t Val = decodeULEB128AndIncUnsafe(++Ptr);
2285       // NumToSkip is a plain 24-bit integer.
2286       unsigned NumToSkip = *Ptr++;
2287       NumToSkip |= (*Ptr++) << 8;
2288       NumToSkip |= (*Ptr++) << 16;
2289 
2290       // Perform the filter operation.
2291       if (Val != CurFieldValue)
2292         Ptr += NumToSkip;
2293       LLVM_DEBUG(dbgs() << Loc << ": OPC_FilterValue(" << Val << ", " << NumToSkip
2294                    << "): " << ((Val != CurFieldValue) ? "FAIL:" : "PASS:")
2295                    << " continuing at " << (Ptr - DecodeTable) << "\n");
2296 
2297       break;
2298     }
2299     case MCD::OPC_CheckField: {
2300       // Decode the start value.
2301       unsigned Start = decodeULEB128AndIncUnsafe(++Ptr);
2302       unsigned Len = *Ptr;)";
2303   if (IsVarLenInst)
2304     OS << "\n      makeUp(insn, Start + Len);";
2305   OS << R"(
2306       uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
2307       // Decode the field value.
2308       unsigned PtrLen = 0;
2309       uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
2310       Ptr += PtrLen;
2311       // NumToSkip is a plain 24-bit integer.
2312       unsigned NumToSkip = *Ptr++;
2313       NumToSkip |= (*Ptr++) << 8;
2314       NumToSkip |= (*Ptr++) << 16;
2315 
2316       // If the actual and expected values don't match, skip.
2317       if (ExpectedValue != FieldValue)
2318         Ptr += NumToSkip;
2319       LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckField(" << Start << ", "
2320                    << Len << ", " << ExpectedValue << ", " << NumToSkip
2321                    << "): FieldValue = " << FieldValue << ", ExpectedValue = "
2322                    << ExpectedValue << ": "
2323                    << ((ExpectedValue == FieldValue) ? "PASS\n" : "FAIL\n"));
2324       break;
2325     }
2326     case MCD::OPC_CheckPredicate: {
2327       // Decode the Predicate Index value.
2328       unsigned PIdx = decodeULEB128AndIncUnsafe(++Ptr);
2329       // NumToSkip is a plain 24-bit integer.
2330       unsigned NumToSkip = *Ptr++;
2331       NumToSkip |= (*Ptr++) << 8;
2332       NumToSkip |= (*Ptr++) << 16;
2333       // Check the predicate.
2334       bool Pred;
2335       if (!(Pred = checkDecoderPredicate(PIdx, Bits)))
2336         Ptr += NumToSkip;
2337       (void)Pred;
2338       LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckPredicate(" << PIdx << "): "
2339             << (Pred ? "PASS\n" : "FAIL\n"));
2340 
2341       break;
2342     }
2343     case MCD::OPC_Decode: {
2344       // Decode the Opcode value.
2345       unsigned Opc = decodeULEB128AndIncUnsafe(++Ptr);
2346       unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
2347 
2348       MI.clear();
2349       MI.setOpcode(Opc);
2350       bool DecodeComplete;)";
2351   if (IsVarLenInst) {
2352     OS << "\n      unsigned Len = InstrLenTable[Opc];\n"
2353        << "      makeUp(insn, Len);";
2354   }
2355   OS << R"(
2356       S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, DecodeComplete);
2357       assert(DecodeComplete);
2358 
2359       LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
2360                    << ", using decoder " << DecodeIdx << ": "
2361                    << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n");
2362       return S;
2363     }
2364     case MCD::OPC_TryDecode: {
2365       // Decode the Opcode value.
2366       unsigned Opc = decodeULEB128AndIncUnsafe(++Ptr);
2367       unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
2368       // NumToSkip is a plain 24-bit integer.
2369       unsigned NumToSkip = *Ptr++;
2370       NumToSkip |= (*Ptr++) << 8;
2371       NumToSkip |= (*Ptr++) << 16;
2372 
2373       // Perform the decode operation.
2374       MCInst TmpMI;
2375       TmpMI.setOpcode(Opc);
2376       bool DecodeComplete;
2377       S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, DecodeComplete);
2378       LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc
2379                    << ", using decoder " << DecodeIdx << ": ");
2380 
2381       if (DecodeComplete) {
2382         // Decoding complete.
2383         LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n");
2384         MI = TmpMI;
2385         return S;
2386       } else {
2387         assert(S == MCDisassembler::Fail);
2388         // If the decoding was incomplete, skip.
2389         Ptr += NumToSkip;
2390         LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n");
2391         // Reset decode status. This also drops a SoftFail status that could be
2392         // set before the decode attempt.
2393         S = MCDisassembler::Success;
2394       }
2395       break;
2396     }
2397     case MCD::OPC_SoftFail: {
2398       // Decode the mask values.
2399       uint64_t PositiveMask = decodeULEB128AndIncUnsafe(++Ptr);
2400       uint64_t NegativeMask = decodeULEB128AndIncUnsafe(Ptr);
2401       bool Fail = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0;
2402       if (Fail)
2403         S = MCDisassembler::SoftFail;
2404       LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Fail ? "FAIL\n" : "PASS\n"));
2405       break;
2406     }
2407     case MCD::OPC_Fail: {
2408       LLVM_DEBUG(dbgs() << Loc << ": OPC_Fail\n");
2409       return MCDisassembler::Fail;
2410     }
2411     }
2412   }
2413   llvm_unreachable("bogosity detected in disassembler state machine!");
2414 }
2415 
2416 )";
2417 }
2418 
2419 // Helper to propagate SoftFail status. Returns false if the status is Fail;
2420 // callers are expected to early-exit in that condition. (Note, the '&' operator
2421 // is correct to propagate the values of this enum; see comment on 'enum
2422 // DecodeStatus'.)
emitCheck(formatted_raw_ostream & OS)2423 static void emitCheck(formatted_raw_ostream &OS) {
2424   OS << R"(
2425 static bool Check(DecodeStatus &Out, DecodeStatus In) {
2426   Out = static_cast<DecodeStatus>(Out & In);
2427   return Out != MCDisassembler::Fail;
2428 }
2429 
2430 )";
2431 }
2432 
2433 // Collect all HwModes referenced by the target for encoding purposes,
2434 // returning a vector of corresponding names.
collectHwModesReferencedForEncodings(const CodeGenHwModes & HWM,std::vector<StringRef> & Names,NamespacesHwModesMap & NamespacesWithHwModes)2435 static void collectHwModesReferencedForEncodings(
2436     const CodeGenHwModes &HWM, std::vector<StringRef> &Names,
2437     NamespacesHwModesMap &NamespacesWithHwModes) {
2438   SmallBitVector BV(HWM.getNumModeIds());
2439   for (const auto &MS : HWM.getHwModeSelects()) {
2440     for (const HwModeSelect::PairType &P : MS.second.Items) {
2441       if (P.second->isSubClassOf("InstructionEncoding")) {
2442         std::string DecoderNamespace =
2443             std::string(P.second->getValueAsString("DecoderNamespace"));
2444         if (P.first == DefaultMode) {
2445           NamespacesWithHwModes[DecoderNamespace].insert("");
2446         } else {
2447           NamespacesWithHwModes[DecoderNamespace].insert(
2448               HWM.getMode(P.first).Name);
2449         }
2450         BV.set(P.first);
2451       }
2452     }
2453   }
2454   transform(BV.set_bits(), std::back_inserter(Names), [&HWM](const int &M) {
2455     if (M == DefaultMode)
2456       return StringRef("");
2457     return HWM.getModeName(M, /*IncludeDefault=*/true);
2458   });
2459 }
2460 
2461 static void
handleHwModesUnrelatedEncodings(const CodeGenInstruction * Instr,const std::vector<StringRef> & HwModeNames,NamespacesHwModesMap & NamespacesWithHwModes,std::vector<EncodingAndInst> & GlobalEncodings)2462 handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr,
2463                                 const std::vector<StringRef> &HwModeNames,
2464                                 NamespacesHwModesMap &NamespacesWithHwModes,
2465                                 std::vector<EncodingAndInst> &GlobalEncodings) {
2466   const Record *InstDef = Instr->TheDef;
2467 
2468   switch (DecoderEmitterSuppressDuplicates) {
2469   case SUPPRESSION_DISABLE: {
2470     for (StringRef HwModeName : HwModeNames)
2471       GlobalEncodings.emplace_back(InstDef, Instr, HwModeName);
2472     break;
2473   }
2474   case SUPPRESSION_LEVEL1: {
2475     std::string DecoderNamespace =
2476         std::string(InstDef->getValueAsString("DecoderNamespace"));
2477     auto It = NamespacesWithHwModes.find(DecoderNamespace);
2478     if (It != NamespacesWithHwModes.end()) {
2479       for (StringRef HwModeName : It->second)
2480         GlobalEncodings.emplace_back(InstDef, Instr, HwModeName);
2481     } else {
2482       // Only emit the encoding once, as it's DecoderNamespace doesn't
2483       // contain any HwModes.
2484       GlobalEncodings.emplace_back(InstDef, Instr, "");
2485     }
2486     break;
2487   }
2488   case SUPPRESSION_LEVEL2:
2489     GlobalEncodings.emplace_back(InstDef, Instr, "");
2490     break;
2491   }
2492 }
2493 
2494 // Emits disassembler code for instruction decoding.
run(raw_ostream & o)2495 void DecoderEmitter::run(raw_ostream &o) {
2496   formatted_raw_ostream OS(o);
2497   OS << R"(
2498 #include "llvm/MC/MCInst.h"
2499 #include "llvm/MC/MCSubtargetInfo.h"
2500 #include "llvm/Support/DataTypes.h"
2501 #include "llvm/Support/Debug.h"
2502 #include "llvm/Support/LEB128.h"
2503 #include "llvm/Support/raw_ostream.h"
2504 #include "llvm/TargetParser/SubtargetFeature.h"
2505 #include <assert.h>
2506 
2507 namespace llvm {
2508 )";
2509 
2510   emitFieldFromInstruction(OS);
2511   emitInsertBits(OS);
2512   emitCheck(OS);
2513 
2514   Target.reverseBitsForLittleEndianEncoding();
2515 
2516   // Parameterize the decoders based on namespace and instruction width.
2517 
2518   // First, collect all encoding-related HwModes referenced by the target.
2519   // And establish a mapping table between DecoderNamespace and HwMode.
2520   // If HwModeNames is empty, add the empty string so we always have one HwMode.
2521   const CodeGenHwModes &HWM = Target.getHwModes();
2522   std::vector<StringRef> HwModeNames;
2523   NamespacesHwModesMap NamespacesWithHwModes;
2524   collectHwModesReferencedForEncodings(HWM, HwModeNames, NamespacesWithHwModes);
2525   if (HwModeNames.empty())
2526     HwModeNames.push_back("");
2527 
2528   const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
2529   NumberedEncodings.reserve(NumberedInstructions.size());
2530   for (const auto &NumberedInstruction : NumberedInstructions) {
2531     const Record *InstDef = NumberedInstruction->TheDef;
2532     if (const RecordVal *RV = InstDef->getValue("EncodingInfos")) {
2533       if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
2534         EncodingInfoByHwMode EBM(DI->getDef(), HWM);
2535         for (auto &[ModeId, Encoding] : EBM) {
2536           // DecoderTables with DefaultMode should not have any suffix.
2537           if (ModeId == DefaultMode) {
2538             NumberedEncodings.emplace_back(Encoding, NumberedInstruction, "");
2539           } else {
2540             NumberedEncodings.emplace_back(Encoding, NumberedInstruction,
2541                                            HWM.getMode(ModeId).Name);
2542           }
2543         }
2544         continue;
2545       }
2546     }
2547     // This instruction is encoded the same on all HwModes.
2548     // According to user needs, provide varying degrees of suppression.
2549     handleHwModesUnrelatedEncodings(NumberedInstruction, HwModeNames,
2550                                     NamespacesWithHwModes, NumberedEncodings);
2551   }
2552   for (const auto &NumberedAlias :
2553        RK.getAllDerivedDefinitions("AdditionalEncoding"))
2554     NumberedEncodings.emplace_back(
2555         NumberedAlias,
2556         &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf")));
2557 
2558   std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
2559       OpcMap;
2560   std::map<unsigned, std::vector<OperandInfo>> Operands;
2561   std::vector<unsigned> InstrLen;
2562   bool IsVarLenInst = Target.hasVariableLengthEncodings();
2563   unsigned MaxInstLen = 0;
2564 
2565   for (const auto &[NEI, NumberedEncoding] : enumerate(NumberedEncodings)) {
2566     const Record *EncodingDef = NumberedEncoding.EncodingDef;
2567     const CodeGenInstruction *Inst = NumberedEncoding.Inst;
2568     const Record *Def = Inst->TheDef;
2569     unsigned Size = EncodingDef->getValueAsInt("Size");
2570     if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
2571         Def->getValueAsBit("isPseudo") ||
2572         Def->getValueAsBit("isAsmParserOnly") ||
2573         Def->getValueAsBit("isCodeGenOnly")) {
2574       NumEncodingsLackingDisasm++;
2575       continue;
2576     }
2577 
2578     if (NEI < NumberedInstructions.size())
2579       NumInstructions++;
2580     NumEncodings++;
2581 
2582     if (!Size && !IsVarLenInst)
2583       continue;
2584 
2585     if (IsVarLenInst)
2586       InstrLen.resize(NumberedInstructions.size(), 0);
2587 
2588     if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, NEI,
2589                                            Operands, IsVarLenInst)) {
2590       if (IsVarLenInst) {
2591         MaxInstLen = std::max(MaxInstLen, Len);
2592         InstrLen[NEI] = Len;
2593       }
2594       std::string DecoderNamespace =
2595           std::string(EncodingDef->getValueAsString("DecoderNamespace"));
2596       if (!NumberedEncoding.HwModeName.empty())
2597         DecoderNamespace +=
2598             std::string("_") + NumberedEncoding.HwModeName.str();
2599       OpcMap[std::pair(DecoderNamespace, Size)].emplace_back(
2600           NEI, Target.getInstrIntValue(Def));
2601     } else {
2602       NumEncodingsOmitted++;
2603     }
2604   }
2605 
2606   DecoderTableInfo TableInfo;
2607   for (const auto &Opc : OpcMap) {
2608     // Emit the decoder for this namespace+width combination.
2609     ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(),
2610                                                    NumberedEncodings.size());
2611     FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
2612                      IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this);
2613 
2614     // The decode table is cleared for each top level decoder function. The
2615     // predicates and decoders themselves, however, are shared across all
2616     // decoders to give more opportunities for uniqueing.
2617     TableInfo.Table.clear();
2618     TableInfo.FixupStack.clear();
2619     TableInfo.Table.reserve(16384);
2620     TableInfo.FixupStack.emplace_back();
2621     FC.emitTableEntries(TableInfo);
2622     // Any NumToSkip fixups in the top level scope can resolve to the
2623     // OPC_Fail at the end of the table.
2624     assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!");
2625     // Resolve any NumToSkip fixups in the current scope.
2626     resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
2627                        TableInfo.Table.size());
2628     TableInfo.FixupStack.clear();
2629 
2630     TableInfo.Table.push_back(MCD::OPC_Fail);
2631 
2632     // Print the table to the output stream.
2633     emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), Opc.first.first,
2634               Opc.second);
2635   }
2636 
2637   // For variable instruction, we emit a instruction length table
2638   // to let the decoder know how long the instructions are.
2639   // You can see example usage in M68k's disassembler.
2640   if (IsVarLenInst)
2641     emitInstrLenTable(OS, InstrLen);
2642   // Emit the predicate function.
2643   emitPredicateFunction(OS, TableInfo.Predicates, 0);
2644 
2645   // Emit the decoder function.
2646   emitDecoderFunction(OS, TableInfo.Decoders, 0);
2647 
2648   // Emit the main entry point for the decoder, decodeInstruction().
2649   emitDecodeInstruction(OS, IsVarLenInst);
2650 
2651   OS << "\n} // end namespace llvm\n";
2652 }
2653 
2654 namespace llvm {
2655 
EmitDecoder(RecordKeeper & RK,raw_ostream & OS,const std::string & PredicateNamespace)2656 void EmitDecoder(RecordKeeper &RK, raw_ostream &OS,
2657                  const std::string &PredicateNamespace) {
2658   DecoderEmitter(RK, PredicateNamespace).run(OS);
2659 }
2660 
2661 } // end namespace llvm
2662