1 //===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // It contains the tablegen backend that emits the decoder functions for
10 // targets with fixed/variable length instruction set.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "Common/CodeGenHwModes.h"
15 #include "Common/CodeGenInstruction.h"
16 #include "Common/CodeGenTarget.h"
17 #include "Common/InfoByHwMode.h"
18 #include "Common/VarLenCodeEmitterGen.h"
19 #include "TableGenBackends.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/CachedHashString.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallBitVector.h"
26 #include "llvm/ADT/SmallString.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/ADT/StringExtras.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/MC/MCDecoderOps.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/FormatVariadic.h"
36 #include "llvm/Support/FormattedStream.h"
37 #include "llvm/Support/LEB128.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/TableGen/Error.h"
41 #include "llvm/TableGen/Record.h"
42 #include <algorithm>
43 #include <cassert>
44 #include <cstddef>
45 #include <cstdint>
46 #include <map>
47 #include <memory>
48 #include <set>
49 #include <string>
50 #include <utility>
51 #include <vector>
52
53 using namespace llvm;
54
55 #define DEBUG_TYPE "decoder-emitter"
56
57 extern cl::OptionCategory DisassemblerEmitterCat;
58
59 enum SuppressLevel {
60 SUPPRESSION_DISABLE,
61 SUPPRESSION_LEVEL1,
62 SUPPRESSION_LEVEL2
63 };
64
65 static cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates(
66 "suppress-per-hwmode-duplicates",
67 cl::desc("Suppress duplication of instrs into per-HwMode decoder tables"),
68 cl::values(
69 clEnumValN(
70 SUPPRESSION_DISABLE, "O0",
71 "Do not prevent DecoderTable duplications caused by HwModes"),
72 clEnumValN(
73 SUPPRESSION_LEVEL1, "O1",
74 "Remove duplicate DecoderTable entries generated due to HwModes"),
75 clEnumValN(
76 SUPPRESSION_LEVEL2, "O2",
77 "Extract HwModes-specific instructions into new DecoderTables, "
78 "significantly reducing Table Duplications")),
79 cl::init(SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat));
80
81 static cl::opt<bool> LargeTable(
82 "large-decoder-table",
83 cl::desc("Use large decoder table format. This uses 24 bits for offset\n"
84 "in the table instead of the default 16 bits."),
85 cl::init(false), cl::cat(DisassemblerEmitterCat));
86
87 static cl::opt<bool> UseFnTableInDecodeToMCInst(
88 "use-fn-table-in-decode-to-mcinst",
89 cl::desc(
90 "Use a table of function pointers instead of a switch case in the\n"
91 "generated `decodeToMCInst` function. Helps improve compile time\n"
92 "of the generated code."),
93 cl::init(false), cl::cat(DisassemblerEmitterCat));
94
95 STATISTIC(NumEncodings, "Number of encodings considered");
96 STATISTIC(NumEncodingsLackingDisasm,
97 "Number of encodings without disassembler info");
98 STATISTIC(NumInstructions, "Number of instructions considered");
99 STATISTIC(NumEncodingsSupported, "Number of encodings supported");
100 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
101
getNumToSkipInBytes()102 static unsigned getNumToSkipInBytes() { return LargeTable ? 3 : 2; }
103
104 namespace {
105
106 struct EncodingField {
107 unsigned Base, Width, Offset;
EncodingField__anonfc09cd8b0111::EncodingField108 EncodingField(unsigned B, unsigned W, unsigned O)
109 : Base(B), Width(W), Offset(O) {}
110 };
111
112 struct OperandInfo {
113 std::vector<EncodingField> Fields;
114 std::string Decoder;
115 bool HasCompleteDecoder;
116 uint64_t InitValue = 0;
117
OperandInfo__anonfc09cd8b0111::OperandInfo118 OperandInfo(std::string D, bool HCD) : Decoder(D), HasCompleteDecoder(HCD) {}
119
addField__anonfc09cd8b0111::OperandInfo120 void addField(unsigned Base, unsigned Width, unsigned Offset) {
121 Fields.push_back(EncodingField(Base, Width, Offset));
122 }
123
numFields__anonfc09cd8b0111::OperandInfo124 unsigned numFields() const { return Fields.size(); }
125
126 typedef std::vector<EncodingField>::const_iterator const_iterator;
127
begin__anonfc09cd8b0111::OperandInfo128 const_iterator begin() const { return Fields.begin(); }
end__anonfc09cd8b0111::OperandInfo129 const_iterator end() const { return Fields.end(); }
130 };
131
132 typedef std::vector<uint32_t> FixupList;
133 typedef std::vector<FixupList> FixupScopeList;
134 typedef SmallSetVector<CachedHashString, 16> PredicateSet;
135 typedef SmallSetVector<CachedHashString, 16> DecoderSet;
136
137 class DecoderTable {
138 public:
DecoderTable()139 DecoderTable() { Data.reserve(16384); }
140
clear()141 void clear() { Data.clear(); }
push_back(uint8_t Item)142 void push_back(uint8_t Item) { Data.push_back(Item); }
size() const143 size_t size() const { return Data.size(); }
data() const144 const uint8_t *data() const { return Data.data(); }
145
146 using const_iterator = std::vector<uint8_t>::const_iterator;
begin() const147 const_iterator begin() const { return Data.begin(); }
end() const148 const_iterator end() const { return Data.end(); }
149
150 // Insert a ULEB128 encoded value into the table.
insertULEB128(uint64_t Value)151 void insertULEB128(uint64_t Value) {
152 // Encode and emit the value to filter against.
153 uint8_t Buffer[16];
154 unsigned Len = encodeULEB128(Value, Buffer);
155 Data.insert(Data.end(), Buffer, Buffer + Len);
156 }
157
158 // Insert space for `NumToSkip` and return the position
159 // in the table for patching.
insertNumToSkip()160 size_t insertNumToSkip() {
161 size_t Size = Data.size();
162 Data.insert(Data.end(), getNumToSkipInBytes(), 0);
163 return Size;
164 }
165
patchNumToSkip(size_t FixupIdx,uint32_t DestIdx)166 void patchNumToSkip(size_t FixupIdx, uint32_t DestIdx) {
167 // Calculate the distance from the byte following the fixup entry byte
168 // to the destination. The Target is calculated from after the
169 // `getNumToSkipInBytes()`-byte NumToSkip entry itself, so subtract
170 // `getNumToSkipInBytes()` from the displacement here to account for that.
171 assert(DestIdx >= FixupIdx + getNumToSkipInBytes() &&
172 "Expecting a forward jump in the decoding table");
173 uint32_t Delta = DestIdx - FixupIdx - getNumToSkipInBytes();
174 if (!isUIntN(8 * getNumToSkipInBytes(), Delta))
175 PrintFatalError(
176 "disassembler decoding table too large, try --large-decoder-table");
177
178 Data[FixupIdx] = static_cast<uint8_t>(Delta);
179 Data[FixupIdx + 1] = static_cast<uint8_t>(Delta >> 8);
180 if (getNumToSkipInBytes() == 3)
181 Data[FixupIdx + 2] = static_cast<uint8_t>(Delta >> 16);
182 }
183
184 private:
185 std::vector<uint8_t> Data;
186 };
187
188 struct DecoderTableInfo {
189 DecoderTable Table;
190 FixupScopeList FixupStack;
191 PredicateSet Predicates;
192 DecoderSet Decoders;
193
isOutermostScope__anonfc09cd8b0111::DecoderTableInfo194 bool isOutermostScope() const { return FixupStack.size() == 1; }
195 };
196
197 struct EncodingAndInst {
198 const Record *EncodingDef;
199 const CodeGenInstruction *Inst;
200 StringRef HwModeName;
201
EncodingAndInst__anonfc09cd8b0111::EncodingAndInst202 EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst,
203 StringRef HwModeName = "")
204 : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {}
205 };
206
207 struct EncodingIDAndOpcode {
208 unsigned EncodingID;
209 unsigned Opcode;
210
EncodingIDAndOpcode__anonfc09cd8b0111::EncodingIDAndOpcode211 EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {}
EncodingIDAndOpcode__anonfc09cd8b0111::EncodingIDAndOpcode212 EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
213 : EncodingID(EncodingID), Opcode(Opcode) {}
214 };
215
216 using EncodingIDsVec = std::vector<EncodingIDAndOpcode>;
217 using NamespacesHwModesMap = std::map<std::string, std::set<StringRef>>;
218
219 class DecoderEmitter {
220 const RecordKeeper &RK;
221 std::vector<EncodingAndInst> NumberedEncodings;
222
223 public:
DecoderEmitter(const RecordKeeper & R,StringRef PredicateNamespace)224 DecoderEmitter(const RecordKeeper &R, StringRef PredicateNamespace)
225 : RK(R), Target(R), PredicateNamespace(PredicateNamespace) {}
226
227 // Emit the decoder state machine table. Returns a mask of MCD decoder ops
228 // that were emitted.
229 unsigned emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
230 unsigned BitWidth, StringRef Namespace,
231 const EncodingIDsVec &EncodingIDs) const;
232 void emitInstrLenTable(formatted_raw_ostream &OS,
233 ArrayRef<unsigned> InstrLen) const;
234 void emitPredicateFunction(formatted_raw_ostream &OS,
235 PredicateSet &Predicates) const;
236 void emitDecoderFunction(formatted_raw_ostream &OS,
237 DecoderSet &Decoders) const;
238
239 // run - Output the code emitter
240 void run(raw_ostream &o);
241
242 private:
243 CodeGenTarget Target;
244
245 public:
246 StringRef PredicateNamespace;
247 };
248
249 // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
250 // for a bit value.
251 //
252 // BIT_UNFILTERED is used as the init value for a filter position. It is used
253 // only for filter processings.
254 struct BitValue {
255 enum bit_value_t : uint8_t {
256 BIT_FALSE, // '0'
257 BIT_TRUE, // '1'
258 BIT_UNSET, // '?', printed as '_'
259 BIT_UNFILTERED // unfiltered, printed as '.'
260 };
261
BitValue__anonfc09cd8b0111::BitValue262 BitValue(bit_value_t V) : V(V) {}
BitValue__anonfc09cd8b0111::BitValue263 explicit BitValue(const Init *Init) {
264 if (const auto *Bit = dyn_cast<BitInit>(Init))
265 V = Bit->getValue() ? BIT_TRUE : BIT_FALSE;
266 else
267 V = BIT_UNSET;
268 }
BitValue__anonfc09cd8b0111::BitValue269 BitValue(const BitsInit &Bits, unsigned Idx) : BitValue(Bits.getBit(Idx)) {}
270
isSet__anonfc09cd8b0111::BitValue271 bool isSet() const { return V == BIT_TRUE || V == BIT_FALSE; }
isUnset__anonfc09cd8b0111::BitValue272 bool isUnset() const { return V == BIT_UNSET; }
getValue__anonfc09cd8b0111::BitValue273 std::optional<uint64_t> getValue() const {
274 if (isSet())
275 return static_cast<uint64_t>(V);
276 return std::nullopt;
277 }
278
279 // For printing a bit value.
operator StringRef__anonfc09cd8b0111::BitValue280 operator StringRef() const {
281 switch (V) {
282 case BIT_FALSE:
283 return "0";
284 case BIT_TRUE:
285 return "1";
286 case BIT_UNSET:
287 return "_";
288 case BIT_UNFILTERED:
289 return ".";
290 }
291 llvm_unreachable("Unknow bit value");
292 }
293
operator ==__anonfc09cd8b0111::BitValue294 bool operator==(bit_value_t Other) const { return Other == V; }
operator !=__anonfc09cd8b0111::BitValue295 bool operator!=(bit_value_t Other) const { return Other != V; }
296
297 private:
298 bit_value_t V;
299 };
300
301 } // end anonymous namespace
302
operator <<(raw_ostream & OS,const EncodingAndInst & Value)303 static raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
304 if (Value.EncodingDef != Value.Inst->TheDef)
305 OS << Value.EncodingDef->getName() << ":";
306 OS << Value.Inst->TheDef->getName();
307 return OS;
308 }
309
310 // Prints the bit value for each position.
dumpBits(raw_ostream & OS,const BitsInit & Bits)311 static void dumpBits(raw_ostream &OS, const BitsInit &Bits) {
312 for (const Init *Bit : reverse(Bits.getBits()))
313 OS << BitValue(Bit);
314 }
315
getBitsField(const Record & Def,StringRef FieldName)316 static const BitsInit &getBitsField(const Record &Def, StringRef FieldName) {
317 const RecordVal *RV = Def.getValue(FieldName);
318 if (const BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue()))
319 return *Bits;
320
321 // Handle variable length instructions.
322 VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
323 SmallVector<const Init *, 16> Bits;
324
325 for (const auto &SI : VLI) {
326 if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value))
327 llvm::append_range(Bits, BI->getBits());
328 else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value))
329 Bits.push_back(BI);
330 else
331 Bits.append(SI.BitWidth, UnsetInit::get(Def.getRecords()));
332 }
333
334 return *BitsInit::get(Def.getRecords(), Bits);
335 }
336
337 // Representation of the instruction to work on.
338 typedef std::vector<BitValue> insn_t;
339
340 namespace {
341
342 static constexpr uint64_t NO_FIXED_SEGMENTS_SENTINEL =
343 std::numeric_limits<uint64_t>::max();
344
345 class FilterChooser;
346
347 /// Filter - Filter works with FilterChooser to produce the decoding tree for
348 /// the ISA.
349 ///
350 /// It is useful to think of a Filter as governing the switch stmts of the
351 /// decoding tree in a certain level. Each case stmt delegates to an inferior
352 /// FilterChooser to decide what further decoding logic to employ, or in another
353 /// words, what other remaining bits to look at. The FilterChooser eventually
354 /// chooses a best Filter to do its job.
355 ///
356 /// This recursive scheme ends when the number of Opcodes assigned to the
357 /// FilterChooser becomes 1 or if there is a conflict. A conflict happens when
358 /// the Filter/FilterChooser combo does not know how to distinguish among the
359 /// Opcodes assigned.
360 ///
361 /// An example of a conflict is
362 ///
363 /// Conflict:
364 /// 111101000.00........00010000....
365 /// 111101000.00........0001........
366 /// 1111010...00........0001........
367 /// 1111010...00....................
368 /// 1111010.........................
369 /// 1111............................
370 /// ................................
371 /// VST4q8a 111101000_00________00010000____
372 /// VST4q8b 111101000_00________00010000____
373 ///
374 /// The Debug output shows the path that the decoding tree follows to reach the
375 /// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced
376 /// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
377 ///
378 /// The encoding info in the .td files does not specify this meta information,
379 /// which could have been used by the decoder to resolve the conflict. The
380 /// decoder could try to decode the even/odd register numbering and assign to
381 /// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
382 /// version and return the Opcode since the two have the same Asm format string.
383 class Filter {
384 protected:
385 const FilterChooser &Owner; // FilterChooser who owns this filter
386 unsigned StartBit; // the starting bit position
387 unsigned NumBits; // number of bits to filter
388 bool Mixed; // a mixed region contains both set and unset bits
389
390 // Map of well-known segment value to the set of uid's with that value.
391 std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions;
392
393 // Set of uid's with non-constant segment values.
394 std::vector<EncodingIDAndOpcode> VariableInstructions;
395
396 // Map of well-known segment value to its delegate.
397 std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
398
399 // Number of instructions which fall under FilteredInstructions category.
400 unsigned NumFiltered;
401
402 // Keeps track of the last opcode in the filtered bucket.
403 EncodingIDAndOpcode LastOpcFiltered;
404
405 public:
406 Filter(Filter &&f);
407 Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits,
408 bool mixed);
409
410 ~Filter() = default;
411
getNumFiltered() const412 unsigned getNumFiltered() const { return NumFiltered; }
413
getSingletonOpc() const414 EncodingIDAndOpcode getSingletonOpc() const {
415 assert(NumFiltered == 1);
416 return LastOpcFiltered;
417 }
418
419 // Return the filter chooser for the group of instructions without constant
420 // segment values.
getVariableFC() const421 const FilterChooser &getVariableFC() const {
422 assert(NumFiltered == 1 && FilterChooserMap.size() == 1);
423 return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second);
424 }
425
426 // Divides the decoding task into sub tasks and delegates them to the
427 // inferior FilterChooser's.
428 //
429 // A special case arises when there's only one entry in the filtered
430 // instructions. In order to unambiguously decode the singleton, we need to
431 // match the remaining undecoded encoding bits against the singleton.
432 void recurse();
433
434 // Emit table entries to decode instructions given a segment or segments of
435 // bits.
436 void emitTableEntry(DecoderTableInfo &TableInfo) const;
437
438 // Returns the number of fanout produced by the filter. More fanout implies
439 // the filter distinguishes more categories of instructions.
440 unsigned usefulness() const;
441 }; // end class Filter
442
443 // These are states of our finite state machines used in FilterChooser's
444 // filterProcessor() which produces the filter candidates to use.
445 enum bitAttr_t {
446 ATTR_NONE,
447 ATTR_FILTERED,
448 ATTR_ALL_SET,
449 ATTR_ALL_UNSET,
450 ATTR_MIXED
451 };
452
453 /// FilterChooser - FilterChooser chooses the best filter among a set of Filters
454 /// in order to perform the decoding of instructions at the current level.
455 ///
456 /// Decoding proceeds from the top down. Based on the well-known encoding bits
457 /// of instructions available, FilterChooser builds up the possible Filters that
458 /// can further the task of decoding by distinguishing among the remaining
459 /// candidate instructions.
460 ///
461 /// Once a filter has been chosen, it is called upon to divide the decoding task
462 /// into sub-tasks and delegates them to its inferior FilterChoosers for further
463 /// processings.
464 ///
465 /// It is useful to think of a Filter as governing the switch stmts of the
466 /// decoding tree. And each case is delegated to an inferior FilterChooser to
467 /// decide what further remaining bits to look at.
468
469 class FilterChooser {
470 protected:
471 friend class Filter;
472
473 // Vector of codegen instructions to choose our filter.
474 ArrayRef<EncodingAndInst> AllInstructions;
475
476 // Vector of uid's for this filter chooser to work on.
477 // The first member of the pair is the opcode id being decoded, the second is
478 // the opcode id that should be emitted.
479 ArrayRef<EncodingIDAndOpcode> Opcodes;
480
481 // Lookup table for the operand decoding of instructions.
482 const std::map<unsigned, std::vector<OperandInfo>> &Operands;
483
484 // Vector of candidate filters.
485 std::vector<Filter> Filters;
486
487 // Array of bit values passed down from our parent.
488 // Set to all BIT_UNFILTERED's for Parent == NULL.
489 std::vector<BitValue> FilterBitValues;
490
491 // Links to the FilterChooser above us in the decoding tree.
492 const FilterChooser *Parent;
493
494 // Index of the best filter from Filters.
495 int BestIndex;
496
497 // Width of instructions
498 unsigned BitWidth;
499
500 // Parent emitter
501 const DecoderEmitter *Emitter;
502
503 struct Island {
504 unsigned StartBit;
505 unsigned NumBits;
506 uint64_t FieldVal;
507 };
508
509 public:
FilterChooser(ArrayRef<EncodingAndInst> Insts,ArrayRef<EncodingIDAndOpcode> IDs,const std::map<unsigned,std::vector<OperandInfo>> & Ops,unsigned BW,const DecoderEmitter * E)510 FilterChooser(ArrayRef<EncodingAndInst> Insts,
511 ArrayRef<EncodingIDAndOpcode> IDs,
512 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
513 unsigned BW, const DecoderEmitter *E)
514 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
515 FilterBitValues(BW, BitValue::BIT_UNFILTERED), Parent(nullptr),
516 BestIndex(-1), BitWidth(BW), Emitter(E) {
517 doFilter();
518 }
519
FilterChooser(ArrayRef<EncodingAndInst> Insts,ArrayRef<EncodingIDAndOpcode> IDs,const std::map<unsigned,std::vector<OperandInfo>> & Ops,const std::vector<BitValue> & ParentFilterBitValues,const FilterChooser & parent)520 FilterChooser(ArrayRef<EncodingAndInst> Insts,
521 ArrayRef<EncodingIDAndOpcode> IDs,
522 const std::map<unsigned, std::vector<OperandInfo>> &Ops,
523 const std::vector<BitValue> &ParentFilterBitValues,
524 const FilterChooser &parent)
525 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
526 FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
527 BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
528 doFilter();
529 }
530
531 FilterChooser(const FilterChooser &) = delete;
532 void operator=(const FilterChooser &) = delete;
533
getBitWidth() const534 unsigned getBitWidth() const { return BitWidth; }
535
536 protected:
537 // Populates the insn given the uid.
insnWithID(insn_t & Insn,unsigned Opcode) const538 void insnWithID(insn_t &Insn, unsigned Opcode) const {
539 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef;
540 const BitsInit &Bits = getBitsField(*EncodingDef, "Inst");
541 Insn.resize(std::max(BitWidth, Bits.getNumBits()), BitValue::BIT_UNSET);
542 // We may have a SoftFail bitmask, which specifies a mask where an encoding
543 // may differ from the value in "Inst" and yet still be valid, but the
544 // disassembler should return SoftFail instead of Success.
545 //
546 // This is used for marking UNPREDICTABLE instructions in the ARM world.
547 const RecordVal *RV = EncodingDef->getValue("SoftFail");
548 const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
549 for (unsigned i = 0; i < Bits.getNumBits(); ++i) {
550 if (SFBits && BitValue(*SFBits, i) == BitValue::BIT_TRUE)
551 Insn[i] = BitValue::BIT_UNSET;
552 else
553 Insn[i] = BitValue(Bits, i);
554 }
555 }
556
557 // Populates the field of the insn given the start position and the number of
558 // consecutive bits to scan for.
559 //
560 // Returns a pair of values (indicator, field), where the indicator is false
561 // if there exists any uninitialized bit value in the range and true if all
562 // bits are well-known. The second value is the potentially populated field.
563 std::pair<bool, uint64_t> fieldFromInsn(const insn_t &Insn, unsigned StartBit,
564 unsigned NumBits) const;
565
566 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
567 /// filter array as a series of chars.
568 void dumpFilterArray(raw_ostream &OS, ArrayRef<BitValue> Filter) const;
569
570 /// dumpStack - dumpStack traverses the filter chooser chain and calls
571 /// dumpFilterArray on each filter chooser up to the top level one.
572 void dumpStack(raw_ostream &OS, const char *prefix) const;
573
bestFilter()574 Filter &bestFilter() {
575 assert(BestIndex != -1 && "BestIndex not set");
576 return Filters[BestIndex];
577 }
578
PositionFiltered(unsigned Idx) const579 bool PositionFiltered(unsigned Idx) const {
580 return FilterBitValues[Idx].isSet();
581 }
582
583 // Calculates the island(s) needed to decode the instruction.
584 // This returns a list of undecoded bits of an instructions, for example,
585 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
586 // decoded bits in order to verify that the instruction matches the Opcode.
587 unsigned getIslands(std::vector<Island> &Islands, const insn_t &Insn) const;
588
589 // Emits code to check the Predicates member of an instruction are true.
590 // Returns true if predicate matches were emitted, false otherwise.
591 bool emitPredicateMatch(raw_ostream &OS, unsigned Opc) const;
592 bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
593 raw_ostream &OS) const;
594
595 bool doesOpcodeNeedPredicate(unsigned Opc) const;
596 unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const;
597 void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
598
599 void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
600
601 // Emits table entries to decode the singleton.
602 void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
603 EncodingIDAndOpcode Opc) const;
604
605 // Emits code to decode the singleton, and then to decode the rest.
606 void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
607 const Filter &Best) const;
608
609 bool emitBinaryParser(raw_ostream &OS, indent Indent,
610 const OperandInfo &OpInfo) const;
611
612 bool emitDecoder(raw_ostream &OS, indent Indent, unsigned Opc) const;
613 std::pair<unsigned, bool> getDecoderIndex(DecoderSet &Decoders,
614 unsigned Opc) const;
615
616 // Assign a single filter and run with it.
617 void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
618
619 // reportRegion is a helper function for filterProcessor to mark a region as
620 // eligible for use as a filter region.
621 void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
622 bool AllowMixed);
623
624 // FilterProcessor scans the well-known encoding bits of the instructions and
625 // builds up a list of candidate filters. It chooses the best filter and
626 // recursively descends down the decoding tree.
627 bool filterProcessor(bool AllowMixed, bool Greedy = true);
628
629 // Decides on the best configuration of filter(s) to use in order to decode
630 // the instructions. A conflict of instructions may occur, in which case we
631 // dump the conflict set to the standard error.
632 void doFilter();
633
634 public:
635 // emitTableEntries - Emit state machine entries to decode our share of
636 // instructions.
637 void emitTableEntries(DecoderTableInfo &TableInfo) const;
638 };
639
640 } // end anonymous namespace
641
642 ///////////////////////////
643 // //
644 // Filter Implementation //
645 // //
646 ///////////////////////////
647
Filter(Filter && f)648 Filter::Filter(Filter &&f)
649 : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
650 FilteredInstructions(std::move(f.FilteredInstructions)),
651 VariableInstructions(std::move(f.VariableInstructions)),
652 FilterChooserMap(std::move(f.FilterChooserMap)),
653 NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {}
654
Filter(const FilterChooser & owner,unsigned startBit,unsigned numBits,bool mixed)655 Filter::Filter(const FilterChooser &owner, unsigned startBit, unsigned numBits,
656 bool mixed)
657 : Owner(owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) {
658 assert(StartBit + NumBits - 1 < Owner.BitWidth);
659
660 NumFiltered = 0;
661 LastOpcFiltered = {0, 0};
662
663 for (const auto &OpcPair : Owner.Opcodes) {
664 insn_t Insn;
665
666 // Populates the insn given the uid.
667 Owner.insnWithID(Insn, OpcPair.EncodingID);
668
669 // Scans the segment for possibly well-specified encoding bits.
670 auto [Ok, Field] = Owner.fieldFromInsn(Insn, StartBit, NumBits);
671
672 if (Ok) {
673 // The encoding bits are well-known. Lets add the uid of the
674 // instruction into the bucket keyed off the constant field value.
675 LastOpcFiltered = OpcPair;
676 FilteredInstructions[Field].push_back(LastOpcFiltered);
677 ++NumFiltered;
678 } else {
679 // Some of the encoding bit(s) are unspecified. This contributes to
680 // one additional member of "Variable" instructions.
681 VariableInstructions.push_back(OpcPair);
682 }
683 }
684
685 assert((FilteredInstructions.size() + VariableInstructions.size() > 0) &&
686 "Filter returns no instruction categories");
687 }
688
689 // Divides the decoding task into sub tasks and delegates them to the
690 // inferior FilterChooser's.
691 //
692 // A special case arises when there's only one entry in the filtered
693 // instructions. In order to unambiguously decode the singleton, we need to
694 // match the remaining undecoded encoding bits against the singleton.
recurse()695 void Filter::recurse() {
696 // Starts by inheriting our parent filter chooser's filter bit values.
697 std::vector<BitValue> BitValueArray(Owner.FilterBitValues);
698
699 if (!VariableInstructions.empty()) {
700 // Conservatively marks each segment position as BIT_UNSET.
701 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
702 BitValueArray[StartBit + bitIndex] = BitValue::BIT_UNSET;
703
704 // Delegates to an inferior filter chooser for further processing on this
705 // group of instructions whose segment values are variable.
706 FilterChooserMap.try_emplace(
707 NO_FIXED_SEGMENTS_SENTINEL,
708 std::make_unique<FilterChooser>(Owner.AllInstructions,
709 VariableInstructions, Owner.Operands,
710 BitValueArray, Owner));
711 }
712
713 // No need to recurse for a singleton filtered instruction.
714 // See also Filter::emit*().
715 if (getNumFiltered() == 1) {
716 assert(FilterChooserMap.size() == 1);
717 return;
718 }
719
720 // Otherwise, create sub choosers.
721 for (const auto &Inst : FilteredInstructions) {
722 // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
723 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
724 BitValueArray[StartBit + bitIndex] = Inst.first & (1ULL << bitIndex)
725 ? BitValue::BIT_TRUE
726 : BitValue::BIT_FALSE;
727
728 // Delegates to an inferior filter chooser for further processing on this
729 // category of instructions.
730 FilterChooserMap.try_emplace(
731 Inst.first,
732 std::make_unique<FilterChooser>(Owner.AllInstructions, Inst.second,
733 Owner.Operands, BitValueArray, Owner));
734 }
735 }
736
resolveTableFixups(DecoderTable & Table,const FixupList & Fixups,uint32_t DestIdx)737 static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
738 uint32_t DestIdx) {
739 // Any NumToSkip fixups in the current scope can resolve to the
740 // current location.
741 for (uint32_t FixupIdx : Fixups)
742 Table.patchNumToSkip(FixupIdx, DestIdx);
743 }
744
745 // Emit table entries to decode instructions given a segment or segments
746 // of bits.
emitTableEntry(DecoderTableInfo & TableInfo) const747 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
748 assert(isUInt<8>(NumBits) && "NumBits overflowed uint8 table entry!");
749 TableInfo.Table.push_back(MCD::OPC_ExtractField);
750
751 TableInfo.Table.insertULEB128(StartBit);
752 TableInfo.Table.push_back(NumBits);
753
754 // If the NO_FIXED_SEGMENTS_SENTINEL is present, we need to add a new scope
755 // for this filter. Otherwise, we can skip adding a new scope and any
756 // patching added will automatically be added to the enclosing scope.
757
758 // If NO_FIXED_SEGMENTS_SENTINEL is present, it will be last entry in
759 // FilterChooserMap.
760
761 const uint64_t LastFilter = FilterChooserMap.rbegin()->first;
762 bool HasFallthrough = LastFilter == NO_FIXED_SEGMENTS_SENTINEL;
763 if (HasFallthrough)
764 TableInfo.FixupStack.emplace_back();
765
766 DecoderTable &Table = TableInfo.Table;
767
768 size_t PrevFilter = 0;
769 for (const auto &[FilterVal, Delegate] : FilterChooserMap) {
770 // Field value NO_FIXED_SEGMENTS_SENTINEL implies a non-empty set of
771 // variable instructions. See also recurse().
772 if (FilterVal == NO_FIXED_SEGMENTS_SENTINEL) {
773 // Each scope should always have at least one filter value to check
774 // for.
775 assert(PrevFilter != 0 && "empty filter set!");
776 FixupList &CurScope = TableInfo.FixupStack.back();
777 // Resolve any NumToSkip fixups in the current scope.
778 resolveTableFixups(Table, CurScope, Table.size());
779
780 // Delete the scope we have added here.
781 TableInfo.FixupStack.pop_back();
782
783 PrevFilter = 0; // Don't re-process the filter's fallthrough.
784 } else {
785 // The last filtervalue emitted can be OPC_FilterValue if we are at
786 // outermost scope.
787 const uint8_t DecoderOp =
788 FilterVal == LastFilter && TableInfo.isOutermostScope()
789 ? MCD::OPC_FilterValueOrFail
790 : MCD::OPC_FilterValue;
791 Table.push_back(DecoderOp);
792 Table.insertULEB128(FilterVal);
793 if (DecoderOp == MCD::OPC_FilterValue) {
794 // Reserve space for the NumToSkip entry. We'll backpatch the value
795 // later.
796 PrevFilter = Table.insertNumToSkip();
797 } else {
798 PrevFilter = 0;
799 }
800 }
801
802 // We arrive at a category of instructions with the same segment value.
803 // Now delegate to the sub filter chooser for further decodings.
804 // The case may fallthrough, which happens if the remaining well-known
805 // encoding bits do not match exactly.
806 Delegate->emitTableEntries(TableInfo);
807
808 // Now that we've emitted the body of the handler, update the NumToSkip
809 // of the filter itself to be able to skip forward when false.
810 if (PrevFilter)
811 Table.patchNumToSkip(PrevFilter, Table.size());
812 }
813
814 // If there is no fallthrough and the final filter was not in the outermost
815 // scope, then it must be fixed up according to the enclosing scope rather
816 // than the current position.
817 if (PrevFilter)
818 TableInfo.FixupStack.back().push_back(PrevFilter);
819 }
820
821 // Returns the number of fanout produced by the filter. More fanout implies
822 // the filter distinguishes more categories of instructions.
usefulness() const823 unsigned Filter::usefulness() const {
824 return FilteredInstructions.size() + VariableInstructions.empty();
825 }
826
827 //////////////////////////////////
828 // //
829 // Filterchooser Implementation //
830 // //
831 //////////////////////////////////
832
833 // Emit the decoder state machine table. Returns a mask of MCD decoder ops
834 // that were emitted.
emitTable(formatted_raw_ostream & OS,DecoderTable & Table,unsigned BitWidth,StringRef Namespace,const EncodingIDsVec & EncodingIDs) const835 unsigned DecoderEmitter::emitTable(formatted_raw_ostream &OS,
836 DecoderTable &Table, unsigned BitWidth,
837 StringRef Namespace,
838 const EncodingIDsVec &EncodingIDs) const {
839 // We'll need to be able to map from a decoded opcode into the corresponding
840 // EncodingID for this specific combination of BitWidth and Namespace. This
841 // is used below to index into NumberedEncodings.
842 DenseMap<unsigned, unsigned> OpcodeToEncodingID;
843 OpcodeToEncodingID.reserve(EncodingIDs.size());
844 for (const auto &EI : EncodingIDs)
845 OpcodeToEncodingID[EI.Opcode] = EI.EncodingID;
846
847 OS << "static const uint8_t DecoderTable" << Namespace << BitWidth
848 << "[] = {\n";
849
850 // Emit ULEB128 encoded value to OS, returning the number of bytes emitted.
851 auto emitULEB128 = [](DecoderTable::const_iterator &I,
852 formatted_raw_ostream &OS) {
853 while (*I >= 128)
854 OS << (unsigned)*I++ << ", ";
855 OS << (unsigned)*I++ << ", ";
856 };
857
858 // Emit `getNumToSkipInBytes()`-byte numtoskip value to OS, returning the
859 // NumToSkip value.
860 auto emitNumToSkip = [](DecoderTable::const_iterator &I,
861 formatted_raw_ostream &OS) {
862 uint8_t Byte = *I++;
863 uint32_t NumToSkip = Byte;
864 OS << (unsigned)Byte << ", ";
865 Byte = *I++;
866 OS << (unsigned)Byte << ", ";
867 NumToSkip |= Byte << 8;
868 if (getNumToSkipInBytes() == 3) {
869 Byte = *I++;
870 OS << (unsigned)(Byte) << ", ";
871 NumToSkip |= Byte << 16;
872 }
873 return NumToSkip;
874 };
875
876 // FIXME: We may be able to use the NumToSkip values to recover
877 // appropriate indentation levels.
878 DecoderTable::const_iterator I = Table.begin();
879 DecoderTable::const_iterator E = Table.end();
880 const uint8_t *const EndPtr = Table.data() + Table.size();
881
882 auto emitNumToSkipComment = [&](uint32_t NumToSkip, bool InComment = false) {
883 uint32_t Index = ((I - Table.begin()) + NumToSkip);
884 OS << (InComment ? ", " : "// ");
885 OS << "Skip to: " << Index;
886 if (*(I + NumToSkip) == MCD::OPC_Fail)
887 OS << " (Fail)";
888 };
889
890 unsigned OpcodeMask = 0;
891
892 while (I != E) {
893 assert(I < E && "incomplete decode table entry!");
894
895 uint64_t Pos = I - Table.begin();
896 OS << "/* " << Pos << " */";
897 OS.PadToColumn(12);
898
899 const uint8_t DecoderOp = *I++;
900 OpcodeMask |= (1 << DecoderOp);
901 switch (DecoderOp) {
902 default:
903 PrintFatalError("Invalid decode table opcode: " + Twine((int)DecoderOp) +
904 " at index " + Twine(Pos));
905 case MCD::OPC_ExtractField: {
906 OS << " MCD::OPC_ExtractField, ";
907
908 // ULEB128 encoded start value.
909 const char *ErrMsg = nullptr;
910 unsigned Start = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
911 assert(ErrMsg == nullptr && "ULEB128 value too large!");
912 emitULEB128(I, OS);
913
914 unsigned Len = *I++;
915 OS << Len << ", // Inst{";
916 if (Len > 1)
917 OS << (Start + Len - 1) << "-";
918 OS << Start << "} ...\n";
919 break;
920 }
921 case MCD::OPC_FilterValue:
922 case MCD::OPC_FilterValueOrFail: {
923 bool IsFail = DecoderOp == MCD::OPC_FilterValueOrFail;
924 OS << " MCD::OPC_FilterValue" << (IsFail ? "OrFail, " : ", ");
925 // The filter value is ULEB128 encoded.
926 emitULEB128(I, OS);
927
928 if (!IsFail) {
929 uint32_t NumToSkip = emitNumToSkip(I, OS);
930 emitNumToSkipComment(NumToSkip);
931 }
932 OS << '\n';
933 break;
934 }
935 case MCD::OPC_CheckField:
936 case MCD::OPC_CheckFieldOrFail: {
937 bool IsFail = DecoderOp == MCD::OPC_CheckFieldOrFail;
938 OS << " MCD::OPC_CheckField" << (IsFail ? "OrFail, " : ", ");
939 // ULEB128 encoded start value.
940 emitULEB128(I, OS);
941 // 8-bit length.
942 unsigned Len = *I++;
943 OS << Len << ", ";
944 // ULEB128 encoded field value.
945 emitULEB128(I, OS);
946
947 if (!IsFail) {
948 uint32_t NumToSkip = emitNumToSkip(I, OS);
949 emitNumToSkipComment(NumToSkip);
950 }
951 OS << '\n';
952 break;
953 }
954 case MCD::OPC_CheckPredicate:
955 case MCD::OPC_CheckPredicateOrFail: {
956 bool IsFail = DecoderOp == MCD::OPC_CheckPredicateOrFail;
957
958 OS << " MCD::OPC_CheckPredicate" << (IsFail ? "OrFail, " : ", ");
959 emitULEB128(I, OS);
960
961 if (!IsFail) {
962 uint32_t NumToSkip = emitNumToSkip(I, OS);
963 emitNumToSkipComment(NumToSkip);
964 }
965 OS << '\n';
966 break;
967 }
968 case MCD::OPC_Decode:
969 case MCD::OPC_TryDecode:
970 case MCD::OPC_TryDecodeOrFail: {
971 bool IsFail = DecoderOp == MCD::OPC_TryDecodeOrFail;
972 bool IsTry = DecoderOp == MCD::OPC_TryDecode || IsFail;
973 // Decode the Opcode value.
974 const char *ErrMsg = nullptr;
975 unsigned Opc = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
976 assert(ErrMsg == nullptr && "ULEB128 value too large!");
977
978 OS << " MCD::OPC_" << (IsTry ? "Try" : "") << "Decode"
979 << (IsFail ? "OrFail, " : ", ");
980 emitULEB128(I, OS);
981
982 // Decoder index.
983 unsigned DecodeIdx = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
984 assert(ErrMsg == nullptr && "ULEB128 value too large!");
985 emitULEB128(I, OS);
986
987 auto EncI = OpcodeToEncodingID.find(Opc);
988 assert(EncI != OpcodeToEncodingID.end() && "no encoding entry");
989 auto EncodingID = EncI->second;
990
991 if (!IsTry) {
992 OS << "// Opcode: " << NumberedEncodings[EncodingID]
993 << ", DecodeIdx: " << DecodeIdx << '\n';
994 break;
995 }
996
997 // Fallthrough for OPC_TryDecode.
998 if (!IsFail) {
999 uint32_t NumToSkip = emitNumToSkip(I, OS);
1000 OS << "// Opcode: " << NumberedEncodings[EncodingID]
1001 << ", DecodeIdx: " << DecodeIdx;
1002 emitNumToSkipComment(NumToSkip, /*InComment=*/true);
1003 }
1004 OS << '\n';
1005 break;
1006 }
1007 case MCD::OPC_SoftFail: {
1008 OS << " MCD::OPC_SoftFail, ";
1009 // Decode the positive mask.
1010 const char *ErrMsg = nullptr;
1011 uint64_t PositiveMask = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
1012 assert(ErrMsg == nullptr && "ULEB128 value too large!");
1013 emitULEB128(I, OS);
1014
1015 // Decode the negative mask.
1016 uint64_t NegativeMask = decodeULEB128(&*I, nullptr, EndPtr, &ErrMsg);
1017 assert(ErrMsg == nullptr && "ULEB128 value too large!");
1018 emitULEB128(I, OS);
1019 OS << "// +ve mask: 0x";
1020 OS.write_hex(PositiveMask);
1021 OS << ", -ve mask: 0x";
1022 OS.write_hex(NegativeMask);
1023 OS << '\n';
1024 break;
1025 }
1026 case MCD::OPC_Fail:
1027 OS << " MCD::OPC_Fail,\n";
1028 break;
1029 }
1030 }
1031 OS << " 0\n";
1032 OS << "};\n\n";
1033
1034 return OpcodeMask;
1035 }
1036
emitInstrLenTable(formatted_raw_ostream & OS,ArrayRef<unsigned> InstrLen) const1037 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
1038 ArrayRef<unsigned> InstrLen) const {
1039 OS << "static const uint8_t InstrLenTable[] = {\n";
1040 for (unsigned Len : InstrLen)
1041 OS << Len << ",\n";
1042 OS << "};\n\n";
1043 }
1044
emitPredicateFunction(formatted_raw_ostream & OS,PredicateSet & Predicates) const1045 void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
1046 PredicateSet &Predicates) const {
1047 // The predicate function is just a big switch statement based on the
1048 // input predicate index.
1049 OS << "static bool checkDecoderPredicate(unsigned Idx, const FeatureBitset "
1050 "&Bits) {\n";
1051 OS << " switch (Idx) {\n";
1052 OS << " default: llvm_unreachable(\"Invalid index!\");\n";
1053 for (const auto &[Index, Predicate] : enumerate(Predicates)) {
1054 OS << " case " << Index << ":\n";
1055 OS << " return (" << Predicate << ");\n";
1056 }
1057 OS << " }\n";
1058 OS << "}\n\n";
1059 }
1060
emitDecoderFunction(formatted_raw_ostream & OS,DecoderSet & Decoders) const1061 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
1062 DecoderSet &Decoders) const {
1063 // The decoder function is just a big switch statement or a table of function
1064 // pointers based on the input decoder index.
1065
1066 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
1067 // It would be better for emitBinaryParser to use a 64-bit tmp whenever
1068 // possible but fall back to an InsnType-sized tmp for truly large fields.
1069 StringRef TmpTypeDecl =
1070 "using TmpType = std::conditional_t<std::is_integral<InsnType>::value, "
1071 "InsnType, uint64_t>;\n";
1072 StringRef DecodeParams =
1073 "DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const "
1074 "MCDisassembler *Decoder, bool &DecodeComplete";
1075
1076 if (UseFnTableInDecodeToMCInst) {
1077 // Emit a function for each case first.
1078 for (const auto &[Index, Decoder] : enumerate(Decoders)) {
1079 OS << "template <typename InsnType>\n";
1080 OS << "DecodeStatus decodeFn" << Index << "(" << DecodeParams << ") {\n";
1081 OS << " " << TmpTypeDecl;
1082 OS << " [[maybe_unused]] TmpType tmp;\n";
1083 OS << Decoder;
1084 OS << " return S;\n";
1085 OS << "}\n\n";
1086 }
1087 }
1088
1089 OS << "// Handling " << Decoders.size() << " cases.\n";
1090 OS << "template <typename InsnType>\n";
1091 OS << "static DecodeStatus decodeToMCInst(unsigned Idx, " << DecodeParams
1092 << ") {\n";
1093 OS << " DecodeComplete = true;\n";
1094
1095 if (UseFnTableInDecodeToMCInst) {
1096 // Build a table of function pointers.
1097 OS << " using DecodeFnTy = DecodeStatus (*)(" << DecodeParams << ");\n";
1098 OS << " static constexpr DecodeFnTy decodeFnTable[] = {\n";
1099 for (size_t Index : llvm::seq(Decoders.size()))
1100 OS << " decodeFn" << Index << ",\n";
1101 OS << " };\n";
1102 OS << " if (Idx >= " << Decoders.size() << ")\n";
1103 OS << " llvm_unreachable(\"Invalid index!\");\n";
1104 OS << " return decodeFnTable[Idx](S, insn, MI, Address, Decoder, "
1105 "DecodeComplete);\n";
1106 } else {
1107 OS << " " << TmpTypeDecl;
1108 OS << " TmpType tmp;\n";
1109 OS << " switch (Idx) {\n";
1110 OS << " default: llvm_unreachable(\"Invalid index!\");\n";
1111 for (const auto &[Index, Decoder] : enumerate(Decoders)) {
1112 OS << " case " << Index << ":\n";
1113 OS << Decoder;
1114 OS << " return S;\n";
1115 }
1116 OS << " }\n";
1117 }
1118 OS << "}\n";
1119 }
1120
1121 // Populates the field of the insn given the start position and the number of
1122 // consecutive bits to scan for.
1123 //
1124 // Returns a pair of values (indicator, field), where the indicator is false
1125 // if there exists any uninitialized bit value in the range and true if all
1126 // bits are well-known. The second value is the potentially populated field.
fieldFromInsn(const insn_t & Insn,unsigned StartBit,unsigned NumBits) const1127 std::pair<bool, uint64_t> FilterChooser::fieldFromInsn(const insn_t &Insn,
1128 unsigned StartBit,
1129 unsigned NumBits) const {
1130 uint64_t Field = 0;
1131
1132 for (unsigned i = 0; i < NumBits; ++i) {
1133 if (Insn[StartBit + i] == BitValue::BIT_UNSET)
1134 return {false, Field};
1135
1136 if (Insn[StartBit + i] == BitValue::BIT_TRUE)
1137 Field = Field | (1ULL << i);
1138 }
1139
1140 return {true, Field};
1141 }
1142
1143 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
1144 /// filter array as a series of chars.
dumpFilterArray(raw_ostream & OS,ArrayRef<BitValue> Filter) const1145 void FilterChooser::dumpFilterArray(raw_ostream &OS,
1146 ArrayRef<BitValue> Filter) const {
1147 for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--)
1148 OS << Filter[bitIndex - 1];
1149 }
1150
1151 /// dumpStack - dumpStack traverses the filter chooser chain and calls
1152 /// dumpFilterArray on each filter chooser up to the top level one.
dumpStack(raw_ostream & OS,const char * prefix) const1153 void FilterChooser::dumpStack(raw_ostream &OS, const char *prefix) const {
1154 const FilterChooser *current = this;
1155
1156 while (current) {
1157 OS << prefix;
1158 dumpFilterArray(OS, current->FilterBitValues);
1159 OS << '\n';
1160 current = current->Parent;
1161 }
1162 }
1163
1164 // Calculates the island(s) needed to decode the instruction.
1165 // This returns a list of undecoded bits of an instructions, for example,
1166 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
1167 // decoded bits in order to verify that the instruction matches the Opcode.
getIslands(std::vector<Island> & Islands,const insn_t & Insn) const1168 unsigned FilterChooser::getIslands(std::vector<Island> &Islands,
1169 const insn_t &Insn) const {
1170 uint64_t FieldVal;
1171 unsigned StartBit;
1172
1173 // 0: Init
1174 // 1: Water (the bit value does not affect decoding)
1175 // 2: Island (well-known bit value needed for decoding)
1176 unsigned State = 0;
1177
1178 for (unsigned i = 0; i < BitWidth; ++i) {
1179 std::optional<uint64_t> Val = Insn[i].getValue();
1180 bool Filtered = PositionFiltered(i);
1181 switch (State) {
1182 default:
1183 llvm_unreachable("Unreachable code!");
1184 case 0:
1185 case 1:
1186 if (Filtered || !Val) {
1187 State = 1; // Still in Water
1188 } else {
1189 State = 2; // Into the Island
1190 StartBit = i;
1191 FieldVal = *Val;
1192 }
1193 break;
1194 case 2:
1195 if (Filtered || !Val) {
1196 State = 1; // Into the Water
1197 Islands.push_back({StartBit, i - StartBit, FieldVal});
1198 } else {
1199 State = 2; // Still in Island
1200 FieldVal |= *Val << (i - StartBit);
1201 }
1202 break;
1203 }
1204 }
1205 // If we are still in Island after the loop, do some housekeeping.
1206 if (State == 2)
1207 Islands.push_back({StartBit, BitWidth - StartBit, FieldVal});
1208
1209 return Islands.size();
1210 }
1211
emitBinaryParser(raw_ostream & OS,indent Indent,const OperandInfo & OpInfo) const1212 bool FilterChooser::emitBinaryParser(raw_ostream &OS, indent Indent,
1213 const OperandInfo &OpInfo) const {
1214 const std::string &Decoder = OpInfo.Decoder;
1215
1216 bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0;
1217
1218 if (UseInsertBits) {
1219 OS << Indent << "tmp = 0x";
1220 OS.write_hex(OpInfo.InitValue);
1221 OS << ";\n";
1222 }
1223
1224 for (const EncodingField &EF : OpInfo) {
1225 OS << Indent;
1226 if (UseInsertBits)
1227 OS << "insertBits(tmp, ";
1228 else
1229 OS << "tmp = ";
1230 OS << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')';
1231 if (UseInsertBits)
1232 OS << ", " << EF.Offset << ", " << EF.Width << ')';
1233 else if (EF.Offset != 0)
1234 OS << " << " << EF.Offset;
1235 OS << ";\n";
1236 }
1237
1238 bool OpHasCompleteDecoder;
1239 if (!Decoder.empty()) {
1240 OpHasCompleteDecoder = OpInfo.HasCompleteDecoder;
1241 OS << Indent << "if (!Check(S, " << Decoder
1242 << "(MI, tmp, Address, Decoder))) { "
1243 << (OpHasCompleteDecoder ? "" : "DecodeComplete = false; ")
1244 << "return MCDisassembler::Fail; }\n";
1245 } else {
1246 OpHasCompleteDecoder = true;
1247 OS << Indent << "MI.addOperand(MCOperand::createImm(tmp));\n";
1248 }
1249 return OpHasCompleteDecoder;
1250 }
1251
emitDecoder(raw_ostream & OS,indent Indent,unsigned Opc) const1252 bool FilterChooser::emitDecoder(raw_ostream &OS, indent Indent,
1253 unsigned Opc) const {
1254 bool HasCompleteDecoder = true;
1255
1256 for (const auto &Op : Operands.find(Opc)->second) {
1257 // If a custom instruction decoder was specified, use that.
1258 if (Op.numFields() == 0 && !Op.Decoder.empty()) {
1259 HasCompleteDecoder = Op.HasCompleteDecoder;
1260 OS << Indent << "if (!Check(S, " << Op.Decoder
1261 << "(MI, insn, Address, Decoder))) { "
1262 << (HasCompleteDecoder ? "" : "DecodeComplete = false; ")
1263 << "return MCDisassembler::Fail; }\n";
1264 break;
1265 }
1266
1267 HasCompleteDecoder &= emitBinaryParser(OS, Indent, Op);
1268 }
1269 return HasCompleteDecoder;
1270 }
1271
getDecoderIndex(DecoderSet & Decoders,unsigned Opc) const1272 std::pair<unsigned, bool> FilterChooser::getDecoderIndex(DecoderSet &Decoders,
1273 unsigned Opc) const {
1274 // Build up the predicate string.
1275 SmallString<256> Decoder;
1276 // FIXME: emitDecoder() function can take a buffer directly rather than
1277 // a stream.
1278 raw_svector_ostream S(Decoder);
1279 indent Indent(UseFnTableInDecodeToMCInst ? 2 : 4);
1280 bool HasCompleteDecoder = emitDecoder(S, Indent, Opc);
1281
1282 // Using the full decoder string as the key value here is a bit
1283 // heavyweight, but is effective. If the string comparisons become a
1284 // performance concern, we can implement a mangling of the predicate
1285 // data easily enough with a map back to the actual string. That's
1286 // overkill for now, though.
1287
1288 // Make sure the predicate is in the table.
1289 Decoders.insert(CachedHashString(Decoder));
1290 // Now figure out the index for when we write out the table.
1291 DecoderSet::const_iterator P = find(Decoders, Decoder.str());
1292 return {(unsigned)(P - Decoders.begin()), HasCompleteDecoder};
1293 }
1294
1295 // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||.
emitPredicateMatchAux(const Init & Val,bool ParenIfBinOp,raw_ostream & OS) const1296 bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
1297 raw_ostream &OS) const {
1298 if (const auto *D = dyn_cast<DefInit>(&Val)) {
1299 if (!D->getDef()->isSubClassOf("SubtargetFeature"))
1300 return true;
1301 OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString()
1302 << "]";
1303 return false;
1304 }
1305 if (const auto *D = dyn_cast<DagInit>(&Val)) {
1306 std::string Op = D->getOperator()->getAsString();
1307 if (Op == "not" && D->getNumArgs() == 1) {
1308 OS << '!';
1309 return emitPredicateMatchAux(*D->getArg(0), true, OS);
1310 }
1311 if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) {
1312 bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true);
1313 if (Paren)
1314 OS << '(';
1315 ListSeparator LS(Op == "any_of" ? " || " : " && ");
1316 for (auto *Arg : D->getArgs()) {
1317 OS << LS;
1318 if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS))
1319 return true;
1320 }
1321 if (Paren)
1322 OS << ')';
1323 return false;
1324 }
1325 }
1326 return true;
1327 }
1328
emitPredicateMatch(raw_ostream & OS,unsigned Opc) const1329 bool FilterChooser::emitPredicateMatch(raw_ostream &OS, unsigned Opc) const {
1330 const ListInit *Predicates =
1331 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
1332 bool IsFirstEmission = true;
1333 for (unsigned i = 0; i < Predicates->size(); ++i) {
1334 const Record *Pred = Predicates->getElementAsRecord(i);
1335 if (!Pred->getValue("AssemblerMatcherPredicate"))
1336 continue;
1337
1338 if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
1339 continue;
1340
1341 if (!IsFirstEmission)
1342 OS << " && ";
1343 if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"),
1344 Predicates->size() > 1, OS))
1345 PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!");
1346 IsFirstEmission = false;
1347 }
1348 return !Predicates->empty();
1349 }
1350
doesOpcodeNeedPredicate(unsigned Opc) const1351 bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
1352 const ListInit *Predicates =
1353 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates");
1354 for (unsigned i = 0; i < Predicates->size(); ++i) {
1355 const Record *Pred = Predicates->getElementAsRecord(i);
1356 if (!Pred->getValue("AssemblerMatcherPredicate"))
1357 continue;
1358
1359 if (isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue()))
1360 return true;
1361 }
1362 return false;
1363 }
1364
getPredicateIndex(DecoderTableInfo & TableInfo,StringRef Predicate) const1365 unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
1366 StringRef Predicate) const {
1367 // Using the full predicate string as the key value here is a bit
1368 // heavyweight, but is effective. If the string comparisons become a
1369 // performance concern, we can implement a mangling of the predicate
1370 // data easily enough with a map back to the actual string. That's
1371 // overkill for now, though.
1372
1373 // Make sure the predicate is in the table.
1374 TableInfo.Predicates.insert(CachedHashString(Predicate));
1375 // Now figure out the index for when we write out the table.
1376 PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate);
1377 return (unsigned)(P - TableInfo.Predicates.begin());
1378 }
1379
emitPredicateTableEntry(DecoderTableInfo & TableInfo,unsigned Opc) const1380 void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
1381 unsigned Opc) const {
1382 if (!doesOpcodeNeedPredicate(Opc))
1383 return;
1384
1385 // Build up the predicate string.
1386 SmallString<256> Predicate;
1387 // FIXME: emitPredicateMatch() functions can take a buffer directly rather
1388 // than a stream.
1389 raw_svector_ostream PS(Predicate);
1390 emitPredicateMatch(PS, Opc);
1391
1392 // Figure out the index into the predicate table for the predicate just
1393 // computed.
1394 unsigned PIdx = getPredicateIndex(TableInfo, PS.str());
1395
1396 const uint8_t DecoderOp = TableInfo.isOutermostScope()
1397 ? MCD::OPC_CheckPredicateOrFail
1398 : MCD::OPC_CheckPredicate;
1399 TableInfo.Table.push_back(DecoderOp);
1400 TableInfo.Table.insertULEB128(PIdx);
1401
1402 if (DecoderOp == MCD::OPC_CheckPredicate) {
1403 // Push location for NumToSkip backpatching.
1404 TableInfo.FixupStack.back().push_back(TableInfo.Table.insertNumToSkip());
1405 }
1406 }
1407
emitSoftFailTableEntry(DecoderTableInfo & TableInfo,unsigned Opc) const1408 void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
1409 unsigned Opc) const {
1410 const Record *EncodingDef = AllInstructions[Opc].EncodingDef;
1411 const RecordVal *RV = EncodingDef->getValue("SoftFail");
1412 const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr;
1413
1414 if (!SFBits)
1415 return;
1416 const BitsInit *InstBits = EncodingDef->getValueAsBitsInit("Inst");
1417
1418 APInt PositiveMask(BitWidth, 0ULL);
1419 APInt NegativeMask(BitWidth, 0ULL);
1420 for (unsigned i = 0; i < BitWidth; ++i) {
1421 BitValue B(*SFBits, i);
1422 BitValue IB(*InstBits, i);
1423
1424 if (B != BitValue::BIT_TRUE)
1425 continue;
1426
1427 if (IB == BitValue::BIT_FALSE) {
1428 // The bit is meant to be false, so emit a check to see if it is true.
1429 PositiveMask.setBit(i);
1430 } else if (IB == BitValue::BIT_TRUE) {
1431 // The bit is meant to be true, so emit a check to see if it is false.
1432 NegativeMask.setBit(i);
1433 } else {
1434 // The bit is not set; this must be an error!
1435 errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
1436 << AllInstructions[Opc] << " is set but Inst{" << i
1437 << "} is unset!\n"
1438 << " - You can only mark a bit as SoftFail if it is fully defined"
1439 << " (1/0 - not '?') in Inst\n";
1440 return;
1441 }
1442 }
1443
1444 bool NeedPositiveMask = PositiveMask.getBoolValue();
1445 bool NeedNegativeMask = NegativeMask.getBoolValue();
1446
1447 if (!NeedPositiveMask && !NeedNegativeMask)
1448 return;
1449
1450 TableInfo.Table.push_back(MCD::OPC_SoftFail);
1451 TableInfo.Table.insertULEB128(PositiveMask.getZExtValue());
1452 TableInfo.Table.insertULEB128(NegativeMask.getZExtValue());
1453 }
1454
1455 // Emits table entries to decode the singleton.
emitSingletonTableEntry(DecoderTableInfo & TableInfo,EncodingIDAndOpcode Opc) const1456 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1457 EncodingIDAndOpcode Opc) const {
1458 std::vector<Island> Islands;
1459 insn_t Insn;
1460 insnWithID(Insn, Opc.EncodingID);
1461
1462 // Look for islands of undecoded bits of the singleton.
1463 getIslands(Islands, Insn);
1464
1465 // Emit the predicate table entry if one is needed.
1466 emitPredicateTableEntry(TableInfo, Opc.EncodingID);
1467
1468 // Check any additional encoding fields needed.
1469 for (const Island &Ilnd : reverse(Islands)) {
1470 unsigned NumBits = Ilnd.NumBits;
1471 assert(isUInt<8>(NumBits) && "NumBits overflowed uint8 table entry!");
1472 const uint8_t DecoderOp = TableInfo.isOutermostScope()
1473 ? MCD::OPC_CheckFieldOrFail
1474 : MCD::OPC_CheckField;
1475 TableInfo.Table.push_back(DecoderOp);
1476
1477 TableInfo.Table.insertULEB128(Ilnd.StartBit);
1478 TableInfo.Table.push_back(NumBits);
1479 TableInfo.Table.insertULEB128(Ilnd.FieldVal);
1480
1481 if (DecoderOp == MCD::OPC_CheckField) {
1482 // Allocate space in the table for fixup so all our relative position
1483 // calculations work OK even before we fully resolve the real value here.
1484
1485 // Push location for NumToSkip backpatching.
1486 TableInfo.FixupStack.back().push_back(TableInfo.Table.insertNumToSkip());
1487 }
1488 }
1489
1490 // Check for soft failure of the match.
1491 emitSoftFailTableEntry(TableInfo, Opc.EncodingID);
1492
1493 auto [DIdx, HasCompleteDecoder] =
1494 getDecoderIndex(TableInfo.Decoders, Opc.EncodingID);
1495
1496 // Produce OPC_Decode or OPC_TryDecode opcode based on the information
1497 // whether the instruction decoder is complete or not. If it is complete
1498 // then it handles all possible values of remaining variable/unfiltered bits
1499 // and for any value can determine if the bitpattern is a valid instruction
1500 // or not. This means OPC_Decode will be the final step in the decoding
1501 // process. If it is not complete, then the Fail return code from the
1502 // decoder method indicates that additional processing should be done to see
1503 // if there is any other instruction that also matches the bitpattern and
1504 // can decode it.
1505 const uint8_t DecoderOp = HasCompleteDecoder ? MCD::OPC_Decode
1506 : (TableInfo.isOutermostScope()
1507 ? MCD::OPC_TryDecodeOrFail
1508 : MCD::OPC_TryDecode);
1509 TableInfo.Table.push_back(DecoderOp);
1510 NumEncodingsSupported++;
1511 TableInfo.Table.insertULEB128(Opc.Opcode);
1512 TableInfo.Table.insertULEB128(DIdx);
1513
1514 if (DecoderOp == MCD::OPC_TryDecode) {
1515 // Push location for NumToSkip backpatching.
1516 TableInfo.FixupStack.back().push_back(TableInfo.Table.insertNumToSkip());
1517 }
1518 }
1519
1520 // Emits table entries to decode the singleton, and then to decode the rest.
emitSingletonTableEntry(DecoderTableInfo & TableInfo,const Filter & Best) const1521 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1522 const Filter &Best) const {
1523 EncodingIDAndOpcode Opc = Best.getSingletonOpc();
1524
1525 // complex singletons need predicate checks from the first singleton
1526 // to refer forward to the variable filterchooser that follows.
1527 TableInfo.FixupStack.emplace_back();
1528
1529 emitSingletonTableEntry(TableInfo, Opc);
1530
1531 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
1532 TableInfo.Table.size());
1533 TableInfo.FixupStack.pop_back();
1534
1535 Best.getVariableFC().emitTableEntries(TableInfo);
1536 }
1537
1538 // Assign a single filter and run with it. Top level API client can initialize
1539 // with a single filter to start the filtering process.
runSingleFilter(unsigned startBit,unsigned numBit,bool mixed)1540 void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
1541 bool mixed) {
1542 Filters.clear();
1543 Filters.emplace_back(*this, startBit, numBit, true);
1544 BestIndex = 0; // Sole Filter instance to choose from.
1545 bestFilter().recurse();
1546 }
1547
1548 // reportRegion is a helper function for filterProcessor to mark a region as
1549 // eligible for use as a filter region.
reportRegion(bitAttr_t RA,unsigned StartBit,unsigned BitIndex,bool AllowMixed)1550 void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
1551 unsigned BitIndex, bool AllowMixed) {
1552 if (RA == ATTR_MIXED && AllowMixed)
1553 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true);
1554 else if (RA == ATTR_ALL_SET && !AllowMixed)
1555 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false);
1556 }
1557
1558 // FilterProcessor scans the well-known encoding bits of the instructions and
1559 // builds up a list of candidate filters. It chooses the best filter and
1560 // recursively descends down the decoding tree.
filterProcessor(bool AllowMixed,bool Greedy)1561 bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
1562 Filters.clear();
1563 BestIndex = -1;
1564 unsigned numInstructions = Opcodes.size();
1565
1566 assert(numInstructions && "Filter created with no instructions");
1567
1568 // No further filtering is necessary.
1569 if (numInstructions == 1)
1570 return true;
1571
1572 // Heuristics. See also doFilter()'s "Heuristics" comment when num of
1573 // instructions is 3.
1574 if (AllowMixed && !Greedy) {
1575 assert(numInstructions == 3);
1576
1577 for (const auto &Opcode : Opcodes) {
1578 std::vector<Island> Islands;
1579 insn_t Insn;
1580
1581 insnWithID(Insn, Opcode.EncodingID);
1582
1583 // Look for islands of undecoded bits of any instruction.
1584 if (getIslands(Islands, Insn) > 0) {
1585 // Found an instruction with island(s). Now just assign a filter.
1586 runSingleFilter(Islands[0].StartBit, Islands[0].NumBits, true);
1587 return true;
1588 }
1589 }
1590 }
1591
1592 unsigned BitIndex;
1593
1594 // We maintain BIT_WIDTH copies of the bitAttrs automaton.
1595 // The automaton consumes the corresponding bit from each
1596 // instruction.
1597 //
1598 // Input symbols: 0, 1, and _ (unset).
1599 // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
1600 // Initial state: NONE.
1601 //
1602 // (NONE) ------- [01] -> (ALL_SET)
1603 // (NONE) ------- _ ----> (ALL_UNSET)
1604 // (ALL_SET) ---- [01] -> (ALL_SET)
1605 // (ALL_SET) ---- _ ----> (MIXED)
1606 // (ALL_UNSET) -- [01] -> (MIXED)
1607 // (ALL_UNSET) -- _ ----> (ALL_UNSET)
1608 // (MIXED) ------ . ----> (MIXED)
1609 // (FILTERED)---- . ----> (FILTERED)
1610
1611 std::vector<bitAttr_t> bitAttrs(BitWidth, ATTR_NONE);
1612
1613 // FILTERED bit positions provide no entropy and are not worthy of pursuing.
1614 // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
1615 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex)
1616 if (FilterBitValues[BitIndex].isSet())
1617 bitAttrs[BitIndex] = ATTR_FILTERED;
1618
1619 for (const auto &OpcPair : Opcodes) {
1620 insn_t insn;
1621
1622 insnWithID(insn, OpcPair.EncodingID);
1623
1624 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
1625 switch (bitAttrs[BitIndex]) {
1626 case ATTR_NONE:
1627 if (insn[BitIndex] == BitValue::BIT_UNSET)
1628 bitAttrs[BitIndex] = ATTR_ALL_UNSET;
1629 else
1630 bitAttrs[BitIndex] = ATTR_ALL_SET;
1631 break;
1632 case ATTR_ALL_SET:
1633 if (insn[BitIndex] == BitValue::BIT_UNSET)
1634 bitAttrs[BitIndex] = ATTR_MIXED;
1635 break;
1636 case ATTR_ALL_UNSET:
1637 if (insn[BitIndex] != BitValue::BIT_UNSET)
1638 bitAttrs[BitIndex] = ATTR_MIXED;
1639 break;
1640 case ATTR_MIXED:
1641 case ATTR_FILTERED:
1642 break;
1643 }
1644 }
1645 }
1646
1647 // The regionAttr automaton consumes the bitAttrs automatons' state,
1648 // lowest-to-highest.
1649 //
1650 // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
1651 // States: NONE, ALL_SET, MIXED
1652 // Initial state: NONE
1653 //
1654 // (NONE) ----- F --> (NONE)
1655 // (NONE) ----- S --> (ALL_SET) ; and set region start
1656 // (NONE) ----- U --> (NONE)
1657 // (NONE) ----- M --> (MIXED) ; and set region start
1658 // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region
1659 // (ALL_SET) -- S --> (ALL_SET)
1660 // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region
1661 // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region
1662 // (MIXED) ---- F --> (NONE) ; and report a MIXED region
1663 // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region
1664 // (MIXED) ---- U --> (NONE) ; and report a MIXED region
1665 // (MIXED) ---- M --> (MIXED)
1666
1667 bitAttr_t RA = ATTR_NONE;
1668 unsigned StartBit = 0;
1669
1670 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
1671 bitAttr_t bitAttr = bitAttrs[BitIndex];
1672
1673 assert(bitAttr != ATTR_NONE && "Bit without attributes");
1674
1675 switch (RA) {
1676 case ATTR_NONE:
1677 switch (bitAttr) {
1678 case ATTR_FILTERED:
1679 break;
1680 case ATTR_ALL_SET:
1681 StartBit = BitIndex;
1682 RA = ATTR_ALL_SET;
1683 break;
1684 case ATTR_ALL_UNSET:
1685 break;
1686 case ATTR_MIXED:
1687 StartBit = BitIndex;
1688 RA = ATTR_MIXED;
1689 break;
1690 default:
1691 llvm_unreachable("Unexpected bitAttr!");
1692 }
1693 break;
1694 case ATTR_ALL_SET:
1695 switch (bitAttr) {
1696 case ATTR_FILTERED:
1697 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1698 RA = ATTR_NONE;
1699 break;
1700 case ATTR_ALL_SET:
1701 break;
1702 case ATTR_ALL_UNSET:
1703 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1704 RA = ATTR_NONE;
1705 break;
1706 case ATTR_MIXED:
1707 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1708 StartBit = BitIndex;
1709 RA = ATTR_MIXED;
1710 break;
1711 default:
1712 llvm_unreachable("Unexpected bitAttr!");
1713 }
1714 break;
1715 case ATTR_MIXED:
1716 switch (bitAttr) {
1717 case ATTR_FILTERED:
1718 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1719 StartBit = BitIndex;
1720 RA = ATTR_NONE;
1721 break;
1722 case ATTR_ALL_SET:
1723 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1724 StartBit = BitIndex;
1725 RA = ATTR_ALL_SET;
1726 break;
1727 case ATTR_ALL_UNSET:
1728 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1729 RA = ATTR_NONE;
1730 break;
1731 case ATTR_MIXED:
1732 break;
1733 default:
1734 llvm_unreachable("Unexpected bitAttr!");
1735 }
1736 break;
1737 case ATTR_ALL_UNSET:
1738 llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
1739 case ATTR_FILTERED:
1740 llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
1741 }
1742 }
1743
1744 // At the end, if we're still in ALL_SET or MIXED states, report a region
1745 switch (RA) {
1746 case ATTR_NONE:
1747 break;
1748 case ATTR_FILTERED:
1749 break;
1750 case ATTR_ALL_SET:
1751 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1752 break;
1753 case ATTR_ALL_UNSET:
1754 break;
1755 case ATTR_MIXED:
1756 reportRegion(RA, StartBit, BitIndex, AllowMixed);
1757 break;
1758 }
1759
1760 // We have finished with the filter processings. Now it's time to choose
1761 // the best performing filter.
1762 BestIndex = 0;
1763 bool AllUseless = true;
1764 unsigned BestScore = 0;
1765
1766 for (const auto &[Idx, Filter] : enumerate(Filters)) {
1767 unsigned Usefulness = Filter.usefulness();
1768
1769 if (Usefulness)
1770 AllUseless = false;
1771
1772 if (Usefulness > BestScore) {
1773 BestIndex = Idx;
1774 BestScore = Usefulness;
1775 }
1776 }
1777
1778 if (!AllUseless)
1779 bestFilter().recurse();
1780
1781 return !AllUseless;
1782 } // end of FilterChooser::filterProcessor(bool)
1783
1784 // Decides on the best configuration of filter(s) to use in order to decode
1785 // the instructions. A conflict of instructions may occur, in which case we
1786 // dump the conflict set to the standard error.
doFilter()1787 void FilterChooser::doFilter() {
1788 unsigned Num = Opcodes.size();
1789 assert(Num && "FilterChooser created with no instructions");
1790
1791 // Try regions of consecutive known bit values first.
1792 if (filterProcessor(false))
1793 return;
1794
1795 // Then regions of mixed bits (both known and unitialized bit values allowed).
1796 if (filterProcessor(true))
1797 return;
1798
1799 // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
1800 // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
1801 // well-known encoding pattern. In such case, we backtrack and scan for the
1802 // the very first consecutive ATTR_ALL_SET region and assign a filter to it.
1803 if (Num == 3 && filterProcessor(true, false))
1804 return;
1805
1806 // If we come to here, the instruction decoding has failed.
1807 // Set the BestIndex to -1 to indicate so.
1808 BestIndex = -1;
1809 }
1810
1811 // emitTableEntries - Emit state machine entries to decode our share of
1812 // instructions.
emitTableEntries(DecoderTableInfo & TableInfo) const1813 void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
1814 if (Opcodes.size() == 1) {
1815 // There is only one instruction in the set, which is great!
1816 // Call emitSingletonDecoder() to see whether there are any remaining
1817 // encodings bits.
1818 emitSingletonTableEntry(TableInfo, Opcodes[0]);
1819 return;
1820 }
1821
1822 // Choose the best filter to do the decodings!
1823 if (BestIndex != -1) {
1824 const Filter &Best = Filters[BestIndex];
1825 if (Best.getNumFiltered() == 1)
1826 emitSingletonTableEntry(TableInfo, Best);
1827 else
1828 Best.emitTableEntry(TableInfo);
1829 return;
1830 }
1831
1832 // We don't know how to decode these instructions! Dump the
1833 // conflict set and bail.
1834
1835 // Print out useful conflict information for postmortem analysis.
1836 errs() << "Decoding Conflict:\n";
1837
1838 dumpStack(errs(), "\t\t");
1839
1840 for (auto Opcode : Opcodes) {
1841 const EncodingAndInst &Enc = AllInstructions[Opcode.EncodingID];
1842 errs() << '\t' << Enc << ' ';
1843 dumpBits(errs(), getBitsField(*Enc.EncodingDef, "Inst"));
1844 errs() << '\n';
1845 }
1846 PrintFatalError("Decoding conflict encountered");
1847 }
1848
findOperandDecoderMethod(const Record * Record)1849 static std::string findOperandDecoderMethod(const Record *Record) {
1850 std::string Decoder;
1851
1852 const RecordVal *DecoderString = Record->getValue("DecoderMethod");
1853 const StringInit *String =
1854 DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
1855 if (String) {
1856 Decoder = String->getValue().str();
1857 if (!Decoder.empty())
1858 return Decoder;
1859 }
1860
1861 if (Record->isSubClassOf("RegisterOperand"))
1862 // Allows use of a DecoderMethod in referenced RegisterClass if set.
1863 return findOperandDecoderMethod(Record->getValueAsDef("RegClass"));
1864
1865 if (Record->isSubClassOf("RegisterClass")) {
1866 Decoder = "Decode" + Record->getName().str() + "RegisterClass";
1867 } else if (Record->isSubClassOf("PointerLikeRegClass")) {
1868 Decoder = "DecodePointerLikeRegClass" +
1869 utostr(Record->getValueAsInt("RegClassKind"));
1870 }
1871
1872 return Decoder;
1873 }
1874
getOpInfo(const Record * TypeRecord)1875 OperandInfo getOpInfo(const Record *TypeRecord) {
1876 const RecordVal *HasCompleteDecoderVal =
1877 TypeRecord->getValue("hasCompleteDecoder");
1878 const BitInit *HasCompleteDecoderBit =
1879 HasCompleteDecoderVal
1880 ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
1881 : nullptr;
1882 bool HasCompleteDecoder =
1883 HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
1884
1885 return OperandInfo(findOperandDecoderMethod(TypeRecord), HasCompleteDecoder);
1886 }
1887
parseVarLenInstOperand(const Record & Def,std::vector<OperandInfo> & Operands,const CodeGenInstruction & CGI)1888 static void parseVarLenInstOperand(const Record &Def,
1889 std::vector<OperandInfo> &Operands,
1890 const CodeGenInstruction &CGI) {
1891
1892 const RecordVal *RV = Def.getValue("Inst");
1893 VarLenInst VLI(cast<DagInit>(RV->getValue()), RV);
1894 SmallVector<int> TiedTo;
1895
1896 for (const auto &[Idx, Op] : enumerate(CGI.Operands)) {
1897 if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0)
1898 for (auto *Arg : Op.MIOperandInfo->getArgs())
1899 Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef()));
1900 else
1901 Operands.push_back(getOpInfo(Op.Rec));
1902
1903 int TiedReg = Op.getTiedRegister();
1904 TiedTo.push_back(-1);
1905 if (TiedReg != -1) {
1906 TiedTo[Idx] = TiedReg;
1907 TiedTo[TiedReg] = Idx;
1908 }
1909 }
1910
1911 unsigned CurrBitPos = 0;
1912 for (const auto &EncodingSegment : VLI) {
1913 unsigned Offset = 0;
1914 StringRef OpName;
1915
1916 if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) {
1917 OpName = SI->getValue();
1918 } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) {
1919 OpName = cast<StringInit>(DI->getArg(0))->getValue();
1920 Offset = cast<IntInit>(DI->getArg(2))->getValue();
1921 }
1922
1923 if (!OpName.empty()) {
1924 auto OpSubOpPair =
1925 const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
1926 OpName);
1927 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair);
1928 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
1929 if (!EncodingSegment.CustomDecoder.empty())
1930 Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str();
1931
1932 int TiedReg = TiedTo[OpSubOpPair.first];
1933 if (TiedReg != -1) {
1934 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
1935 {TiedReg, OpSubOpPair.second});
1936 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset);
1937 }
1938 }
1939
1940 CurrBitPos += EncodingSegment.BitWidth;
1941 }
1942 }
1943
debugDumpRecord(const Record & Rec)1944 static void debugDumpRecord(const Record &Rec) {
1945 // Dump the record, so we can see what's going on.
1946 PrintNote([&Rec](raw_ostream &OS) {
1947 OS << "Dumping record for previous error:\n";
1948 OS << Rec;
1949 });
1950 }
1951
1952 /// For an operand field named OpName: populate OpInfo.InitValue with the
1953 /// constant-valued bit values, and OpInfo.Fields with the ranges of bits to
1954 /// insert from the decoded instruction.
addOneOperandFields(const Record & EncodingDef,const BitsInit & Bits,std::map<StringRef,StringRef> & TiedNames,StringRef OpName,OperandInfo & OpInfo)1955 static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits,
1956 std::map<StringRef, StringRef> &TiedNames,
1957 StringRef OpName, OperandInfo &OpInfo) {
1958 // Some bits of the operand may be required to be 1 depending on the
1959 // instruction's encoding. Collect those bits.
1960 if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName))
1961 if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue()))
1962 for (unsigned I = 0; I < OpBits->getNumBits(); ++I)
1963 if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I)))
1964 if (OpBit->getValue())
1965 OpInfo.InitValue |= 1ULL << I;
1966
1967 for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) {
1968 const VarInit *Var;
1969 unsigned Offset = 0;
1970 for (; J != Bits.getNumBits(); ++J) {
1971 const VarBitInit *BJ = dyn_cast<VarBitInit>(Bits.getBit(J));
1972 if (BJ) {
1973 Var = dyn_cast<VarInit>(BJ->getBitVar());
1974 if (I == J)
1975 Offset = BJ->getBitNum();
1976 else if (BJ->getBitNum() != Offset + J - I)
1977 break;
1978 } else {
1979 Var = dyn_cast<VarInit>(Bits.getBit(J));
1980 }
1981 if (!Var ||
1982 (Var->getName() != OpName && Var->getName() != TiedNames[OpName]))
1983 break;
1984 }
1985 if (I == J)
1986 ++J;
1987 else
1988 OpInfo.addField(I, J - I, Offset);
1989 }
1990 }
1991
1992 static unsigned
populateInstruction(const CodeGenTarget & Target,const Record & EncodingDef,const CodeGenInstruction & CGI,unsigned Opc,std::map<unsigned,std::vector<OperandInfo>> & Operands,bool IsVarLenInst)1993 populateInstruction(const CodeGenTarget &Target, const Record &EncodingDef,
1994 const CodeGenInstruction &CGI, unsigned Opc,
1995 std::map<unsigned, std::vector<OperandInfo>> &Operands,
1996 bool IsVarLenInst) {
1997 const Record &Def = *CGI.TheDef;
1998 // If all the bit positions are not specified; do not decode this instruction.
1999 // We are bound to fail! For proper disassembly, the well-known encoding bits
2000 // of the instruction must be fully specified.
2001
2002 const BitsInit &Bits = getBitsField(EncodingDef, "Inst");
2003 if (Bits.allInComplete())
2004 return 0;
2005
2006 std::vector<OperandInfo> InsnOperands;
2007
2008 // If the instruction has specified a custom decoding hook, use that instead
2009 // of trying to auto-generate the decoder.
2010 StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod");
2011 if (!InstDecoder.empty()) {
2012 bool HasCompleteInstDecoder =
2013 EncodingDef.getValueAsBit("hasCompleteDecoder");
2014 InsnOperands.push_back(
2015 OperandInfo(InstDecoder.str(), HasCompleteInstDecoder));
2016 Operands[Opc] = std::move(InsnOperands);
2017 return Bits.getNumBits();
2018 }
2019
2020 // Generate a description of the operand of the instruction that we know
2021 // how to decode automatically.
2022 // FIXME: We'll need to have a way to manually override this as needed.
2023
2024 // Gather the outputs/inputs of the instruction, so we can find their
2025 // positions in the encoding. This assumes for now that they appear in the
2026 // MCInst in the order that they're listed.
2027 std::vector<std::pair<const Init *, StringRef>> InOutOperands;
2028 const DagInit *Out = Def.getValueAsDag("OutOperandList");
2029 const DagInit *In = Def.getValueAsDag("InOperandList");
2030 for (const auto &[Idx, Arg] : enumerate(Out->getArgs()))
2031 InOutOperands.emplace_back(Arg, Out->getArgNameStr(Idx));
2032 for (const auto &[Idx, Arg] : enumerate(In->getArgs()))
2033 InOutOperands.emplace_back(Arg, In->getArgNameStr(Idx));
2034
2035 // Search for tied operands, so that we can correctly instantiate
2036 // operands that are not explicitly represented in the encoding.
2037 std::map<StringRef, StringRef> TiedNames;
2038 for (const auto &Op : CGI.Operands) {
2039 for (const auto &[J, CI] : enumerate(Op.Constraints)) {
2040 if (!CI.isTied())
2041 continue;
2042 std::pair<unsigned, unsigned> SO =
2043 CGI.Operands.getSubOperandNumber(CI.getTiedOperand());
2044 StringRef TiedName = CGI.Operands[SO.first].SubOpNames[SO.second];
2045 if (TiedName.empty())
2046 TiedName = CGI.Operands[SO.first].Name;
2047 StringRef MyName = Op.SubOpNames[J];
2048 if (MyName.empty())
2049 MyName = Op.Name;
2050
2051 TiedNames[MyName] = TiedName;
2052 TiedNames[TiedName] = MyName;
2053 }
2054 }
2055
2056 if (IsVarLenInst) {
2057 parseVarLenInstOperand(EncodingDef, InsnOperands, CGI);
2058 } else {
2059 // For each operand, see if we can figure out where it is encoded.
2060 for (const auto &Op : InOutOperands) {
2061 const Init *OpInit = Op.first;
2062 StringRef OpName = Op.second;
2063
2064 // We're ready to find the instruction encoding locations for this
2065 // operand.
2066
2067 // First, find the operand type ("OpInit"), and sub-op names
2068 // ("SubArgDag") if present.
2069 const DagInit *SubArgDag = dyn_cast<DagInit>(OpInit);
2070 if (SubArgDag)
2071 OpInit = SubArgDag->getOperator();
2072 const Record *OpTypeRec = cast<DefInit>(OpInit)->getDef();
2073 // Lookup the sub-operands from the operand type record (note that only
2074 // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp).
2075 const DagInit *SubOps = OpTypeRec->isSubClassOf("Operand")
2076 ? OpTypeRec->getValueAsDag("MIOperandInfo")
2077 : nullptr;
2078
2079 // Lookup the decoder method and construct a new OperandInfo to hold our
2080 // result.
2081 OperandInfo OpInfo = getOpInfo(OpTypeRec);
2082
2083 // If we have named sub-operands...
2084 if (SubArgDag) {
2085 // Then there should not be a custom decoder specified on the top-level
2086 // type.
2087 if (!OpInfo.Decoder.empty()) {
2088 PrintError(EncodingDef.getLoc(),
2089 "DecoderEmitter: operand \"" + OpName + "\" has type \"" +
2090 OpInit->getAsString() +
2091 "\" with a custom DecoderMethod, but also named "
2092 "sub-operands.");
2093 continue;
2094 }
2095
2096 // Decode each of the sub-ops separately.
2097 assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs());
2098 for (const auto &[I, Arg] : enumerate(SubOps->getArgs())) {
2099 StringRef SubOpName = SubArgDag->getArgNameStr(I);
2100 OperandInfo SubOpInfo = getOpInfo(cast<DefInit>(Arg)->getDef());
2101
2102 addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpName,
2103 SubOpInfo);
2104 InsnOperands.push_back(std::move(SubOpInfo));
2105 }
2106 continue;
2107 }
2108
2109 // Otherwise, if we have an operand with sub-operands, but they aren't
2110 // named...
2111 if (SubOps && OpInfo.Decoder.empty()) {
2112 // If it's a single sub-operand, and no custom decoder, use the decoder
2113 // from the one sub-operand.
2114 if (SubOps->getNumArgs() == 1)
2115 OpInfo = getOpInfo(cast<DefInit>(SubOps->getArg(0))->getDef());
2116
2117 // If we have multiple sub-ops, there'd better have a custom
2118 // decoder. (Otherwise we don't know how to populate them properly...)
2119 if (SubOps->getNumArgs() > 1) {
2120 PrintError(EncodingDef.getLoc(),
2121 "DecoderEmitter: operand \"" + OpName +
2122 "\" uses MIOperandInfo with multiple ops, but doesn't "
2123 "have a custom decoder!");
2124 debugDumpRecord(EncodingDef);
2125 continue;
2126 }
2127 }
2128
2129 addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo);
2130 // FIXME: it should be an error not to find a definition for a given
2131 // operand, rather than just failing to add it to the resulting
2132 // instruction! (This is a longstanding bug, which will be addressed in an
2133 // upcoming change.)
2134 if (OpInfo.numFields() > 0)
2135 InsnOperands.push_back(std::move(OpInfo));
2136 }
2137 }
2138 Operands[Opc] = std::move(InsnOperands);
2139
2140 #if 0
2141 LLVM_DEBUG({
2142 // Dumps the instruction encoding bits.
2143 dumpBits(errs(), Bits);
2144
2145 errs() << '\n';
2146
2147 // Dumps the list of operand info.
2148 for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
2149 const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
2150 const std::string &OperandName = Info.Name;
2151 const Record &OperandDef = *Info.Rec;
2152
2153 errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n";
2154 }
2155 });
2156 #endif
2157
2158 return Bits.getNumBits();
2159 }
2160
2161 // emitFieldFromInstruction - Emit the templated helper function
2162 // fieldFromInstruction().
2163 // On Windows we make sure that this function is not inlined when
2164 // using the VS compiler. It has a bug which causes the function
2165 // to be optimized out in some circumstances. See llvm.org/pr38292
emitFieldFromInstruction(formatted_raw_ostream & OS)2166 static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
2167 OS << R"(
2168 // Helper functions for extracting fields from encoded instructions.
2169 // InsnType must either be integral or an APInt-like object that must:
2170 // * be default-constructible and copy-constructible
2171 // * Support extractBitsAsZExtValue(numBits, startBit)
2172 // * Support the ~, &, ==, and != operators with other objects of the same type
2173 // * Support the != and bitwise & with uint64_t
2174 template <typename InsnType>
2175 #if defined(_MSC_VER) && !defined(__clang__)
2176 __declspec(noinline)
2177 #endif
2178 static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>
2179 fieldFromInstruction(const InsnType &insn, unsigned startBit,
2180 unsigned numBits) {
2181 assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!");
2182 assert(startBit + numBits <= (sizeof(InsnType) * 8) &&
2183 "Instruction field out of bounds!");
2184 InsnType fieldMask;
2185 if (numBits == sizeof(InsnType) * 8)
2186 fieldMask = (InsnType)(-1LL);
2187 else
2188 fieldMask = (((InsnType)1 << numBits) - 1) << startBit;
2189 return (insn & fieldMask) >> startBit;
2190 }
2191
2192 template <typename InsnType>
2193 static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t>
2194 fieldFromInstruction(const InsnType &insn, unsigned startBit,
2195 unsigned numBits) {
2196 return insn.extractBitsAsZExtValue(numBits, startBit);
2197 }
2198 )";
2199 }
2200
2201 // emitInsertBits - Emit the templated helper function insertBits().
emitInsertBits(formatted_raw_ostream & OS)2202 static void emitInsertBits(formatted_raw_ostream &OS) {
2203 OS << R"(
2204 // Helper function for inserting bits extracted from an encoded instruction into
2205 // an integer-typed field.
2206 template <typename IntType>
2207 static std::enable_if_t<std::is_integral_v<IntType>, void>
2208 insertBits(IntType &field, IntType bits, unsigned startBit, unsigned numBits) {
2209 // Check that no bit beyond numBits is set, so that a simple bitwise |
2210 // is sufficient.
2211 assert((~(((IntType)1 << numBits) - 1) & bits) == 0 &&
2212 "bits has more than numBits bits set");
2213 assert(startBit + numBits <= sizeof(IntType) * 8);
2214 (void)numBits;
2215 field |= bits << startBit;
2216 }
2217 )";
2218 }
2219
2220 // emitDecodeInstruction - Emit the templated helper function
2221 // decodeInstruction().
emitDecodeInstruction(formatted_raw_ostream & OS,bool IsVarLenInst,unsigned OpcodeMask)2222 static void emitDecodeInstruction(formatted_raw_ostream &OS, bool IsVarLenInst,
2223 unsigned OpcodeMask) {
2224 const bool HasTryDecode = OpcodeMask & ((1 << MCD::OPC_TryDecode) |
2225 (1 << MCD::OPC_TryDecodeOrFail));
2226 const bool HasCheckPredicate =
2227 OpcodeMask &
2228 ((1 << MCD::OPC_CheckPredicate) | (1 << MCD::OPC_CheckPredicateOrFail));
2229 const bool HasSoftFail = OpcodeMask & (1 << MCD::OPC_SoftFail);
2230
2231 OS << R"(
2232 static unsigned decodeNumToSkip(const uint8_t *&Ptr) {
2233 unsigned NumToSkip = *Ptr++;
2234 NumToSkip |= (*Ptr++) << 8;
2235 )";
2236 if (getNumToSkipInBytes() == 3)
2237 OS << " NumToSkip |= (*Ptr++) << 16;\n";
2238 OS << R"( return NumToSkip;
2239 }
2240
2241 template <typename InsnType>
2242 static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
2243 InsnType insn, uint64_t Address,
2244 const MCDisassembler *DisAsm,
2245 const MCSubtargetInfo &STI)";
2246 if (IsVarLenInst) {
2247 OS << ",\n "
2248 "llvm::function_ref<void(APInt &, uint64_t)> makeUp";
2249 }
2250 OS << ") {\n";
2251 if (HasCheckPredicate)
2252 OS << " const FeatureBitset &Bits = STI.getFeatureBits();\n";
2253
2254 OS << R"(
2255 const uint8_t *Ptr = DecodeTable;
2256 uint64_t CurFieldValue = 0;
2257 DecodeStatus S = MCDisassembler::Success;
2258 while (true) {
2259 ptrdiff_t Loc = Ptr - DecodeTable;
2260 const uint8_t DecoderOp = *Ptr++;
2261 switch (DecoderOp) {
2262 default:
2263 errs() << Loc << ": Unexpected decode table opcode: "
2264 << (int)DecoderOp << '\n';
2265 return MCDisassembler::Fail;
2266 case MCD::OPC_ExtractField: {
2267 // Decode the start value.
2268 unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
2269 unsigned Len = *Ptr++;)";
2270 if (IsVarLenInst)
2271 OS << "\n makeUp(insn, Start + Len);";
2272 OS << R"(
2273 CurFieldValue = fieldFromInstruction(insn, Start, Len);
2274 LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", "
2275 << Len << "): " << CurFieldValue << "\n");
2276 break;
2277 }
2278 case MCD::OPC_FilterValue:
2279 case MCD::OPC_FilterValueOrFail: {
2280 bool IsFail = DecoderOp == MCD::OPC_FilterValueOrFail;
2281 // Decode the field value.
2282 uint64_t Val = decodeULEB128AndIncUnsafe(Ptr);
2283 bool Failed = Val != CurFieldValue;
2284 unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
2285
2286 // Note: Print NumToSkip even for OPC_FilterValueOrFail to simplify debug
2287 // prints.
2288 LLVM_DEBUG({
2289 StringRef OpName = IsFail ? "OPC_FilterValueOrFail" : "OPC_FilterValue";
2290 dbgs() << Loc << ": " << OpName << '(' << Val << ", " << NumToSkip
2291 << ") " << (Failed ? "FAIL:" : "PASS:")
2292 << " continuing at " << (Ptr - DecodeTable) << '\n';
2293 });
2294
2295 // Perform the filter operation.
2296 if (Failed) {
2297 if (IsFail)
2298 return MCDisassembler::Fail;
2299 Ptr += NumToSkip;
2300 }
2301 break;
2302 }
2303 case MCD::OPC_CheckField:
2304 case MCD::OPC_CheckFieldOrFail: {
2305 bool IsFail = DecoderOp == MCD::OPC_CheckFieldOrFail;
2306 // Decode the start value.
2307 unsigned Start = decodeULEB128AndIncUnsafe(Ptr);
2308 unsigned Len = *Ptr;)";
2309 if (IsVarLenInst)
2310 OS << "\n makeUp(insn, Start + Len);";
2311 OS << R"(
2312 uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
2313 // Decode the field value.
2314 unsigned PtrLen = 0;
2315 uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
2316 Ptr += PtrLen;
2317 bool Failed = ExpectedValue != FieldValue;
2318 unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
2319
2320 LLVM_DEBUG({
2321 StringRef OpName = IsFail ? "OPC_CheckFieldOrFail" : "OPC_CheckField";
2322 dbgs() << Loc << ": " << OpName << '(' << Start << ", " << Len << ", "
2323 << ExpectedValue << ", " << NumToSkip << "): FieldValue = "
2324 << FieldValue << ", ExpectedValue = " << ExpectedValue << ": "
2325 << (Failed ? "FAIL\n" : "PASS\n");
2326 });
2327
2328 // If the actual and expected values don't match, skip or fail.
2329 if (Failed) {
2330 if (IsFail)
2331 return MCDisassembler::Fail;
2332 Ptr += NumToSkip;
2333 }
2334 break;
2335 })";
2336 if (HasCheckPredicate) {
2337 OS << R"(
2338 case MCD::OPC_CheckPredicate:
2339 case MCD::OPC_CheckPredicateOrFail: {
2340 bool IsFail = DecoderOp == MCD::OPC_CheckPredicateOrFail;
2341 // Decode the Predicate Index value.
2342 unsigned PIdx = decodeULEB128AndIncUnsafe(Ptr);
2343 unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
2344 // Check the predicate.
2345 bool Failed = !checkDecoderPredicate(PIdx, Bits);
2346
2347 LLVM_DEBUG({
2348 StringRef OpName = IsFail ? "OPC_CheckPredicateOrFail" : "OPC_CheckPredicate";
2349 dbgs() << Loc << ": " << OpName << '(' << PIdx << ", " << NumToSkip
2350 << "): " << (Failed ? "FAIL\n" : "PASS\n");
2351 });
2352
2353 if (Failed) {
2354 if (IsFail)
2355 return MCDisassembler::Fail;
2356 Ptr += NumToSkip;
2357 }
2358 break;
2359 })";
2360 }
2361 OS << R"(
2362 case MCD::OPC_Decode: {
2363 // Decode the Opcode value.
2364 unsigned Opc = decodeULEB128AndIncUnsafe(Ptr);
2365 unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
2366
2367 MI.clear();
2368 MI.setOpcode(Opc);
2369 bool DecodeComplete;)";
2370 if (IsVarLenInst) {
2371 OS << "\n unsigned Len = InstrLenTable[Opc];\n"
2372 << " makeUp(insn, Len);";
2373 }
2374 OS << R"(
2375 S = decodeToMCInst(DecodeIdx, S, insn, MI, Address, DisAsm, DecodeComplete);
2376 assert(DecodeComplete);
2377
2378 LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
2379 << ", using decoder " << DecodeIdx << ": "
2380 << (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n"));
2381 return S;
2382 })";
2383 if (HasTryDecode) {
2384 OS << R"(
2385 case MCD::OPC_TryDecode:
2386 case MCD::OPC_TryDecodeOrFail: {
2387 bool IsFail = DecoderOp == MCD::OPC_TryDecodeOrFail;
2388 // Decode the Opcode value.
2389 unsigned Opc = decodeULEB128AndIncUnsafe(Ptr);
2390 unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
2391 unsigned NumToSkip = IsFail ? 0 : decodeNumToSkip(Ptr);
2392
2393 // Perform the decode operation.
2394 MCInst TmpMI;
2395 TmpMI.setOpcode(Opc);
2396 bool DecodeComplete;
2397 S = decodeToMCInst(DecodeIdx, S, insn, TmpMI, Address, DisAsm, DecodeComplete);
2398 LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc
2399 << ", using decoder " << DecodeIdx << ": ");
2400
2401 if (DecodeComplete) {
2402 // Decoding complete.
2403 LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS\n" : "FAIL\n"));
2404 MI = TmpMI;
2405 return S;
2406 }
2407 assert(S == MCDisassembler::Fail);
2408 if (IsFail) {
2409 LLVM_DEBUG(dbgs() << "FAIL: returning FAIL\n");
2410 return MCDisassembler::Fail;
2411 }
2412 // If the decoding was incomplete, skip.
2413 Ptr += NumToSkip;
2414 LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n");
2415 // Reset decode status. This also drops a SoftFail status that could be
2416 // set before the decode attempt.
2417 S = MCDisassembler::Success;
2418 break;
2419 })";
2420 }
2421 if (HasSoftFail) {
2422 OS << R"(
2423 case MCD::OPC_SoftFail: {
2424 // Decode the mask values.
2425 uint64_t PositiveMask = decodeULEB128AndIncUnsafe(Ptr);
2426 uint64_t NegativeMask = decodeULEB128AndIncUnsafe(Ptr);
2427 bool Failed = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0;
2428 if (Failed)
2429 S = MCDisassembler::SoftFail;
2430 LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Failed ? "FAIL\n" : "PASS\n"));
2431 break;
2432 })";
2433 }
2434 OS << R"(
2435 case MCD::OPC_Fail: {
2436 LLVM_DEBUG(dbgs() << Loc << ": OPC_Fail\n");
2437 return MCDisassembler::Fail;
2438 }
2439 }
2440 }
2441 llvm_unreachable("bogosity detected in disassembler state machine!");
2442 }
2443
2444 )";
2445 }
2446
2447 // Helper to propagate SoftFail status. Returns false if the status is Fail;
2448 // callers are expected to early-exit in that condition. (Note, the '&' operator
2449 // is correct to propagate the values of this enum; see comment on 'enum
2450 // DecodeStatus'.)
emitCheck(formatted_raw_ostream & OS)2451 static void emitCheck(formatted_raw_ostream &OS) {
2452 OS << R"(
2453 static bool Check(DecodeStatus &Out, DecodeStatus In) {
2454 Out = static_cast<DecodeStatus>(Out & In);
2455 return Out != MCDisassembler::Fail;
2456 }
2457
2458 )";
2459 }
2460
2461 // Collect all HwModes referenced by the target for encoding purposes,
2462 // returning a vector of corresponding names.
collectHwModesReferencedForEncodings(const CodeGenHwModes & HWM,std::vector<StringRef> & Names,NamespacesHwModesMap & NamespacesWithHwModes)2463 static void collectHwModesReferencedForEncodings(
2464 const CodeGenHwModes &HWM, std::vector<StringRef> &Names,
2465 NamespacesHwModesMap &NamespacesWithHwModes) {
2466 SmallBitVector BV(HWM.getNumModeIds());
2467 for (const auto &MS : HWM.getHwModeSelects()) {
2468 for (const HwModeSelect::PairType &P : MS.second.Items) {
2469 if (P.second->isSubClassOf("InstructionEncoding")) {
2470 std::string DecoderNamespace =
2471 P.second->getValueAsString("DecoderNamespace").str();
2472 if (P.first == DefaultMode) {
2473 NamespacesWithHwModes[DecoderNamespace].insert("");
2474 } else {
2475 NamespacesWithHwModes[DecoderNamespace].insert(
2476 HWM.getMode(P.first).Name);
2477 }
2478 BV.set(P.first);
2479 }
2480 }
2481 }
2482 transform(BV.set_bits(), std::back_inserter(Names), [&HWM](const int &M) {
2483 if (M == DefaultMode)
2484 return StringRef("");
2485 return HWM.getModeName(M, /*IncludeDefault=*/true);
2486 });
2487 }
2488
2489 static void
handleHwModesUnrelatedEncodings(const CodeGenInstruction * Instr,ArrayRef<StringRef> HwModeNames,NamespacesHwModesMap & NamespacesWithHwModes,std::vector<EncodingAndInst> & GlobalEncodings)2490 handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr,
2491 ArrayRef<StringRef> HwModeNames,
2492 NamespacesHwModesMap &NamespacesWithHwModes,
2493 std::vector<EncodingAndInst> &GlobalEncodings) {
2494 const Record *InstDef = Instr->TheDef;
2495
2496 switch (DecoderEmitterSuppressDuplicates) {
2497 case SUPPRESSION_DISABLE: {
2498 for (StringRef HwModeName : HwModeNames)
2499 GlobalEncodings.emplace_back(InstDef, Instr, HwModeName);
2500 break;
2501 }
2502 case SUPPRESSION_LEVEL1: {
2503 std::string DecoderNamespace =
2504 InstDef->getValueAsString("DecoderNamespace").str();
2505 auto It = NamespacesWithHwModes.find(DecoderNamespace);
2506 if (It != NamespacesWithHwModes.end()) {
2507 for (StringRef HwModeName : It->second)
2508 GlobalEncodings.emplace_back(InstDef, Instr, HwModeName);
2509 } else {
2510 // Only emit the encoding once, as it's DecoderNamespace doesn't
2511 // contain any HwModes.
2512 GlobalEncodings.emplace_back(InstDef, Instr, "");
2513 }
2514 break;
2515 }
2516 case SUPPRESSION_LEVEL2:
2517 GlobalEncodings.emplace_back(InstDef, Instr, "");
2518 break;
2519 }
2520 }
2521
2522 // Emits disassembler code for instruction decoding.
run(raw_ostream & o)2523 void DecoderEmitter::run(raw_ostream &o) {
2524 formatted_raw_ostream OS(o);
2525 OS << R"(
2526 #include "llvm/MC/MCInst.h"
2527 #include "llvm/MC/MCSubtargetInfo.h"
2528 #include "llvm/Support/DataTypes.h"
2529 #include "llvm/Support/Debug.h"
2530 #include "llvm/Support/LEB128.h"
2531 #include "llvm/Support/raw_ostream.h"
2532 #include "llvm/TargetParser/SubtargetFeature.h"
2533 #include <assert.h>
2534
2535 namespace {
2536 )";
2537
2538 emitFieldFromInstruction(OS);
2539 emitInsertBits(OS);
2540 emitCheck(OS);
2541
2542 Target.reverseBitsForLittleEndianEncoding();
2543
2544 // Parameterize the decoders based on namespace and instruction width.
2545
2546 // First, collect all encoding-related HwModes referenced by the target.
2547 // And establish a mapping table between DecoderNamespace and HwMode.
2548 // If HwModeNames is empty, add the empty string so we always have one HwMode.
2549 const CodeGenHwModes &HWM = Target.getHwModes();
2550 std::vector<StringRef> HwModeNames;
2551 NamespacesHwModesMap NamespacesWithHwModes;
2552 collectHwModesReferencedForEncodings(HWM, HwModeNames, NamespacesWithHwModes);
2553 if (HwModeNames.empty())
2554 HwModeNames.push_back("");
2555
2556 const auto &NumberedInstructions = Target.getInstructions();
2557 NumberedEncodings.reserve(NumberedInstructions.size());
2558 for (const auto &NumberedInstruction : NumberedInstructions) {
2559 const Record *InstDef = NumberedInstruction->TheDef;
2560 if (const RecordVal *RV = InstDef->getValue("EncodingInfos")) {
2561 if (const DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
2562 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
2563 for (auto &[ModeId, Encoding] : EBM) {
2564 // DecoderTables with DefaultMode should not have any suffix.
2565 if (ModeId == DefaultMode) {
2566 NumberedEncodings.emplace_back(Encoding, NumberedInstruction, "");
2567 } else {
2568 NumberedEncodings.emplace_back(Encoding, NumberedInstruction,
2569 HWM.getMode(ModeId).Name);
2570 }
2571 }
2572 continue;
2573 }
2574 }
2575 // This instruction is encoded the same on all HwModes.
2576 // According to user needs, provide varying degrees of suppression.
2577 handleHwModesUnrelatedEncodings(NumberedInstruction, HwModeNames,
2578 NamespacesWithHwModes, NumberedEncodings);
2579 }
2580 for (const Record *NumberedAlias :
2581 RK.getAllDerivedDefinitions("AdditionalEncoding"))
2582 NumberedEncodings.emplace_back(
2583 NumberedAlias,
2584 &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf")));
2585
2586 std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
2587 OpcMap;
2588 std::map<unsigned, std::vector<OperandInfo>> Operands;
2589 std::vector<unsigned> InstrLen;
2590 bool IsVarLenInst = Target.hasVariableLengthEncodings();
2591 unsigned MaxInstLen = 0;
2592
2593 for (const auto &[NEI, NumberedEncoding] : enumerate(NumberedEncodings)) {
2594 const Record *EncodingDef = NumberedEncoding.EncodingDef;
2595 const CodeGenInstruction *Inst = NumberedEncoding.Inst;
2596 const Record *Def = Inst->TheDef;
2597 unsigned Size = EncodingDef->getValueAsInt("Size");
2598 if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
2599 Def->getValueAsBit("isPseudo") ||
2600 Def->getValueAsBit("isAsmParserOnly") ||
2601 Def->getValueAsBit("isCodeGenOnly")) {
2602 NumEncodingsLackingDisasm++;
2603 continue;
2604 }
2605
2606 if (NEI < NumberedInstructions.size())
2607 NumInstructions++;
2608 NumEncodings++;
2609
2610 if (!Size && !IsVarLenInst)
2611 continue;
2612
2613 if (IsVarLenInst)
2614 InstrLen.resize(NumberedInstructions.size(), 0);
2615
2616 if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, NEI,
2617 Operands, IsVarLenInst)) {
2618 if (IsVarLenInst) {
2619 MaxInstLen = std::max(MaxInstLen, Len);
2620 InstrLen[NEI] = Len;
2621 }
2622 std::string DecoderNamespace =
2623 EncodingDef->getValueAsString("DecoderNamespace").str();
2624 if (!NumberedEncoding.HwModeName.empty())
2625 DecoderNamespace += "_" + NumberedEncoding.HwModeName.str();
2626 OpcMap[{DecoderNamespace, Size}].emplace_back(
2627 NEI, Target.getInstrIntValue(Def));
2628 } else {
2629 NumEncodingsOmitted++;
2630 }
2631 }
2632
2633 DecoderTableInfo TableInfo;
2634 unsigned OpcodeMask = 0;
2635 for (const auto &[NSAndByteSize, EncodingIDs] : OpcMap) {
2636 const std::string &DecoderNamespace = NSAndByteSize.first;
2637 const unsigned BitWidth = 8 * NSAndByteSize.second;
2638 // Emit the decoder for this namespace+width combination.
2639 FilterChooser FC(NumberedEncodings, EncodingIDs, Operands,
2640 IsVarLenInst ? MaxInstLen : BitWidth, this);
2641
2642 // The decode table is cleared for each top level decoder function. The
2643 // predicates and decoders themselves, however, are shared across all
2644 // decoders to give more opportunities for uniqueing.
2645 TableInfo.Table.clear();
2646 TableInfo.FixupStack.clear();
2647 TableInfo.FixupStack.emplace_back();
2648 FC.emitTableEntries(TableInfo);
2649 // Any NumToSkip fixups in the top level scope can resolve to the
2650 // OPC_Fail at the end of the table.
2651 assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!");
2652 // Resolve any NumToSkip fixups in the current scope.
2653 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
2654 TableInfo.Table.size());
2655 TableInfo.FixupStack.clear();
2656
2657 TableInfo.Table.push_back(MCD::OPC_Fail);
2658
2659 // Print the table to the output stream.
2660 OpcodeMask |= emitTable(OS, TableInfo.Table, FC.getBitWidth(),
2661 DecoderNamespace, EncodingIDs);
2662 }
2663
2664 // For variable instruction, we emit a instruction length table
2665 // to let the decoder know how long the instructions are.
2666 // You can see example usage in M68k's disassembler.
2667 if (IsVarLenInst)
2668 emitInstrLenTable(OS, InstrLen);
2669
2670 const bool HasCheckPredicate =
2671 OpcodeMask &
2672 ((1 << MCD::OPC_CheckPredicate) | (1 << MCD::OPC_CheckPredicateOrFail));
2673
2674 // Emit the predicate function.
2675 if (HasCheckPredicate)
2676 emitPredicateFunction(OS, TableInfo.Predicates);
2677
2678 // Emit the decoder function.
2679 emitDecoderFunction(OS, TableInfo.Decoders);
2680
2681 // Emit the main entry point for the decoder, decodeInstruction().
2682 emitDecodeInstruction(OS, IsVarLenInst, OpcodeMask);
2683
2684 OS << "\n} // namespace\n";
2685 }
2686
EmitDecoder(const RecordKeeper & RK,raw_ostream & OS,StringRef PredicateNamespace)2687 void llvm::EmitDecoder(const RecordKeeper &RK, raw_ostream &OS,
2688 StringRef PredicateNamespace) {
2689 DecoderEmitter(RK, PredicateNamespace).run(OS);
2690 }
2691