1 //===---------------- DecoderEmitter.cpp - Decoder Generator --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // It contains the tablegen backend that emits the decoder functions for 10 // targets with fixed/variable length instruction set. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "Common/CodeGenHwModes.h" 15 #include "Common/CodeGenInstruction.h" 16 #include "Common/CodeGenTarget.h" 17 #include "Common/InfoByHwMode.h" 18 #include "Common/VarLenCodeEmitterGen.h" 19 #include "TableGenBackends.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/CachedHashString.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SetVector.h" 25 #include "llvm/ADT/SmallBitVector.h" 26 #include "llvm/ADT/SmallString.h" 27 #include "llvm/ADT/Statistic.h" 28 #include "llvm/ADT/StringExtras.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/MC/MCDecoderOps.h" 31 #include "llvm/Support/Casting.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/FormattedStream.h" 36 #include "llvm/Support/LEB128.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include "llvm/TableGen/Error.h" 39 #include "llvm/TableGen/Record.h" 40 #include <algorithm> 41 #include <cassert> 42 #include <cstddef> 43 #include <cstdint> 44 #include <map> 45 #include <memory> 46 #include <set> 47 #include <string> 48 #include <utility> 49 #include <vector> 50 51 using namespace llvm; 52 53 #define DEBUG_TYPE "decoder-emitter" 54 55 extern cl::OptionCategory DisassemblerEmitterCat; 56 57 enum SuppressLevel { 58 SUPPRESSION_DISABLE, 59 SUPPRESSION_LEVEL1, 60 SUPPRESSION_LEVEL2 61 }; 62 63 cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates( 64 "suppress-per-hwmode-duplicates", 65 cl::desc("Suppress duplication of instrs into per-HwMode decoder tables"), 66 cl::values( 67 clEnumValN( 68 SUPPRESSION_DISABLE, "O0", 69 "Do not prevent DecoderTable duplications caused by HwModes"), 70 clEnumValN( 71 SUPPRESSION_LEVEL1, "O1", 72 "Remove duplicate DecoderTable entries generated due to HwModes"), 73 clEnumValN( 74 SUPPRESSION_LEVEL2, "O2", 75 "Extract HwModes-specific instructions into new DecoderTables, " 76 "significantly reducing Table Duplications")), 77 cl::init(SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat)); 78 79 namespace { 80 81 STATISTIC(NumEncodings, "Number of encodings considered"); 82 STATISTIC(NumEncodingsLackingDisasm, 83 "Number of encodings without disassembler info"); 84 STATISTIC(NumInstructions, "Number of instructions considered"); 85 STATISTIC(NumEncodingsSupported, "Number of encodings supported"); 86 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted"); 87 88 struct EncodingField { 89 unsigned Base, Width, Offset; 90 EncodingField(unsigned B, unsigned W, unsigned O) 91 : Base(B), Width(W), Offset(O) {} 92 }; 93 94 struct OperandInfo { 95 std::vector<EncodingField> Fields; 96 std::string Decoder; 97 bool HasCompleteDecoder; 98 uint64_t InitValue; 99 100 OperandInfo(std::string D, bool HCD) 101 : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {} 102 103 void addField(unsigned Base, unsigned Width, unsigned Offset) { 104 Fields.push_back(EncodingField(Base, Width, Offset)); 105 } 106 107 unsigned numFields() const { return Fields.size(); } 108 109 typedef std::vector<EncodingField>::const_iterator const_iterator; 110 111 const_iterator begin() const { return Fields.begin(); } 112 const_iterator end() const { return Fields.end(); } 113 }; 114 115 typedef std::vector<uint8_t> DecoderTable; 116 typedef uint32_t DecoderFixup; 117 typedef std::vector<DecoderFixup> FixupList; 118 typedef std::vector<FixupList> FixupScopeList; 119 typedef SmallSetVector<CachedHashString, 16> PredicateSet; 120 typedef SmallSetVector<CachedHashString, 16> DecoderSet; 121 struct DecoderTableInfo { 122 DecoderTable Table; 123 FixupScopeList FixupStack; 124 PredicateSet Predicates; 125 DecoderSet Decoders; 126 }; 127 128 struct EncodingAndInst { 129 const Record *EncodingDef; 130 const CodeGenInstruction *Inst; 131 StringRef HwModeName; 132 133 EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst, 134 StringRef HwModeName = "") 135 : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {} 136 }; 137 138 struct EncodingIDAndOpcode { 139 unsigned EncodingID; 140 unsigned Opcode; 141 142 EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {} 143 EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode) 144 : EncodingID(EncodingID), Opcode(Opcode) {} 145 }; 146 147 using EncodingIDsVec = std::vector<EncodingIDAndOpcode>; 148 using NamespacesHwModesMap = std::map<std::string, std::set<StringRef>>; 149 150 raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) { 151 if (Value.EncodingDef != Value.Inst->TheDef) 152 OS << Value.EncodingDef->getName() << ":"; 153 OS << Value.Inst->TheDef->getName(); 154 return OS; 155 } 156 157 class DecoderEmitter { 158 RecordKeeper &RK; 159 std::vector<EncodingAndInst> NumberedEncodings; 160 161 public: 162 DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace) 163 : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)) {} 164 165 // Emit the decoder state machine table. 166 void emitTable(formatted_raw_ostream &o, DecoderTable &Table, 167 unsigned Indentation, unsigned BitWidth, StringRef Namespace, 168 const EncodingIDsVec &EncodingIDs) const; 169 void emitInstrLenTable(formatted_raw_ostream &OS, 170 std::vector<unsigned> &InstrLen) const; 171 void emitPredicateFunction(formatted_raw_ostream &OS, 172 PredicateSet &Predicates, 173 unsigned Indentation) const; 174 void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders, 175 unsigned Indentation) const; 176 177 // run - Output the code emitter 178 void run(raw_ostream &o); 179 180 private: 181 CodeGenTarget Target; 182 183 public: 184 std::string PredicateNamespace; 185 }; 186 187 } // end anonymous namespace 188 189 // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system 190 // for a bit value. 191 // 192 // BIT_UNFILTERED is used as the init value for a filter position. It is used 193 // only for filter processings. 194 typedef enum { 195 BIT_TRUE, // '1' 196 BIT_FALSE, // '0' 197 BIT_UNSET, // '?' 198 BIT_UNFILTERED // unfiltered 199 } bit_value_t; 200 201 static bool ValueSet(bit_value_t V) { 202 return (V == BIT_TRUE || V == BIT_FALSE); 203 } 204 205 static bool ValueNotSet(bit_value_t V) { return (V == BIT_UNSET); } 206 207 static int Value(bit_value_t V) { 208 return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1); 209 } 210 211 static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) { 212 if (BitInit *bit = dyn_cast<BitInit>(bits.getBit(index))) 213 return bit->getValue() ? BIT_TRUE : BIT_FALSE; 214 215 // The bit is uninitialized. 216 return BIT_UNSET; 217 } 218 219 // Prints the bit value for each position. 220 static void dumpBits(raw_ostream &o, const BitsInit &bits) { 221 for (unsigned index = bits.getNumBits(); index > 0; --index) { 222 switch (bitFromBits(bits, index - 1)) { 223 case BIT_TRUE: 224 o << "1"; 225 break; 226 case BIT_FALSE: 227 o << "0"; 228 break; 229 case BIT_UNSET: 230 o << "_"; 231 break; 232 default: 233 llvm_unreachable("unexpected return value from bitFromBits"); 234 } 235 } 236 } 237 238 static BitsInit &getBitsField(const Record &def, StringRef str) { 239 const RecordVal *RV = def.getValue(str); 240 if (BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue())) 241 return *Bits; 242 243 // variable length instruction 244 VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV); 245 SmallVector<Init *, 16> Bits; 246 247 for (const auto &SI : VLI) { 248 if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) { 249 for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) { 250 Bits.push_back(BI->getBit(Idx)); 251 } 252 } else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value)) { 253 Bits.push_back(const_cast<BitInit *>(BI)); 254 } else { 255 for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx) 256 Bits.push_back(UnsetInit::get(def.getRecords())); 257 } 258 } 259 260 return *BitsInit::get(def.getRecords(), Bits); 261 } 262 263 // Representation of the instruction to work on. 264 typedef std::vector<bit_value_t> insn_t; 265 266 namespace { 267 268 static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -1ULL; 269 270 class FilterChooser; 271 272 /// Filter - Filter works with FilterChooser to produce the decoding tree for 273 /// the ISA. 274 /// 275 /// It is useful to think of a Filter as governing the switch stmts of the 276 /// decoding tree in a certain level. Each case stmt delegates to an inferior 277 /// FilterChooser to decide what further decoding logic to employ, or in another 278 /// words, what other remaining bits to look at. The FilterChooser eventually 279 /// chooses a best Filter to do its job. 280 /// 281 /// This recursive scheme ends when the number of Opcodes assigned to the 282 /// FilterChooser becomes 1 or if there is a conflict. A conflict happens when 283 /// the Filter/FilterChooser combo does not know how to distinguish among the 284 /// Opcodes assigned. 285 /// 286 /// An example of a conflict is 287 /// 288 /// Conflict: 289 /// 111101000.00........00010000.... 290 /// 111101000.00........0001........ 291 /// 1111010...00........0001........ 292 /// 1111010...00.................... 293 /// 1111010......................... 294 /// 1111............................ 295 /// ................................ 296 /// VST4q8a 111101000_00________00010000____ 297 /// VST4q8b 111101000_00________00010000____ 298 /// 299 /// The Debug output shows the path that the decoding tree follows to reach the 300 /// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced 301 /// even registers, while VST4q8b is a vst4 to double-spaced odd registers. 302 /// 303 /// The encoding info in the .td files does not specify this meta information, 304 /// which could have been used by the decoder to resolve the conflict. The 305 /// decoder could try to decode the even/odd register numbering and assign to 306 /// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a" 307 /// version and return the Opcode since the two have the same Asm format string. 308 class Filter { 309 protected: 310 const FilterChooser 311 *Owner; // points to the FilterChooser who owns this filter 312 unsigned StartBit; // the starting bit position 313 unsigned NumBits; // number of bits to filter 314 bool Mixed; // a mixed region contains both set and unset bits 315 316 // Map of well-known segment value to the set of uid's with that value. 317 std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions; 318 319 // Set of uid's with non-constant segment values. 320 std::vector<EncodingIDAndOpcode> VariableInstructions; 321 322 // Map of well-known segment value to its delegate. 323 std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap; 324 325 // Number of instructions which fall under FilteredInstructions category. 326 unsigned NumFiltered; 327 328 // Keeps track of the last opcode in the filtered bucket. 329 EncodingIDAndOpcode LastOpcFiltered; 330 331 public: 332 Filter(Filter &&f); 333 Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed); 334 335 ~Filter() = default; 336 337 unsigned getNumFiltered() const { return NumFiltered; } 338 339 EncodingIDAndOpcode getSingletonOpc() const { 340 assert(NumFiltered == 1); 341 return LastOpcFiltered; 342 } 343 344 // Return the filter chooser for the group of instructions without constant 345 // segment values. 346 const FilterChooser &getVariableFC() const { 347 assert(NumFiltered == 1); 348 assert(FilterChooserMap.size() == 1); 349 return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second); 350 } 351 352 // Divides the decoding task into sub tasks and delegates them to the 353 // inferior FilterChooser's. 354 // 355 // A special case arises when there's only one entry in the filtered 356 // instructions. In order to unambiguously decode the singleton, we need to 357 // match the remaining undecoded encoding bits against the singleton. 358 void recurse(); 359 360 // Emit table entries to decode instructions given a segment or segments of 361 // bits. 362 void emitTableEntry(DecoderTableInfo &TableInfo) const; 363 364 // Returns the number of fanout produced by the filter. More fanout implies 365 // the filter distinguishes more categories of instructions. 366 unsigned usefulness() const; 367 }; // end class Filter 368 369 } // end anonymous namespace 370 371 // These are states of our finite state machines used in FilterChooser's 372 // filterProcessor() which produces the filter candidates to use. 373 typedef enum { 374 ATTR_NONE, 375 ATTR_FILTERED, 376 ATTR_ALL_SET, 377 ATTR_ALL_UNSET, 378 ATTR_MIXED 379 } bitAttr_t; 380 381 /// FilterChooser - FilterChooser chooses the best filter among a set of Filters 382 /// in order to perform the decoding of instructions at the current level. 383 /// 384 /// Decoding proceeds from the top down. Based on the well-known encoding bits 385 /// of instructions available, FilterChooser builds up the possible Filters that 386 /// can further the task of decoding by distinguishing among the remaining 387 /// candidate instructions. 388 /// 389 /// Once a filter has been chosen, it is called upon to divide the decoding task 390 /// into sub-tasks and delegates them to its inferior FilterChoosers for further 391 /// processings. 392 /// 393 /// It is useful to think of a Filter as governing the switch stmts of the 394 /// decoding tree. And each case is delegated to an inferior FilterChooser to 395 /// decide what further remaining bits to look at. 396 namespace { 397 398 class FilterChooser { 399 protected: 400 friend class Filter; 401 402 // Vector of codegen instructions to choose our filter. 403 ArrayRef<EncodingAndInst> AllInstructions; 404 405 // Vector of uid's for this filter chooser to work on. 406 // The first member of the pair is the opcode id being decoded, the second is 407 // the opcode id that should be emitted. 408 const std::vector<EncodingIDAndOpcode> &Opcodes; 409 410 // Lookup table for the operand decoding of instructions. 411 const std::map<unsigned, std::vector<OperandInfo>> &Operands; 412 413 // Vector of candidate filters. 414 std::vector<Filter> Filters; 415 416 // Array of bit values passed down from our parent. 417 // Set to all BIT_UNFILTERED's for Parent == NULL. 418 std::vector<bit_value_t> FilterBitValues; 419 420 // Links to the FilterChooser above us in the decoding tree. 421 const FilterChooser *Parent; 422 423 // Index of the best filter from Filters. 424 int BestIndex; 425 426 // Width of instructions 427 unsigned BitWidth; 428 429 // Parent emitter 430 const DecoderEmitter *Emitter; 431 432 public: 433 FilterChooser(ArrayRef<EncodingAndInst> Insts, 434 const std::vector<EncodingIDAndOpcode> &IDs, 435 const std::map<unsigned, std::vector<OperandInfo>> &Ops, 436 unsigned BW, const DecoderEmitter *E) 437 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), 438 FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1), 439 BitWidth(BW), Emitter(E) { 440 doFilter(); 441 } 442 443 FilterChooser(ArrayRef<EncodingAndInst> Insts, 444 const std::vector<EncodingIDAndOpcode> &IDs, 445 const std::map<unsigned, std::vector<OperandInfo>> &Ops, 446 const std::vector<bit_value_t> &ParentFilterBitValues, 447 const FilterChooser &parent) 448 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), 449 FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1), 450 BitWidth(parent.BitWidth), Emitter(parent.Emitter) { 451 doFilter(); 452 } 453 454 FilterChooser(const FilterChooser &) = delete; 455 void operator=(const FilterChooser &) = delete; 456 457 unsigned getBitWidth() const { return BitWidth; } 458 459 protected: 460 // Populates the insn given the uid. 461 void insnWithID(insn_t &Insn, unsigned Opcode) const { 462 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef; 463 BitsInit &Bits = getBitsField(*EncodingDef, "Inst"); 464 Insn.resize(std::max(BitWidth, Bits.getNumBits()), BIT_UNSET); 465 // We may have a SoftFail bitmask, which specifies a mask where an encoding 466 // may differ from the value in "Inst" and yet still be valid, but the 467 // disassembler should return SoftFail instead of Success. 468 // 469 // This is used for marking UNPREDICTABLE instructions in the ARM world. 470 const RecordVal *RV = EncodingDef->getValue("SoftFail"); 471 const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr; 472 for (unsigned i = 0; i < Bits.getNumBits(); ++i) { 473 if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE) 474 Insn[i] = BIT_UNSET; 475 else 476 Insn[i] = bitFromBits(Bits, i); 477 } 478 } 479 480 // Emit the name of the encoding/instruction pair. 481 void emitNameWithID(raw_ostream &OS, unsigned Opcode) const { 482 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef; 483 const Record *InstDef = AllInstructions[Opcode].Inst->TheDef; 484 if (EncodingDef != InstDef) 485 OS << EncodingDef->getName() << ":"; 486 OS << InstDef->getName(); 487 } 488 489 // Populates the field of the insn given the start position and the number of 490 // consecutive bits to scan for. 491 // 492 // Returns a pair of values (indicator, field), where the indicator is false 493 // if there exists any uninitialized bit value in the range and true if all 494 // bits are well-known. The second value is the potentially populated field. 495 std::pair<bool, uint64_t> fieldFromInsn(const insn_t &Insn, unsigned StartBit, 496 unsigned NumBits) const; 497 498 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 499 /// filter array as a series of chars. 500 void dumpFilterArray(raw_ostream &o, 501 const std::vector<bit_value_t> &filter) const; 502 503 /// dumpStack - dumpStack traverses the filter chooser chain and calls 504 /// dumpFilterArray on each filter chooser up to the top level one. 505 void dumpStack(raw_ostream &o, const char *prefix) const; 506 507 Filter &bestFilter() { 508 assert(BestIndex != -1 && "BestIndex not set"); 509 return Filters[BestIndex]; 510 } 511 512 bool PositionFiltered(unsigned i) const { 513 return ValueSet(FilterBitValues[i]); 514 } 515 516 // Calculates the island(s) needed to decode the instruction. 517 // This returns a lit of undecoded bits of an instructions, for example, 518 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 519 // decoded bits in order to verify that the instruction matches the Opcode. 520 unsigned getIslands(std::vector<unsigned> &StartBits, 521 std::vector<unsigned> &EndBits, 522 std::vector<uint64_t> &FieldVals, 523 const insn_t &Insn) const; 524 525 // Emits code to check the Predicates member of an instruction are true. 526 // Returns true if predicate matches were emitted, false otherwise. 527 bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation, 528 unsigned Opc) const; 529 bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, 530 raw_ostream &OS) const; 531 532 bool doesOpcodeNeedPredicate(unsigned Opc) const; 533 unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const; 534 void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const; 535 536 void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const; 537 538 // Emits table entries to decode the singleton. 539 void emitSingletonTableEntry(DecoderTableInfo &TableInfo, 540 EncodingIDAndOpcode Opc) const; 541 542 // Emits code to decode the singleton, and then to decode the rest. 543 void emitSingletonTableEntry(DecoderTableInfo &TableInfo, 544 const Filter &Best) const; 545 546 void emitBinaryParser(raw_ostream &o, unsigned &Indentation, 547 const OperandInfo &OpInfo, 548 bool &OpHasCompleteDecoder) const; 549 550 void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc, 551 bool &HasCompleteDecoder) const; 552 unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc, 553 bool &HasCompleteDecoder) const; 554 555 // Assign a single filter and run with it. 556 void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed); 557 558 // reportRegion is a helper function for filterProcessor to mark a region as 559 // eligible for use as a filter region. 560 void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex, 561 bool AllowMixed); 562 563 // FilterProcessor scans the well-known encoding bits of the instructions and 564 // builds up a list of candidate filters. It chooses the best filter and 565 // recursively descends down the decoding tree. 566 bool filterProcessor(bool AllowMixed, bool Greedy = true); 567 568 // Decides on the best configuration of filter(s) to use in order to decode 569 // the instructions. A conflict of instructions may occur, in which case we 570 // dump the conflict set to the standard error. 571 void doFilter(); 572 573 public: 574 // emitTableEntries - Emit state machine entries to decode our share of 575 // instructions. 576 void emitTableEntries(DecoderTableInfo &TableInfo) const; 577 }; 578 579 } // end anonymous namespace 580 581 /////////////////////////// 582 // // 583 // Filter Implementation // 584 // // 585 /////////////////////////// 586 587 Filter::Filter(Filter &&f) 588 : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed), 589 FilteredInstructions(std::move(f.FilteredInstructions)), 590 VariableInstructions(std::move(f.VariableInstructions)), 591 FilterChooserMap(std::move(f.FilterChooserMap)), 592 NumFiltered(f.NumFiltered), LastOpcFiltered(f.LastOpcFiltered) {} 593 594 Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, 595 bool mixed) 596 : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) { 597 assert(StartBit + NumBits - 1 < Owner->BitWidth); 598 599 NumFiltered = 0; 600 LastOpcFiltered = {0, 0}; 601 602 for (const auto &OpcPair : Owner->Opcodes) { 603 insn_t Insn; 604 605 // Populates the insn given the uid. 606 Owner->insnWithID(Insn, OpcPair.EncodingID); 607 608 // Scans the segment for possibly well-specified encoding bits. 609 auto [Ok, Field] = Owner->fieldFromInsn(Insn, StartBit, NumBits); 610 611 if (Ok) { 612 // The encoding bits are well-known. Lets add the uid of the 613 // instruction into the bucket keyed off the constant field value. 614 LastOpcFiltered = OpcPair; 615 FilteredInstructions[Field].push_back(LastOpcFiltered); 616 ++NumFiltered; 617 } else { 618 // Some of the encoding bit(s) are unspecified. This contributes to 619 // one additional member of "Variable" instructions. 620 VariableInstructions.push_back(OpcPair); 621 } 622 } 623 624 assert((FilteredInstructions.size() + VariableInstructions.size() > 0) && 625 "Filter returns no instruction categories"); 626 } 627 628 // Divides the decoding task into sub tasks and delegates them to the 629 // inferior FilterChooser's. 630 // 631 // A special case arises when there's only one entry in the filtered 632 // instructions. In order to unambiguously decode the singleton, we need to 633 // match the remaining undecoded encoding bits against the singleton. 634 void Filter::recurse() { 635 // Starts by inheriting our parent filter chooser's filter bit values. 636 std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues); 637 638 if (!VariableInstructions.empty()) { 639 // Conservatively marks each segment position as BIT_UNSET. 640 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) 641 BitValueArray[StartBit + bitIndex] = BIT_UNSET; 642 643 // Delegates to an inferior filter chooser for further processing on this 644 // group of instructions whose segment values are variable. 645 FilterChooserMap.insert(std::pair( 646 NO_FIXED_SEGMENTS_SENTINEL, 647 std::make_unique<FilterChooser>(Owner->AllInstructions, 648 VariableInstructions, Owner->Operands, 649 BitValueArray, *Owner))); 650 } 651 652 // No need to recurse for a singleton filtered instruction. 653 // See also Filter::emit*(). 654 if (getNumFiltered() == 1) { 655 assert(FilterChooserMap.size() == 1); 656 return; 657 } 658 659 // Otherwise, create sub choosers. 660 for (const auto &Inst : FilteredInstructions) { 661 662 // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. 663 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) { 664 if (Inst.first & (1ULL << bitIndex)) 665 BitValueArray[StartBit + bitIndex] = BIT_TRUE; 666 else 667 BitValueArray[StartBit + bitIndex] = BIT_FALSE; 668 } 669 670 // Delegates to an inferior filter chooser for further processing on this 671 // category of instructions. 672 FilterChooserMap.insert( 673 std::pair(Inst.first, std::make_unique<FilterChooser>( 674 Owner->AllInstructions, Inst.second, 675 Owner->Operands, BitValueArray, *Owner))); 676 } 677 } 678 679 static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, 680 uint32_t DestIdx) { 681 // Any NumToSkip fixups in the current scope can resolve to the 682 // current location. 683 for (FixupList::const_reverse_iterator I = Fixups.rbegin(), E = Fixups.rend(); 684 I != E; ++I) { 685 // Calculate the distance from the byte following the fixup entry byte 686 // to the destination. The Target is calculated from after the 16-bit 687 // NumToSkip entry itself, so subtract two from the displacement here 688 // to account for that. 689 uint32_t FixupIdx = *I; 690 uint32_t Delta = DestIdx - FixupIdx - 3; 691 // Our NumToSkip entries are 24-bits. Make sure our table isn't too 692 // big. 693 assert(Delta < (1u << 24)); 694 Table[FixupIdx] = (uint8_t)Delta; 695 Table[FixupIdx + 1] = (uint8_t)(Delta >> 8); 696 Table[FixupIdx + 2] = (uint8_t)(Delta >> 16); 697 } 698 } 699 700 // Emit table entries to decode instructions given a segment or segments 701 // of bits. 702 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { 703 assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!"); 704 TableInfo.Table.push_back(MCD::OPC_ExtractField); 705 706 SmallString<16> SBytes; 707 raw_svector_ostream S(SBytes); 708 encodeULEB128(StartBit, S); 709 TableInfo.Table.insert(TableInfo.Table.end(), SBytes.begin(), SBytes.end()); 710 TableInfo.Table.push_back(NumBits); 711 712 // A new filter entry begins a new scope for fixup resolution. 713 TableInfo.FixupStack.emplace_back(); 714 715 DecoderTable &Table = TableInfo.Table; 716 717 size_t PrevFilter = 0; 718 bool HasFallthrough = false; 719 for (const auto &Filter : FilterChooserMap) { 720 // Field value -1 implies a non-empty set of variable instructions. 721 // See also recurse(). 722 if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) { 723 HasFallthrough = true; 724 725 // Each scope should always have at least one filter value to check 726 // for. 727 assert(PrevFilter != 0 && "empty filter set!"); 728 FixupList &CurScope = TableInfo.FixupStack.back(); 729 // Resolve any NumToSkip fixups in the current scope. 730 resolveTableFixups(Table, CurScope, Table.size()); 731 CurScope.clear(); 732 PrevFilter = 0; // Don't re-process the filter's fallthrough. 733 } else { 734 Table.push_back(MCD::OPC_FilterValue); 735 // Encode and emit the value to filter against. 736 uint8_t Buffer[16]; 737 unsigned Len = encodeULEB128(Filter.first, Buffer); 738 Table.insert(Table.end(), Buffer, Buffer + Len); 739 // Reserve space for the NumToSkip entry. We'll backpatch the value 740 // later. 741 PrevFilter = Table.size(); 742 Table.push_back(0); 743 Table.push_back(0); 744 Table.push_back(0); 745 } 746 747 // We arrive at a category of instructions with the same segment value. 748 // Now delegate to the sub filter chooser for further decodings. 749 // The case may fallthrough, which happens if the remaining well-known 750 // encoding bits do not match exactly. 751 Filter.second->emitTableEntries(TableInfo); 752 753 // Now that we've emitted the body of the handler, update the NumToSkip 754 // of the filter itself to be able to skip forward when false. Subtract 755 // two as to account for the width of the NumToSkip field itself. 756 if (PrevFilter) { 757 uint32_t NumToSkip = Table.size() - PrevFilter - 3; 758 assert(NumToSkip < (1u << 24) && 759 "disassembler decoding table too large!"); 760 Table[PrevFilter] = (uint8_t)NumToSkip; 761 Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8); 762 Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16); 763 } 764 } 765 766 // Any remaining unresolved fixups bubble up to the parent fixup scope. 767 assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!"); 768 FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1; 769 FixupScopeList::iterator Dest = Source - 1; 770 llvm::append_range(*Dest, *Source); 771 TableInfo.FixupStack.pop_back(); 772 773 // If there is no fallthrough, then the final filter should get fixed 774 // up according to the enclosing scope rather than the current position. 775 if (!HasFallthrough) 776 TableInfo.FixupStack.back().push_back(PrevFilter); 777 } 778 779 // Returns the number of fanout produced by the filter. More fanout implies 780 // the filter distinguishes more categories of instructions. 781 unsigned Filter::usefulness() const { 782 if (!VariableInstructions.empty()) 783 return FilteredInstructions.size(); 784 else 785 return FilteredInstructions.size() + 1; 786 } 787 788 ////////////////////////////////// 789 // // 790 // Filterchooser Implementation // 791 // // 792 ////////////////////////////////// 793 794 // Emit the decoder state machine table. 795 void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table, 796 unsigned Indentation, unsigned BitWidth, 797 StringRef Namespace, 798 const EncodingIDsVec &EncodingIDs) const { 799 // We'll need to be able to map from a decoded opcode into the corresponding 800 // EncodingID for this specific combination of BitWidth and Namespace. This 801 // is used below to index into NumberedEncodings. 802 DenseMap<unsigned, unsigned> OpcodeToEncodingID; 803 OpcodeToEncodingID.reserve(EncodingIDs.size()); 804 for (const auto &EI : EncodingIDs) 805 OpcodeToEncodingID[EI.Opcode] = EI.EncodingID; 806 807 OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace 808 << BitWidth << "[] = {\n"; 809 810 Indentation += 2; 811 812 // Emit ULEB128 encoded value to OS, returning the number of bytes emitted. 813 auto emitULEB128 = [](DecoderTable::const_iterator I, 814 formatted_raw_ostream &OS) { 815 unsigned Len = 0; 816 while (*I >= 128) { 817 OS << (unsigned)*I++ << ", "; 818 Len++; 819 } 820 OS << (unsigned)*I++ << ", "; 821 return Len + 1; 822 }; 823 824 // Emit 24-bit numtoskip value to OS, returning the NumToSkip value. 825 auto emitNumToSkip = [](DecoderTable::const_iterator I, 826 formatted_raw_ostream &OS) { 827 uint8_t Byte = *I++; 828 uint32_t NumToSkip = Byte; 829 OS << (unsigned)Byte << ", "; 830 Byte = *I++; 831 OS << (unsigned)Byte << ", "; 832 NumToSkip |= Byte << 8; 833 Byte = *I++; 834 OS << utostr(Byte) << ", "; 835 NumToSkip |= Byte << 16; 836 return NumToSkip; 837 }; 838 839 // FIXME: We may be able to use the NumToSkip values to recover 840 // appropriate indentation levels. 841 DecoderTable::const_iterator I = Table.begin(); 842 DecoderTable::const_iterator E = Table.end(); 843 while (I != E) { 844 assert(I < E && "incomplete decode table entry!"); 845 846 uint64_t Pos = I - Table.begin(); 847 OS << "/* " << Pos << " */"; 848 OS.PadToColumn(12); 849 850 switch (*I) { 851 default: 852 PrintFatalError("invalid decode table opcode"); 853 case MCD::OPC_ExtractField: { 854 ++I; 855 OS.indent(Indentation) << "MCD::OPC_ExtractField, "; 856 857 // ULEB128 encoded start value. 858 const char *ErrMsg = nullptr; 859 unsigned Start = decodeULEB128(Table.data() + Pos + 1, nullptr, 860 Table.data() + Table.size(), &ErrMsg); 861 assert(ErrMsg == nullptr && "ULEB128 value too large!"); 862 I += emitULEB128(I, OS); 863 864 unsigned Len = *I++; 865 OS << Len << ", // Inst{"; 866 if (Len > 1) 867 OS << (Start + Len - 1) << "-"; 868 OS << Start << "} ...\n"; 869 break; 870 } 871 case MCD::OPC_FilterValue: { 872 ++I; 873 OS.indent(Indentation) << "MCD::OPC_FilterValue, "; 874 // The filter value is ULEB128 encoded. 875 I += emitULEB128(I, OS); 876 877 // 24-bit numtoskip value. 878 uint32_t NumToSkip = emitNumToSkip(I, OS); 879 I += 3; 880 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 881 break; 882 } 883 case MCD::OPC_CheckField: { 884 ++I; 885 OS.indent(Indentation) << "MCD::OPC_CheckField, "; 886 // ULEB128 encoded start value. 887 I += emitULEB128(I, OS); 888 // 8-bit length. 889 unsigned Len = *I++; 890 OS << Len << ", "; 891 // ULEB128 encoded field value. 892 I += emitULEB128(I, OS); 893 894 // 24-bit numtoskip value. 895 uint32_t NumToSkip = emitNumToSkip(I, OS); 896 I += 3; 897 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 898 break; 899 } 900 case MCD::OPC_CheckPredicate: { 901 ++I; 902 OS.indent(Indentation) << "MCD::OPC_CheckPredicate, "; 903 I += emitULEB128(I, OS); 904 905 // 24-bit numtoskip value. 906 uint32_t NumToSkip = emitNumToSkip(I, OS); 907 I += 3; 908 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 909 break; 910 } 911 case MCD::OPC_Decode: 912 case MCD::OPC_TryDecode: { 913 bool IsTry = *I == MCD::OPC_TryDecode; 914 ++I; 915 // Decode the Opcode value. 916 const char *ErrMsg = nullptr; 917 unsigned Opc = decodeULEB128(Table.data() + Pos + 1, nullptr, 918 Table.data() + Table.size(), &ErrMsg); 919 assert(ErrMsg == nullptr && "ULEB128 value too large!"); 920 921 OS.indent(Indentation) 922 << "MCD::OPC_" << (IsTry ? "Try" : "") << "Decode, "; 923 I += emitULEB128(I, OS); 924 925 // Decoder index. 926 I += emitULEB128(I, OS); 927 928 auto EncI = OpcodeToEncodingID.find(Opc); 929 assert(EncI != OpcodeToEncodingID.end() && "no encoding entry"); 930 auto EncodingID = EncI->second; 931 932 if (!IsTry) { 933 OS << "// Opcode: " << NumberedEncodings[EncodingID] << "\n"; 934 break; 935 } 936 937 // Fallthrough for OPC_TryDecode. 938 939 // 24-bit numtoskip value. 940 uint32_t NumToSkip = emitNumToSkip(I, OS); 941 I += 3; 942 943 OS << "// Opcode: " << NumberedEncodings[EncodingID] 944 << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 945 break; 946 } 947 case MCD::OPC_SoftFail: { 948 ++I; 949 OS.indent(Indentation) << "MCD::OPC_SoftFail"; 950 // Positive mask 951 uint64_t Value = 0; 952 unsigned Shift = 0; 953 do { 954 OS << ", " << (unsigned)*I; 955 Value += ((uint64_t)(*I & 0x7f)) << Shift; 956 Shift += 7; 957 } while (*I++ >= 128); 958 if (Value > 127) { 959 OS << " /* 0x"; 960 OS.write_hex(Value); 961 OS << " */"; 962 } 963 // Negative mask 964 Value = 0; 965 Shift = 0; 966 do { 967 OS << ", " << (unsigned)*I; 968 Value += ((uint64_t)(*I & 0x7f)) << Shift; 969 Shift += 7; 970 } while (*I++ >= 128); 971 if (Value > 127) { 972 OS << " /* 0x"; 973 OS.write_hex(Value); 974 OS << " */"; 975 } 976 OS << ",\n"; 977 break; 978 } 979 case MCD::OPC_Fail: { 980 ++I; 981 OS.indent(Indentation) << "MCD::OPC_Fail,\n"; 982 break; 983 } 984 } 985 } 986 OS.indent(Indentation) << "0\n"; 987 988 Indentation -= 2; 989 990 OS.indent(Indentation) << "};\n\n"; 991 } 992 993 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS, 994 std::vector<unsigned> &InstrLen) const { 995 OS << "static const uint8_t InstrLenTable[] = {\n"; 996 for (unsigned &Len : InstrLen) { 997 OS << Len << ",\n"; 998 } 999 OS << "};\n\n"; 1000 } 1001 1002 void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS, 1003 PredicateSet &Predicates, 1004 unsigned Indentation) const { 1005 // The predicate function is just a big switch statement based on the 1006 // input predicate index. 1007 OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, " 1008 << "const FeatureBitset &Bits) {\n"; 1009 Indentation += 2; 1010 if (!Predicates.empty()) { 1011 OS.indent(Indentation) << "switch (Idx) {\n"; 1012 OS.indent(Indentation) 1013 << "default: llvm_unreachable(\"Invalid index!\");\n"; 1014 unsigned Index = 0; 1015 for (const auto &Predicate : Predicates) { 1016 OS.indent(Indentation) << "case " << Index++ << ":\n"; 1017 OS.indent(Indentation + 2) << "return (" << Predicate << ");\n"; 1018 } 1019 OS.indent(Indentation) << "}\n"; 1020 } else { 1021 // No case statement to emit 1022 OS.indent(Indentation) << "llvm_unreachable(\"Invalid index!\");\n"; 1023 } 1024 Indentation -= 2; 1025 OS.indent(Indentation) << "}\n\n"; 1026 } 1027 1028 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, 1029 DecoderSet &Decoders, 1030 unsigned Indentation) const { 1031 // The decoder function is just a big switch statement based on the 1032 // input decoder index. 1033 OS.indent(Indentation) << "template <typename InsnType>\n"; 1034 OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S," 1035 << " unsigned Idx, InsnType insn, MCInst &MI,\n"; 1036 OS.indent(Indentation) 1037 << " uint64_t " 1038 << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n"; 1039 Indentation += 2; 1040 OS.indent(Indentation) << "DecodeComplete = true;\n"; 1041 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits 1042 // It would be better for emitBinaryParser to use a 64-bit tmp whenever 1043 // possible but fall back to an InsnType-sized tmp for truly large fields. 1044 OS.indent(Indentation) << "using TmpType = " 1045 "std::conditional_t<std::is_integral<InsnType>::" 1046 "value, InsnType, uint64_t>;\n"; 1047 OS.indent(Indentation) << "TmpType tmp;\n"; 1048 OS.indent(Indentation) << "switch (Idx) {\n"; 1049 OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; 1050 unsigned Index = 0; 1051 for (const auto &Decoder : Decoders) { 1052 OS.indent(Indentation) << "case " << Index++ << ":\n"; 1053 OS << Decoder; 1054 OS.indent(Indentation + 2) << "return S;\n"; 1055 } 1056 OS.indent(Indentation) << "}\n"; 1057 Indentation -= 2; 1058 OS.indent(Indentation) << "}\n"; 1059 } 1060 1061 // Populates the field of the insn given the start position and the number of 1062 // consecutive bits to scan for. 1063 // 1064 // Returns a pair of values (indicator, field), where the indicator is false 1065 // if there exists any uninitialized bit value in the range and true if all 1066 // bits are well-known. The second value is the potentially populated field. 1067 std::pair<bool, uint64_t> FilterChooser::fieldFromInsn(const insn_t &Insn, 1068 unsigned StartBit, 1069 unsigned NumBits) const { 1070 uint64_t Field = 0; 1071 1072 for (unsigned i = 0; i < NumBits; ++i) { 1073 if (Insn[StartBit + i] == BIT_UNSET) 1074 return {false, Field}; 1075 1076 if (Insn[StartBit + i] == BIT_TRUE) 1077 Field = Field | (1ULL << i); 1078 } 1079 1080 return {true, Field}; 1081 } 1082 1083 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 1084 /// filter array as a series of chars. 1085 void FilterChooser::dumpFilterArray( 1086 raw_ostream &o, const std::vector<bit_value_t> &filter) const { 1087 for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) { 1088 switch (filter[bitIndex - 1]) { 1089 case BIT_UNFILTERED: 1090 o << "."; 1091 break; 1092 case BIT_UNSET: 1093 o << "_"; 1094 break; 1095 case BIT_TRUE: 1096 o << "1"; 1097 break; 1098 case BIT_FALSE: 1099 o << "0"; 1100 break; 1101 } 1102 } 1103 } 1104 1105 /// dumpStack - dumpStack traverses the filter chooser chain and calls 1106 /// dumpFilterArray on each filter chooser up to the top level one. 1107 void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) const { 1108 const FilterChooser *current = this; 1109 1110 while (current) { 1111 o << prefix; 1112 dumpFilterArray(o, current->FilterBitValues); 1113 o << '\n'; 1114 current = current->Parent; 1115 } 1116 } 1117 1118 // Calculates the island(s) needed to decode the instruction. 1119 // This returns a list of undecoded bits of an instructions, for example, 1120 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 1121 // decoded bits in order to verify that the instruction matches the Opcode. 1122 unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits, 1123 std::vector<unsigned> &EndBits, 1124 std::vector<uint64_t> &FieldVals, 1125 const insn_t &Insn) const { 1126 unsigned Num, BitNo; 1127 Num = BitNo = 0; 1128 1129 uint64_t FieldVal = 0; 1130 1131 // 0: Init 1132 // 1: Water (the bit value does not affect decoding) 1133 // 2: Island (well-known bit value needed for decoding) 1134 int State = 0; 1135 1136 for (unsigned i = 0; i < BitWidth; ++i) { 1137 int64_t Val = Value(Insn[i]); 1138 bool Filtered = PositionFiltered(i); 1139 switch (State) { 1140 default: 1141 llvm_unreachable("Unreachable code!"); 1142 case 0: 1143 case 1: 1144 if (Filtered || Val == -1) 1145 State = 1; // Still in Water 1146 else { 1147 State = 2; // Into the Island 1148 BitNo = 0; 1149 StartBits.push_back(i); 1150 FieldVal = Val; 1151 } 1152 break; 1153 case 2: 1154 if (Filtered || Val == -1) { 1155 State = 1; // Into the Water 1156 EndBits.push_back(i - 1); 1157 FieldVals.push_back(FieldVal); 1158 ++Num; 1159 } else { 1160 State = 2; // Still in Island 1161 ++BitNo; 1162 FieldVal = FieldVal | Val << BitNo; 1163 } 1164 break; 1165 } 1166 } 1167 // If we are still in Island after the loop, do some housekeeping. 1168 if (State == 2) { 1169 EndBits.push_back(BitWidth - 1); 1170 FieldVals.push_back(FieldVal); 1171 ++Num; 1172 } 1173 1174 assert(StartBits.size() == Num && EndBits.size() == Num && 1175 FieldVals.size() == Num); 1176 return Num; 1177 } 1178 1179 void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, 1180 const OperandInfo &OpInfo, 1181 bool &OpHasCompleteDecoder) const { 1182 const std::string &Decoder = OpInfo.Decoder; 1183 1184 bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0; 1185 1186 if (UseInsertBits) { 1187 o.indent(Indentation) << "tmp = 0x"; 1188 o.write_hex(OpInfo.InitValue); 1189 o << ";\n"; 1190 } 1191 1192 for (const EncodingField &EF : OpInfo) { 1193 o.indent(Indentation); 1194 if (UseInsertBits) 1195 o << "insertBits(tmp, "; 1196 else 1197 o << "tmp = "; 1198 o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')'; 1199 if (UseInsertBits) 1200 o << ", " << EF.Offset << ", " << EF.Width << ')'; 1201 else if (EF.Offset != 0) 1202 o << " << " << EF.Offset; 1203 o << ";\n"; 1204 } 1205 1206 if (Decoder != "") { 1207 OpHasCompleteDecoder = OpInfo.HasCompleteDecoder; 1208 o.indent(Indentation) << "if (!Check(S, " << Decoder 1209 << "(MI, tmp, Address, Decoder))) { " 1210 << (OpHasCompleteDecoder ? "" 1211 : "DecodeComplete = false; ") 1212 << "return MCDisassembler::Fail; }\n"; 1213 } else { 1214 OpHasCompleteDecoder = true; 1215 o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n"; 1216 } 1217 } 1218 1219 void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation, 1220 unsigned Opc, bool &HasCompleteDecoder) const { 1221 HasCompleteDecoder = true; 1222 1223 for (const auto &Op : Operands.find(Opc)->second) { 1224 // If a custom instruction decoder was specified, use that. 1225 if (Op.numFields() == 0 && !Op.Decoder.empty()) { 1226 HasCompleteDecoder = Op.HasCompleteDecoder; 1227 OS.indent(Indentation) 1228 << "if (!Check(S, " << Op.Decoder 1229 << "(MI, insn, Address, Decoder))) { " 1230 << (HasCompleteDecoder ? "" : "DecodeComplete = false; ") 1231 << "return MCDisassembler::Fail; }\n"; 1232 break; 1233 } 1234 1235 bool OpHasCompleteDecoder; 1236 emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder); 1237 if (!OpHasCompleteDecoder) 1238 HasCompleteDecoder = false; 1239 } 1240 } 1241 1242 unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, unsigned Opc, 1243 bool &HasCompleteDecoder) const { 1244 // Build up the predicate string. 1245 SmallString<256> Decoder; 1246 // FIXME: emitDecoder() function can take a buffer directly rather than 1247 // a stream. 1248 raw_svector_ostream S(Decoder); 1249 unsigned I = 4; 1250 emitDecoder(S, I, Opc, HasCompleteDecoder); 1251 1252 // Using the full decoder string as the key value here is a bit 1253 // heavyweight, but is effective. If the string comparisons become a 1254 // performance concern, we can implement a mangling of the predicate 1255 // data easily enough with a map back to the actual string. That's 1256 // overkill for now, though. 1257 1258 // Make sure the predicate is in the table. 1259 Decoders.insert(CachedHashString(Decoder)); 1260 // Now figure out the index for when we write out the table. 1261 DecoderSet::const_iterator P = find(Decoders, Decoder.str()); 1262 return (unsigned)(P - Decoders.begin()); 1263 } 1264 1265 // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||. 1266 bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, 1267 raw_ostream &OS) const { 1268 if (const auto *D = dyn_cast<DefInit>(&Val)) { 1269 if (!D->getDef()->isSubClassOf("SubtargetFeature")) 1270 return true; 1271 OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString() 1272 << "]"; 1273 return false; 1274 } 1275 if (const auto *D = dyn_cast<DagInit>(&Val)) { 1276 std::string Op = D->getOperator()->getAsString(); 1277 if (Op == "not" && D->getNumArgs() == 1) { 1278 OS << '!'; 1279 return emitPredicateMatchAux(*D->getArg(0), true, OS); 1280 } 1281 if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) { 1282 bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true); 1283 if (Paren) 1284 OS << '('; 1285 ListSeparator LS(Op == "any_of" ? " || " : " && "); 1286 for (auto *Arg : D->getArgs()) { 1287 OS << LS; 1288 if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS)) 1289 return true; 1290 } 1291 if (Paren) 1292 OS << ')'; 1293 return false; 1294 } 1295 } 1296 return true; 1297 } 1298 1299 bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation, 1300 unsigned Opc) const { 1301 ListInit *Predicates = 1302 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); 1303 bool IsFirstEmission = true; 1304 for (unsigned i = 0; i < Predicates->size(); ++i) { 1305 Record *Pred = Predicates->getElementAsRecord(i); 1306 if (!Pred->getValue("AssemblerMatcherPredicate")) 1307 continue; 1308 1309 if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue())) 1310 continue; 1311 1312 if (!IsFirstEmission) 1313 o << " && "; 1314 if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"), 1315 Predicates->size() > 1, o)) 1316 PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); 1317 IsFirstEmission = false; 1318 } 1319 return !Predicates->empty(); 1320 } 1321 1322 bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const { 1323 ListInit *Predicates = 1324 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); 1325 for (unsigned i = 0; i < Predicates->size(); ++i) { 1326 Record *Pred = Predicates->getElementAsRecord(i); 1327 if (!Pred->getValue("AssemblerMatcherPredicate")) 1328 continue; 1329 1330 if (isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue())) 1331 return true; 1332 } 1333 return false; 1334 } 1335 1336 unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo, 1337 StringRef Predicate) const { 1338 // Using the full predicate string as the key value here is a bit 1339 // heavyweight, but is effective. If the string comparisons become a 1340 // performance concern, we can implement a mangling of the predicate 1341 // data easily enough with a map back to the actual string. That's 1342 // overkill for now, though. 1343 1344 // Make sure the predicate is in the table. 1345 TableInfo.Predicates.insert(CachedHashString(Predicate)); 1346 // Now figure out the index for when we write out the table. 1347 PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate); 1348 return (unsigned)(P - TableInfo.Predicates.begin()); 1349 } 1350 1351 void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo, 1352 unsigned Opc) const { 1353 if (!doesOpcodeNeedPredicate(Opc)) 1354 return; 1355 1356 // Build up the predicate string. 1357 SmallString<256> Predicate; 1358 // FIXME: emitPredicateMatch() functions can take a buffer directly rather 1359 // than a stream. 1360 raw_svector_ostream PS(Predicate); 1361 unsigned I = 0; 1362 emitPredicateMatch(PS, I, Opc); 1363 1364 // Figure out the index into the predicate table for the predicate just 1365 // computed. 1366 unsigned PIdx = getPredicateIndex(TableInfo, PS.str()); 1367 SmallString<16> PBytes; 1368 raw_svector_ostream S(PBytes); 1369 encodeULEB128(PIdx, S); 1370 1371 TableInfo.Table.push_back(MCD::OPC_CheckPredicate); 1372 // Predicate index. 1373 for (const auto PB : PBytes) 1374 TableInfo.Table.push_back(PB); 1375 // Push location for NumToSkip backpatching. 1376 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1377 TableInfo.Table.push_back(0); 1378 TableInfo.Table.push_back(0); 1379 TableInfo.Table.push_back(0); 1380 } 1381 1382 void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo, 1383 unsigned Opc) const { 1384 const Record *EncodingDef = AllInstructions[Opc].EncodingDef; 1385 const RecordVal *RV = EncodingDef->getValue("SoftFail"); 1386 BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr; 1387 1388 if (!SFBits) 1389 return; 1390 BitsInit *InstBits = EncodingDef->getValueAsBitsInit("Inst"); 1391 1392 APInt PositiveMask(BitWidth, 0ULL); 1393 APInt NegativeMask(BitWidth, 0ULL); 1394 for (unsigned i = 0; i < BitWidth; ++i) { 1395 bit_value_t B = bitFromBits(*SFBits, i); 1396 bit_value_t IB = bitFromBits(*InstBits, i); 1397 1398 if (B != BIT_TRUE) 1399 continue; 1400 1401 switch (IB) { 1402 case BIT_FALSE: 1403 // The bit is meant to be false, so emit a check to see if it is true. 1404 PositiveMask.setBit(i); 1405 break; 1406 case BIT_TRUE: 1407 // The bit is meant to be true, so emit a check to see if it is false. 1408 NegativeMask.setBit(i); 1409 break; 1410 default: 1411 // The bit is not set; this must be an error! 1412 errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " 1413 << AllInstructions[Opc] << " is set but Inst{" << i 1414 << "} is unset!\n" 1415 << " - You can only mark a bit as SoftFail if it is fully defined" 1416 << " (1/0 - not '?') in Inst\n"; 1417 return; 1418 } 1419 } 1420 1421 bool NeedPositiveMask = PositiveMask.getBoolValue(); 1422 bool NeedNegativeMask = NegativeMask.getBoolValue(); 1423 1424 if (!NeedPositiveMask && !NeedNegativeMask) 1425 return; 1426 1427 TableInfo.Table.push_back(MCD::OPC_SoftFail); 1428 1429 SmallString<16> MaskBytes; 1430 raw_svector_ostream S(MaskBytes); 1431 if (NeedPositiveMask) { 1432 encodeULEB128(PositiveMask.getZExtValue(), S); 1433 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) 1434 TableInfo.Table.push_back(MaskBytes[i]); 1435 } else 1436 TableInfo.Table.push_back(0); 1437 if (NeedNegativeMask) { 1438 MaskBytes.clear(); 1439 encodeULEB128(NegativeMask.getZExtValue(), S); 1440 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) 1441 TableInfo.Table.push_back(MaskBytes[i]); 1442 } else 1443 TableInfo.Table.push_back(0); 1444 } 1445 1446 // Emits table entries to decode the singleton. 1447 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, 1448 EncodingIDAndOpcode Opc) const { 1449 std::vector<unsigned> StartBits; 1450 std::vector<unsigned> EndBits; 1451 std::vector<uint64_t> FieldVals; 1452 insn_t Insn; 1453 insnWithID(Insn, Opc.EncodingID); 1454 1455 // Look for islands of undecoded bits of the singleton. 1456 getIslands(StartBits, EndBits, FieldVals, Insn); 1457 1458 unsigned Size = StartBits.size(); 1459 1460 // Emit the predicate table entry if one is needed. 1461 emitPredicateTableEntry(TableInfo, Opc.EncodingID); 1462 1463 // Check any additional encoding fields needed. 1464 for (unsigned I = Size; I != 0; --I) { 1465 unsigned NumBits = EndBits[I - 1] - StartBits[I - 1] + 1; 1466 assert((NumBits < (1u << 8)) && "NumBits overflowed uint8 table entry!"); 1467 TableInfo.Table.push_back(MCD::OPC_CheckField); 1468 uint8_t Buffer[16], *P; 1469 encodeULEB128(StartBits[I - 1], Buffer); 1470 for (P = Buffer; *P >= 128; ++P) 1471 TableInfo.Table.push_back(*P); 1472 TableInfo.Table.push_back(*P); 1473 TableInfo.Table.push_back(NumBits); 1474 encodeULEB128(FieldVals[I - 1], Buffer); 1475 for (P = Buffer; *P >= 128; ++P) 1476 TableInfo.Table.push_back(*P); 1477 TableInfo.Table.push_back(*P); 1478 // Push location for NumToSkip backpatching. 1479 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1480 // The fixup is always 24-bits, so go ahead and allocate the space 1481 // in the table so all our relative position calculations work OK even 1482 // before we fully resolve the real value here. 1483 TableInfo.Table.push_back(0); 1484 TableInfo.Table.push_back(0); 1485 TableInfo.Table.push_back(0); 1486 } 1487 1488 // Check for soft failure of the match. 1489 emitSoftFailTableEntry(TableInfo, Opc.EncodingID); 1490 1491 bool HasCompleteDecoder; 1492 unsigned DIdx = 1493 getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder); 1494 1495 // Produce OPC_Decode or OPC_TryDecode opcode based on the information 1496 // whether the instruction decoder is complete or not. If it is complete 1497 // then it handles all possible values of remaining variable/unfiltered bits 1498 // and for any value can determine if the bitpattern is a valid instruction 1499 // or not. This means OPC_Decode will be the final step in the decoding 1500 // process. If it is not complete, then the Fail return code from the 1501 // decoder method indicates that additional processing should be done to see 1502 // if there is any other instruction that also matches the bitpattern and 1503 // can decode it. 1504 TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode 1505 : MCD::OPC_TryDecode); 1506 NumEncodingsSupported++; 1507 uint8_t Buffer[16], *p; 1508 encodeULEB128(Opc.Opcode, Buffer); 1509 for (p = Buffer; *p >= 128; ++p) 1510 TableInfo.Table.push_back(*p); 1511 TableInfo.Table.push_back(*p); 1512 1513 SmallString<16> Bytes; 1514 raw_svector_ostream S(Bytes); 1515 encodeULEB128(DIdx, S); 1516 1517 // Decoder index. 1518 for (const auto B : Bytes) 1519 TableInfo.Table.push_back(B); 1520 1521 if (!HasCompleteDecoder) { 1522 // Push location for NumToSkip backpatching. 1523 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1524 // Allocate the space for the fixup. 1525 TableInfo.Table.push_back(0); 1526 TableInfo.Table.push_back(0); 1527 TableInfo.Table.push_back(0); 1528 } 1529 } 1530 1531 // Emits table entries to decode the singleton, and then to decode the rest. 1532 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, 1533 const Filter &Best) const { 1534 EncodingIDAndOpcode Opc = Best.getSingletonOpc(); 1535 1536 // complex singletons need predicate checks from the first singleton 1537 // to refer forward to the variable filterchooser that follows. 1538 TableInfo.FixupStack.emplace_back(); 1539 1540 emitSingletonTableEntry(TableInfo, Opc); 1541 1542 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 1543 TableInfo.Table.size()); 1544 TableInfo.FixupStack.pop_back(); 1545 1546 Best.getVariableFC().emitTableEntries(TableInfo); 1547 } 1548 1549 // Assign a single filter and run with it. Top level API client can initialize 1550 // with a single filter to start the filtering process. 1551 void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit, 1552 bool mixed) { 1553 Filters.clear(); 1554 Filters.emplace_back(*this, startBit, numBit, true); 1555 BestIndex = 0; // Sole Filter instance to choose from. 1556 bestFilter().recurse(); 1557 } 1558 1559 // reportRegion is a helper function for filterProcessor to mark a region as 1560 // eligible for use as a filter region. 1561 void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit, 1562 unsigned BitIndex, bool AllowMixed) { 1563 if (RA == ATTR_MIXED && AllowMixed) 1564 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true); 1565 else if (RA == ATTR_ALL_SET && !AllowMixed) 1566 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false); 1567 } 1568 1569 // FilterProcessor scans the well-known encoding bits of the instructions and 1570 // builds up a list of candidate filters. It chooses the best filter and 1571 // recursively descends down the decoding tree. 1572 bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { 1573 Filters.clear(); 1574 BestIndex = -1; 1575 unsigned numInstructions = Opcodes.size(); 1576 1577 assert(numInstructions && "Filter created with no instructions"); 1578 1579 // No further filtering is necessary. 1580 if (numInstructions == 1) 1581 return true; 1582 1583 // Heuristics. See also doFilter()'s "Heuristics" comment when num of 1584 // instructions is 3. 1585 if (AllowMixed && !Greedy) { 1586 assert(numInstructions == 3); 1587 1588 for (const auto &Opcode : Opcodes) { 1589 std::vector<unsigned> StartBits; 1590 std::vector<unsigned> EndBits; 1591 std::vector<uint64_t> FieldVals; 1592 insn_t Insn; 1593 1594 insnWithID(Insn, Opcode.EncodingID); 1595 1596 // Look for islands of undecoded bits of any instruction. 1597 if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) { 1598 // Found an instruction with island(s). Now just assign a filter. 1599 runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true); 1600 return true; 1601 } 1602 } 1603 } 1604 1605 unsigned BitIndex; 1606 1607 // We maintain BIT_WIDTH copies of the bitAttrs automaton. 1608 // The automaton consumes the corresponding bit from each 1609 // instruction. 1610 // 1611 // Input symbols: 0, 1, and _ (unset). 1612 // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED. 1613 // Initial state: NONE. 1614 // 1615 // (NONE) ------- [01] -> (ALL_SET) 1616 // (NONE) ------- _ ----> (ALL_UNSET) 1617 // (ALL_SET) ---- [01] -> (ALL_SET) 1618 // (ALL_SET) ---- _ ----> (MIXED) 1619 // (ALL_UNSET) -- [01] -> (MIXED) 1620 // (ALL_UNSET) -- _ ----> (ALL_UNSET) 1621 // (MIXED) ------ . ----> (MIXED) 1622 // (FILTERED)---- . ----> (FILTERED) 1623 1624 std::vector<bitAttr_t> bitAttrs; 1625 1626 // FILTERED bit positions provide no entropy and are not worthy of pursuing. 1627 // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position. 1628 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) 1629 if (FilterBitValues[BitIndex] == BIT_TRUE || 1630 FilterBitValues[BitIndex] == BIT_FALSE) 1631 bitAttrs.push_back(ATTR_FILTERED); 1632 else 1633 bitAttrs.push_back(ATTR_NONE); 1634 1635 for (const auto &OpcPair : Opcodes) { 1636 insn_t insn; 1637 1638 insnWithID(insn, OpcPair.EncodingID); 1639 1640 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { 1641 switch (bitAttrs[BitIndex]) { 1642 case ATTR_NONE: 1643 if (insn[BitIndex] == BIT_UNSET) 1644 bitAttrs[BitIndex] = ATTR_ALL_UNSET; 1645 else 1646 bitAttrs[BitIndex] = ATTR_ALL_SET; 1647 break; 1648 case ATTR_ALL_SET: 1649 if (insn[BitIndex] == BIT_UNSET) 1650 bitAttrs[BitIndex] = ATTR_MIXED; 1651 break; 1652 case ATTR_ALL_UNSET: 1653 if (insn[BitIndex] != BIT_UNSET) 1654 bitAttrs[BitIndex] = ATTR_MIXED; 1655 break; 1656 case ATTR_MIXED: 1657 case ATTR_FILTERED: 1658 break; 1659 } 1660 } 1661 } 1662 1663 // The regionAttr automaton consumes the bitAttrs automatons' state, 1664 // lowest-to-highest. 1665 // 1666 // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed) 1667 // States: NONE, ALL_SET, MIXED 1668 // Initial state: NONE 1669 // 1670 // (NONE) ----- F --> (NONE) 1671 // (NONE) ----- S --> (ALL_SET) ; and set region start 1672 // (NONE) ----- U --> (NONE) 1673 // (NONE) ----- M --> (MIXED) ; and set region start 1674 // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region 1675 // (ALL_SET) -- S --> (ALL_SET) 1676 // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region 1677 // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region 1678 // (MIXED) ---- F --> (NONE) ; and report a MIXED region 1679 // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region 1680 // (MIXED) ---- U --> (NONE) ; and report a MIXED region 1681 // (MIXED) ---- M --> (MIXED) 1682 1683 bitAttr_t RA = ATTR_NONE; 1684 unsigned StartBit = 0; 1685 1686 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { 1687 bitAttr_t bitAttr = bitAttrs[BitIndex]; 1688 1689 assert(bitAttr != ATTR_NONE && "Bit without attributes"); 1690 1691 switch (RA) { 1692 case ATTR_NONE: 1693 switch (bitAttr) { 1694 case ATTR_FILTERED: 1695 break; 1696 case ATTR_ALL_SET: 1697 StartBit = BitIndex; 1698 RA = ATTR_ALL_SET; 1699 break; 1700 case ATTR_ALL_UNSET: 1701 break; 1702 case ATTR_MIXED: 1703 StartBit = BitIndex; 1704 RA = ATTR_MIXED; 1705 break; 1706 default: 1707 llvm_unreachable("Unexpected bitAttr!"); 1708 } 1709 break; 1710 case ATTR_ALL_SET: 1711 switch (bitAttr) { 1712 case ATTR_FILTERED: 1713 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1714 RA = ATTR_NONE; 1715 break; 1716 case ATTR_ALL_SET: 1717 break; 1718 case ATTR_ALL_UNSET: 1719 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1720 RA = ATTR_NONE; 1721 break; 1722 case ATTR_MIXED: 1723 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1724 StartBit = BitIndex; 1725 RA = ATTR_MIXED; 1726 break; 1727 default: 1728 llvm_unreachable("Unexpected bitAttr!"); 1729 } 1730 break; 1731 case ATTR_MIXED: 1732 switch (bitAttr) { 1733 case ATTR_FILTERED: 1734 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1735 StartBit = BitIndex; 1736 RA = ATTR_NONE; 1737 break; 1738 case ATTR_ALL_SET: 1739 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1740 StartBit = BitIndex; 1741 RA = ATTR_ALL_SET; 1742 break; 1743 case ATTR_ALL_UNSET: 1744 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1745 RA = ATTR_NONE; 1746 break; 1747 case ATTR_MIXED: 1748 break; 1749 default: 1750 llvm_unreachable("Unexpected bitAttr!"); 1751 } 1752 break; 1753 case ATTR_ALL_UNSET: 1754 llvm_unreachable("regionAttr state machine has no ATTR_UNSET state"); 1755 case ATTR_FILTERED: 1756 llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state"); 1757 } 1758 } 1759 1760 // At the end, if we're still in ALL_SET or MIXED states, report a region 1761 switch (RA) { 1762 case ATTR_NONE: 1763 break; 1764 case ATTR_FILTERED: 1765 break; 1766 case ATTR_ALL_SET: 1767 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1768 break; 1769 case ATTR_ALL_UNSET: 1770 break; 1771 case ATTR_MIXED: 1772 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1773 break; 1774 } 1775 1776 // We have finished with the filter processings. Now it's time to choose 1777 // the best performing filter. 1778 BestIndex = 0; 1779 bool AllUseless = true; 1780 unsigned BestScore = 0; 1781 1782 for (const auto &[Idx, Filter] : enumerate(Filters)) { 1783 unsigned Usefulness = Filter.usefulness(); 1784 1785 if (Usefulness) 1786 AllUseless = false; 1787 1788 if (Usefulness > BestScore) { 1789 BestIndex = Idx; 1790 BestScore = Usefulness; 1791 } 1792 } 1793 1794 if (!AllUseless) 1795 bestFilter().recurse(); 1796 1797 return !AllUseless; 1798 } // end of FilterChooser::filterProcessor(bool) 1799 1800 // Decides on the best configuration of filter(s) to use in order to decode 1801 // the instructions. A conflict of instructions may occur, in which case we 1802 // dump the conflict set to the standard error. 1803 void FilterChooser::doFilter() { 1804 unsigned Num = Opcodes.size(); 1805 assert(Num && "FilterChooser created with no instructions"); 1806 1807 // Try regions of consecutive known bit values first. 1808 if (filterProcessor(false)) 1809 return; 1810 1811 // Then regions of mixed bits (both known and unitialized bit values allowed). 1812 if (filterProcessor(true)) 1813 return; 1814 1815 // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where 1816 // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a 1817 // well-known encoding pattern. In such case, we backtrack and scan for the 1818 // the very first consecutive ATTR_ALL_SET region and assign a filter to it. 1819 if (Num == 3 && filterProcessor(true, false)) 1820 return; 1821 1822 // If we come to here, the instruction decoding has failed. 1823 // Set the BestIndex to -1 to indicate so. 1824 BestIndex = -1; 1825 } 1826 1827 // emitTableEntries - Emit state machine entries to decode our share of 1828 // instructions. 1829 void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { 1830 if (Opcodes.size() == 1) { 1831 // There is only one instruction in the set, which is great! 1832 // Call emitSingletonDecoder() to see whether there are any remaining 1833 // encodings bits. 1834 emitSingletonTableEntry(TableInfo, Opcodes[0]); 1835 return; 1836 } 1837 1838 // Choose the best filter to do the decodings! 1839 if (BestIndex != -1) { 1840 const Filter &Best = Filters[BestIndex]; 1841 if (Best.getNumFiltered() == 1) 1842 emitSingletonTableEntry(TableInfo, Best); 1843 else 1844 Best.emitTableEntry(TableInfo); 1845 return; 1846 } 1847 1848 // We don't know how to decode these instructions! Dump the 1849 // conflict set and bail. 1850 1851 // Print out useful conflict information for postmortem analysis. 1852 errs() << "Decoding Conflict:\n"; 1853 1854 dumpStack(errs(), "\t\t"); 1855 1856 for (auto Opcode : Opcodes) { 1857 errs() << '\t'; 1858 emitNameWithID(errs(), Opcode.EncodingID); 1859 errs() << " "; 1860 dumpBits( 1861 errs(), 1862 getBitsField(*AllInstructions[Opcode.EncodingID].EncodingDef, "Inst")); 1863 errs() << '\n'; 1864 } 1865 } 1866 1867 static std::string findOperandDecoderMethod(Record *Record) { 1868 std::string Decoder; 1869 1870 RecordVal *DecoderString = Record->getValue("DecoderMethod"); 1871 StringInit *String = 1872 DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) : nullptr; 1873 if (String) { 1874 Decoder = std::string(String->getValue()); 1875 if (!Decoder.empty()) 1876 return Decoder; 1877 } 1878 1879 if (Record->isSubClassOf("RegisterOperand")) 1880 // Allows use of a DecoderMethod in referenced RegisterClass if set. 1881 return findOperandDecoderMethod(Record->getValueAsDef("RegClass")); 1882 1883 if (Record->isSubClassOf("RegisterClass")) { 1884 Decoder = "Decode" + Record->getName().str() + "RegisterClass"; 1885 } else if (Record->isSubClassOf("PointerLikeRegClass")) { 1886 Decoder = "DecodePointerLikeRegClass" + 1887 utostr(Record->getValueAsInt("RegClassKind")); 1888 } 1889 1890 return Decoder; 1891 } 1892 1893 OperandInfo getOpInfo(Record *TypeRecord) { 1894 std::string Decoder = findOperandDecoderMethod(TypeRecord); 1895 1896 RecordVal *HasCompleteDecoderVal = TypeRecord->getValue("hasCompleteDecoder"); 1897 BitInit *HasCompleteDecoderBit = 1898 HasCompleteDecoderVal 1899 ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) 1900 : nullptr; 1901 bool HasCompleteDecoder = 1902 HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true; 1903 1904 return OperandInfo(Decoder, HasCompleteDecoder); 1905 } 1906 1907 void parseVarLenInstOperand(const Record &Def, 1908 std::vector<OperandInfo> &Operands, 1909 const CodeGenInstruction &CGI) { 1910 1911 const RecordVal *RV = Def.getValue("Inst"); 1912 VarLenInst VLI(cast<DagInit>(RV->getValue()), RV); 1913 SmallVector<int> TiedTo; 1914 1915 for (const auto &[Idx, Op] : enumerate(CGI.Operands)) { 1916 if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0) 1917 for (auto *Arg : Op.MIOperandInfo->getArgs()) 1918 Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef())); 1919 else 1920 Operands.push_back(getOpInfo(Op.Rec)); 1921 1922 int TiedReg = Op.getTiedRegister(); 1923 TiedTo.push_back(-1); 1924 if (TiedReg != -1) { 1925 TiedTo[Idx] = TiedReg; 1926 TiedTo[TiedReg] = Idx; 1927 } 1928 } 1929 1930 unsigned CurrBitPos = 0; 1931 for (const auto &EncodingSegment : VLI) { 1932 unsigned Offset = 0; 1933 StringRef OpName; 1934 1935 if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) { 1936 OpName = SI->getValue(); 1937 } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) { 1938 OpName = cast<StringInit>(DI->getArg(0))->getValue(); 1939 Offset = cast<IntInit>(DI->getArg(2))->getValue(); 1940 } 1941 1942 if (!OpName.empty()) { 1943 auto OpSubOpPair = 1944 const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName( 1945 OpName); 1946 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair); 1947 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset); 1948 if (!EncodingSegment.CustomDecoder.empty()) 1949 Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str(); 1950 1951 int TiedReg = TiedTo[OpSubOpPair.first]; 1952 if (TiedReg != -1) { 1953 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber( 1954 std::pair(TiedReg, OpSubOpPair.second)); 1955 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset); 1956 } 1957 } 1958 1959 CurrBitPos += EncodingSegment.BitWidth; 1960 } 1961 } 1962 1963 static void debugDumpRecord(const Record &Rec) { 1964 // Dump the record, so we can see what's going on... 1965 std::string E; 1966 raw_string_ostream S(E); 1967 S << "Dumping record for previous error:\n"; 1968 S << Rec; 1969 PrintNote(E); 1970 } 1971 1972 /// For an operand field named OpName: populate OpInfo.InitValue with the 1973 /// constant-valued bit values, and OpInfo.Fields with the ranges of bits to 1974 /// insert from the decoded instruction. 1975 static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits, 1976 std::map<std::string, std::string> &TiedNames, 1977 StringRef OpName, OperandInfo &OpInfo) { 1978 // Some bits of the operand may be required to be 1 depending on the 1979 // instruction's encoding. Collect those bits. 1980 if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName)) 1981 if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue())) 1982 for (unsigned I = 0; I < OpBits->getNumBits(); ++I) 1983 if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I))) 1984 if (OpBit->getValue()) 1985 OpInfo.InitValue |= 1ULL << I; 1986 1987 for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) { 1988 VarInit *Var; 1989 unsigned Offset = 0; 1990 for (; J != Bits.getNumBits(); ++J) { 1991 VarBitInit *BJ = dyn_cast<VarBitInit>(Bits.getBit(J)); 1992 if (BJ) { 1993 Var = dyn_cast<VarInit>(BJ->getBitVar()); 1994 if (I == J) 1995 Offset = BJ->getBitNum(); 1996 else if (BJ->getBitNum() != Offset + J - I) 1997 break; 1998 } else { 1999 Var = dyn_cast<VarInit>(Bits.getBit(J)); 2000 } 2001 if (!Var || (Var->getName() != OpName && 2002 Var->getName() != TiedNames[std::string(OpName)])) 2003 break; 2004 } 2005 if (I == J) 2006 ++J; 2007 else 2008 OpInfo.addField(I, J - I, Offset); 2009 } 2010 } 2011 2012 static unsigned 2013 populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, 2014 const CodeGenInstruction &CGI, unsigned Opc, 2015 std::map<unsigned, std::vector<OperandInfo>> &Operands, 2016 bool IsVarLenInst) { 2017 const Record &Def = *CGI.TheDef; 2018 // If all the bit positions are not specified; do not decode this instruction. 2019 // We are bound to fail! For proper disassembly, the well-known encoding bits 2020 // of the instruction must be fully specified. 2021 2022 BitsInit &Bits = getBitsField(EncodingDef, "Inst"); 2023 if (Bits.allInComplete()) 2024 return 0; 2025 2026 std::vector<OperandInfo> InsnOperands; 2027 2028 // If the instruction has specified a custom decoding hook, use that instead 2029 // of trying to auto-generate the decoder. 2030 StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod"); 2031 if (InstDecoder != "") { 2032 bool HasCompleteInstDecoder = 2033 EncodingDef.getValueAsBit("hasCompleteDecoder"); 2034 InsnOperands.push_back( 2035 OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder)); 2036 Operands[Opc] = InsnOperands; 2037 return Bits.getNumBits(); 2038 } 2039 2040 // Generate a description of the operand of the instruction that we know 2041 // how to decode automatically. 2042 // FIXME: We'll need to have a way to manually override this as needed. 2043 2044 // Gather the outputs/inputs of the instruction, so we can find their 2045 // positions in the encoding. This assumes for now that they appear in the 2046 // MCInst in the order that they're listed. 2047 std::vector<std::pair<Init *, StringRef>> InOutOperands; 2048 DagInit *Out = Def.getValueAsDag("OutOperandList"); 2049 DagInit *In = Def.getValueAsDag("InOperandList"); 2050 for (const auto &[Idx, Arg] : enumerate(Out->getArgs())) 2051 InOutOperands.push_back(std::pair(Arg, Out->getArgNameStr(Idx))); 2052 for (const auto &[Idx, Arg] : enumerate(In->getArgs())) 2053 InOutOperands.push_back(std::pair(Arg, In->getArgNameStr(Idx))); 2054 2055 // Search for tied operands, so that we can correctly instantiate 2056 // operands that are not explicitly represented in the encoding. 2057 std::map<std::string, std::string> TiedNames; 2058 for (const auto &[I, Op] : enumerate(CGI.Operands)) { 2059 for (const auto &[J, CI] : enumerate(Op.Constraints)) { 2060 if (CI.isTied()) { 2061 std::pair<unsigned, unsigned> SO = 2062 CGI.Operands.getSubOperandNumber(CI.getTiedOperand()); 2063 std::string TiedName = CGI.Operands[SO.first].SubOpNames[SO.second]; 2064 if (TiedName.empty()) 2065 TiedName = CGI.Operands[SO.first].Name; 2066 std::string MyName = Op.SubOpNames[J]; 2067 if (MyName.empty()) 2068 MyName = Op.Name; 2069 2070 TiedNames[MyName] = TiedName; 2071 TiedNames[TiedName] = MyName; 2072 } 2073 } 2074 } 2075 2076 if (IsVarLenInst) { 2077 parseVarLenInstOperand(EncodingDef, InsnOperands, CGI); 2078 } else { 2079 // For each operand, see if we can figure out where it is encoded. 2080 for (const auto &Op : InOutOperands) { 2081 Init *OpInit = Op.first; 2082 StringRef OpName = Op.second; 2083 2084 // We're ready to find the instruction encoding locations for this 2085 // operand. 2086 2087 // First, find the operand type ("OpInit"), and sub-op names 2088 // ("SubArgDag") if present. 2089 DagInit *SubArgDag = dyn_cast<DagInit>(OpInit); 2090 if (SubArgDag) 2091 OpInit = SubArgDag->getOperator(); 2092 Record *OpTypeRec = cast<DefInit>(OpInit)->getDef(); 2093 // Lookup the sub-operands from the operand type record (note that only 2094 // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp). 2095 DagInit *SubOps = OpTypeRec->isSubClassOf("Operand") 2096 ? OpTypeRec->getValueAsDag("MIOperandInfo") 2097 : nullptr; 2098 2099 // Lookup the decoder method and construct a new OperandInfo to hold our 2100 // result. 2101 OperandInfo OpInfo = getOpInfo(OpTypeRec); 2102 2103 // If we have named sub-operands... 2104 if (SubArgDag) { 2105 // Then there should not be a custom decoder specified on the top-level 2106 // type. 2107 if (!OpInfo.Decoder.empty()) { 2108 PrintError(EncodingDef.getLoc(), 2109 "DecoderEmitter: operand \"" + OpName + "\" has type \"" + 2110 OpInit->getAsString() + 2111 "\" with a custom DecoderMethod, but also named " 2112 "sub-operands."); 2113 continue; 2114 } 2115 2116 // Decode each of the sub-ops separately. 2117 assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs()); 2118 for (const auto &[I, Arg] : enumerate(SubOps->getArgs())) { 2119 StringRef SubOpName = SubArgDag->getArgNameStr(I); 2120 OperandInfo SubOpInfo = getOpInfo(cast<DefInit>(Arg)->getDef()); 2121 2122 addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpName, 2123 SubOpInfo); 2124 InsnOperands.push_back(SubOpInfo); 2125 } 2126 continue; 2127 } 2128 2129 // Otherwise, if we have an operand with sub-operands, but they aren't 2130 // named... 2131 if (SubOps && OpInfo.Decoder.empty()) { 2132 // If it's a single sub-operand, and no custom decoder, use the decoder 2133 // from the one sub-operand. 2134 if (SubOps->getNumArgs() == 1) 2135 OpInfo = getOpInfo(cast<DefInit>(SubOps->getArg(0))->getDef()); 2136 2137 // If we have multiple sub-ops, there'd better have a custom 2138 // decoder. (Otherwise we don't know how to populate them properly...) 2139 if (SubOps->getNumArgs() > 1) { 2140 PrintError(EncodingDef.getLoc(), 2141 "DecoderEmitter: operand \"" + OpName + 2142 "\" uses MIOperandInfo with multiple ops, but doesn't " 2143 "have a custom decoder!"); 2144 debugDumpRecord(EncodingDef); 2145 continue; 2146 } 2147 } 2148 2149 addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo); 2150 // FIXME: it should be an error not to find a definition for a given 2151 // operand, rather than just failing to add it to the resulting 2152 // instruction! (This is a longstanding bug, which will be addressed in an 2153 // upcoming change.) 2154 if (OpInfo.numFields() > 0) 2155 InsnOperands.push_back(OpInfo); 2156 } 2157 } 2158 Operands[Opc] = InsnOperands; 2159 2160 #if 0 2161 LLVM_DEBUG({ 2162 // Dumps the instruction encoding bits. 2163 dumpBits(errs(), Bits); 2164 2165 errs() << '\n'; 2166 2167 // Dumps the list of operand info. 2168 for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { 2169 const CGIOperandList::OperandInfo &Info = CGI.Operands[i]; 2170 const std::string &OperandName = Info.Name; 2171 const Record &OperandDef = *Info.Rec; 2172 2173 errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n"; 2174 } 2175 }); 2176 #endif 2177 2178 return Bits.getNumBits(); 2179 } 2180 2181 // emitFieldFromInstruction - Emit the templated helper function 2182 // fieldFromInstruction(). 2183 // On Windows we make sure that this function is not inlined when 2184 // using the VS compiler. It has a bug which causes the function 2185 // to be optimized out in some circumstances. See llvm.org/pr38292 2186 static void emitFieldFromInstruction(formatted_raw_ostream &OS) { 2187 OS << R"( 2188 // Helper functions for extracting fields from encoded instructions. 2189 // InsnType must either be integral or an APInt-like object that must: 2190 // * be default-constructible and copy-constructible 2191 // * be constructible from an APInt (this can be private) 2192 // * Support insertBits(bits, startBit, numBits) 2193 // * Support extractBitsAsZExtValue(numBits, startBit) 2194 // * Support the ~, &, ==, and != operators with other objects of the same type 2195 // * Support the != and bitwise & with uint64_t 2196 // * Support put (<<) to raw_ostream& 2197 template <typename InsnType> 2198 #if defined(_MSC_VER) && !defined(__clang__) 2199 __declspec(noinline) 2200 #endif 2201 static std::enable_if_t<std::is_integral<InsnType>::value, InsnType> 2202 fieldFromInstruction(const InsnType &insn, unsigned startBit, 2203 unsigned numBits) { 2204 assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!"); 2205 assert(startBit + numBits <= (sizeof(InsnType) * 8) && 2206 "Instruction field out of bounds!"); 2207 InsnType fieldMask; 2208 if (numBits == sizeof(InsnType) * 8) 2209 fieldMask = (InsnType)(-1LL); 2210 else 2211 fieldMask = (((InsnType)1 << numBits) - 1) << startBit; 2212 return (insn & fieldMask) >> startBit; 2213 } 2214 2215 template <typename InsnType> 2216 static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t> 2217 fieldFromInstruction(const InsnType &insn, unsigned startBit, 2218 unsigned numBits) { 2219 return insn.extractBitsAsZExtValue(numBits, startBit); 2220 } 2221 )"; 2222 } 2223 2224 // emitInsertBits - Emit the templated helper function insertBits(). 2225 static void emitInsertBits(formatted_raw_ostream &OS) { 2226 OS << R"( 2227 // Helper function for inserting bits extracted from an encoded instruction into 2228 // a field. 2229 template <typename InsnType> 2230 static std::enable_if_t<std::is_integral<InsnType>::value> 2231 insertBits(InsnType &field, InsnType bits, unsigned startBit, unsigned numBits) { 2232 assert(startBit + numBits <= sizeof field * 8); 2233 field |= (InsnType)bits << startBit; 2234 } 2235 2236 template <typename InsnType> 2237 static std::enable_if_t<!std::is_integral<InsnType>::value> 2238 insertBits(InsnType &field, uint64_t bits, unsigned startBit, unsigned numBits) { 2239 field.insertBits(bits, startBit, numBits); 2240 } 2241 )"; 2242 } 2243 2244 // emitDecodeInstruction - Emit the templated helper function 2245 // decodeInstruction(). 2246 static void emitDecodeInstruction(formatted_raw_ostream &OS, 2247 bool IsVarLenInst) { 2248 OS << R"( 2249 template <typename InsnType> 2250 static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI, 2251 InsnType insn, uint64_t Address, 2252 const MCDisassembler *DisAsm, 2253 const MCSubtargetInfo &STI)"; 2254 if (IsVarLenInst) { 2255 OS << ",\n " 2256 "llvm::function_ref<void(APInt &, uint64_t)> makeUp"; 2257 } 2258 OS << R"() { 2259 const FeatureBitset &Bits = STI.getFeatureBits(); 2260 2261 const uint8_t *Ptr = DecodeTable; 2262 uint64_t CurFieldValue = 0; 2263 DecodeStatus S = MCDisassembler::Success; 2264 while (true) { 2265 ptrdiff_t Loc = Ptr - DecodeTable; 2266 switch (*Ptr) { 2267 default: 2268 errs() << Loc << ": Unexpected decode table opcode!\n"; 2269 return MCDisassembler::Fail; 2270 case MCD::OPC_ExtractField: { 2271 // Decode the start value. 2272 unsigned Start = decodeULEB128AndIncUnsafe(++Ptr); 2273 unsigned Len = *Ptr++;)"; 2274 if (IsVarLenInst) 2275 OS << "\n makeUp(insn, Start + Len);"; 2276 OS << R"( 2277 CurFieldValue = fieldFromInstruction(insn, Start, Len); 2278 LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", " 2279 << Len << "): " << CurFieldValue << "\n"); 2280 break; 2281 } 2282 case MCD::OPC_FilterValue: { 2283 // Decode the field value. 2284 uint64_t Val = decodeULEB128AndIncUnsafe(++Ptr); 2285 // NumToSkip is a plain 24-bit integer. 2286 unsigned NumToSkip = *Ptr++; 2287 NumToSkip |= (*Ptr++) << 8; 2288 NumToSkip |= (*Ptr++) << 16; 2289 2290 // Perform the filter operation. 2291 if (Val != CurFieldValue) 2292 Ptr += NumToSkip; 2293 LLVM_DEBUG(dbgs() << Loc << ": OPC_FilterValue(" << Val << ", " << NumToSkip 2294 << "): " << ((Val != CurFieldValue) ? "FAIL:" : "PASS:") 2295 << " continuing at " << (Ptr - DecodeTable) << "\n"); 2296 2297 break; 2298 } 2299 case MCD::OPC_CheckField: { 2300 // Decode the start value. 2301 unsigned Start = decodeULEB128AndIncUnsafe(++Ptr); 2302 unsigned Len = *Ptr;)"; 2303 if (IsVarLenInst) 2304 OS << "\n makeUp(insn, Start + Len);"; 2305 OS << R"( 2306 uint64_t FieldValue = fieldFromInstruction(insn, Start, Len); 2307 // Decode the field value. 2308 unsigned PtrLen = 0; 2309 uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen); 2310 Ptr += PtrLen; 2311 // NumToSkip is a plain 24-bit integer. 2312 unsigned NumToSkip = *Ptr++; 2313 NumToSkip |= (*Ptr++) << 8; 2314 NumToSkip |= (*Ptr++) << 16; 2315 2316 // If the actual and expected values don't match, skip. 2317 if (ExpectedValue != FieldValue) 2318 Ptr += NumToSkip; 2319 LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckField(" << Start << ", " 2320 << Len << ", " << ExpectedValue << ", " << NumToSkip 2321 << "): FieldValue = " << FieldValue << ", ExpectedValue = " 2322 << ExpectedValue << ": " 2323 << ((ExpectedValue == FieldValue) ? "PASS\n" : "FAIL\n")); 2324 break; 2325 } 2326 case MCD::OPC_CheckPredicate: { 2327 // Decode the Predicate Index value. 2328 unsigned PIdx = decodeULEB128AndIncUnsafe(++Ptr); 2329 // NumToSkip is a plain 24-bit integer. 2330 unsigned NumToSkip = *Ptr++; 2331 NumToSkip |= (*Ptr++) << 8; 2332 NumToSkip |= (*Ptr++) << 16; 2333 // Check the predicate. 2334 bool Pred; 2335 if (!(Pred = checkDecoderPredicate(PIdx, Bits))) 2336 Ptr += NumToSkip; 2337 (void)Pred; 2338 LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckPredicate(" << PIdx << "): " 2339 << (Pred ? "PASS\n" : "FAIL\n")); 2340 2341 break; 2342 } 2343 case MCD::OPC_Decode: { 2344 // Decode the Opcode value. 2345 unsigned Opc = decodeULEB128AndIncUnsafe(++Ptr); 2346 unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr); 2347 2348 MI.clear(); 2349 MI.setOpcode(Opc); 2350 bool DecodeComplete;)"; 2351 if (IsVarLenInst) { 2352 OS << "\n unsigned Len = InstrLenTable[Opc];\n" 2353 << " makeUp(insn, Len);"; 2354 } 2355 OS << R"( 2356 S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, DecodeComplete); 2357 assert(DecodeComplete); 2358 2359 LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc 2360 << ", using decoder " << DecodeIdx << ": " 2361 << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n"); 2362 return S; 2363 } 2364 case MCD::OPC_TryDecode: { 2365 // Decode the Opcode value. 2366 unsigned Opc = decodeULEB128AndIncUnsafe(++Ptr); 2367 unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr); 2368 // NumToSkip is a plain 24-bit integer. 2369 unsigned NumToSkip = *Ptr++; 2370 NumToSkip |= (*Ptr++) << 8; 2371 NumToSkip |= (*Ptr++) << 16; 2372 2373 // Perform the decode operation. 2374 MCInst TmpMI; 2375 TmpMI.setOpcode(Opc); 2376 bool DecodeComplete; 2377 S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, DecodeComplete); 2378 LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc 2379 << ", using decoder " << DecodeIdx << ": "); 2380 2381 if (DecodeComplete) { 2382 // Decoding complete. 2383 LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n"); 2384 MI = TmpMI; 2385 return S; 2386 } else { 2387 assert(S == MCDisassembler::Fail); 2388 // If the decoding was incomplete, skip. 2389 Ptr += NumToSkip; 2390 LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n"); 2391 // Reset decode status. This also drops a SoftFail status that could be 2392 // set before the decode attempt. 2393 S = MCDisassembler::Success; 2394 } 2395 break; 2396 } 2397 case MCD::OPC_SoftFail: { 2398 // Decode the mask values. 2399 uint64_t PositiveMask = decodeULEB128AndIncUnsafe(++Ptr); 2400 uint64_t NegativeMask = decodeULEB128AndIncUnsafe(Ptr); 2401 bool Fail = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0; 2402 if (Fail) 2403 S = MCDisassembler::SoftFail; 2404 LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Fail ? "FAIL\n" : "PASS\n")); 2405 break; 2406 } 2407 case MCD::OPC_Fail: { 2408 LLVM_DEBUG(dbgs() << Loc << ": OPC_Fail\n"); 2409 return MCDisassembler::Fail; 2410 } 2411 } 2412 } 2413 llvm_unreachable("bogosity detected in disassembler state machine!"); 2414 } 2415 2416 )"; 2417 } 2418 2419 // Helper to propagate SoftFail status. Returns false if the status is Fail; 2420 // callers are expected to early-exit in that condition. (Note, the '&' operator 2421 // is correct to propagate the values of this enum; see comment on 'enum 2422 // DecodeStatus'.) 2423 static void emitCheck(formatted_raw_ostream &OS) { 2424 OS << R"( 2425 static bool Check(DecodeStatus &Out, DecodeStatus In) { 2426 Out = static_cast<DecodeStatus>(Out & In); 2427 return Out != MCDisassembler::Fail; 2428 } 2429 2430 )"; 2431 } 2432 2433 // Collect all HwModes referenced by the target for encoding purposes, 2434 // returning a vector of corresponding names. 2435 static void collectHwModesReferencedForEncodings( 2436 const CodeGenHwModes &HWM, std::vector<StringRef> &Names, 2437 NamespacesHwModesMap &NamespacesWithHwModes) { 2438 SmallBitVector BV(HWM.getNumModeIds()); 2439 for (const auto &MS : HWM.getHwModeSelects()) { 2440 for (const HwModeSelect::PairType &P : MS.second.Items) { 2441 if (P.second->isSubClassOf("InstructionEncoding")) { 2442 std::string DecoderNamespace = 2443 std::string(P.second->getValueAsString("DecoderNamespace")); 2444 if (P.first == DefaultMode) { 2445 NamespacesWithHwModes[DecoderNamespace].insert(""); 2446 } else { 2447 NamespacesWithHwModes[DecoderNamespace].insert( 2448 HWM.getMode(P.first).Name); 2449 } 2450 BV.set(P.first); 2451 } 2452 } 2453 } 2454 transform(BV.set_bits(), std::back_inserter(Names), [&HWM](const int &M) { 2455 if (M == DefaultMode) 2456 return StringRef(""); 2457 return HWM.getModeName(M, /*IncludeDefault=*/true); 2458 }); 2459 } 2460 2461 static void 2462 handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr, 2463 const std::vector<StringRef> &HwModeNames, 2464 NamespacesHwModesMap &NamespacesWithHwModes, 2465 std::vector<EncodingAndInst> &GlobalEncodings) { 2466 const Record *InstDef = Instr->TheDef; 2467 2468 switch (DecoderEmitterSuppressDuplicates) { 2469 case SUPPRESSION_DISABLE: { 2470 for (StringRef HwModeName : HwModeNames) 2471 GlobalEncodings.emplace_back(InstDef, Instr, HwModeName); 2472 break; 2473 } 2474 case SUPPRESSION_LEVEL1: { 2475 std::string DecoderNamespace = 2476 std::string(InstDef->getValueAsString("DecoderNamespace")); 2477 auto It = NamespacesWithHwModes.find(DecoderNamespace); 2478 if (It != NamespacesWithHwModes.end()) { 2479 for (StringRef HwModeName : It->second) 2480 GlobalEncodings.emplace_back(InstDef, Instr, HwModeName); 2481 } else { 2482 // Only emit the encoding once, as it's DecoderNamespace doesn't 2483 // contain any HwModes. 2484 GlobalEncodings.emplace_back(InstDef, Instr, ""); 2485 } 2486 break; 2487 } 2488 case SUPPRESSION_LEVEL2: 2489 GlobalEncodings.emplace_back(InstDef, Instr, ""); 2490 break; 2491 } 2492 } 2493 2494 // Emits disassembler code for instruction decoding. 2495 void DecoderEmitter::run(raw_ostream &o) { 2496 formatted_raw_ostream OS(o); 2497 OS << R"( 2498 #include "llvm/MC/MCInst.h" 2499 #include "llvm/MC/MCSubtargetInfo.h" 2500 #include "llvm/Support/DataTypes.h" 2501 #include "llvm/Support/Debug.h" 2502 #include "llvm/Support/LEB128.h" 2503 #include "llvm/Support/raw_ostream.h" 2504 #include "llvm/TargetParser/SubtargetFeature.h" 2505 #include <assert.h> 2506 2507 namespace llvm { 2508 )"; 2509 2510 emitFieldFromInstruction(OS); 2511 emitInsertBits(OS); 2512 emitCheck(OS); 2513 2514 Target.reverseBitsForLittleEndianEncoding(); 2515 2516 // Parameterize the decoders based on namespace and instruction width. 2517 2518 // First, collect all encoding-related HwModes referenced by the target. 2519 // And establish a mapping table between DecoderNamespace and HwMode. 2520 // If HwModeNames is empty, add the empty string so we always have one HwMode. 2521 const CodeGenHwModes &HWM = Target.getHwModes(); 2522 std::vector<StringRef> HwModeNames; 2523 NamespacesHwModesMap NamespacesWithHwModes; 2524 collectHwModesReferencedForEncodings(HWM, HwModeNames, NamespacesWithHwModes); 2525 if (HwModeNames.empty()) 2526 HwModeNames.push_back(""); 2527 2528 const auto &NumberedInstructions = Target.getInstructionsByEnumValue(); 2529 NumberedEncodings.reserve(NumberedInstructions.size()); 2530 for (const auto &NumberedInstruction : NumberedInstructions) { 2531 const Record *InstDef = NumberedInstruction->TheDef; 2532 if (const RecordVal *RV = InstDef->getValue("EncodingInfos")) { 2533 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 2534 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 2535 for (auto &[ModeId, Encoding] : EBM) { 2536 // DecoderTables with DefaultMode should not have any suffix. 2537 if (ModeId == DefaultMode) { 2538 NumberedEncodings.emplace_back(Encoding, NumberedInstruction, ""); 2539 } else { 2540 NumberedEncodings.emplace_back(Encoding, NumberedInstruction, 2541 HWM.getMode(ModeId).Name); 2542 } 2543 } 2544 continue; 2545 } 2546 } 2547 // This instruction is encoded the same on all HwModes. 2548 // According to user needs, provide varying degrees of suppression. 2549 handleHwModesUnrelatedEncodings(NumberedInstruction, HwModeNames, 2550 NamespacesWithHwModes, NumberedEncodings); 2551 } 2552 for (const auto &NumberedAlias : 2553 RK.getAllDerivedDefinitions("AdditionalEncoding")) 2554 NumberedEncodings.emplace_back( 2555 NumberedAlias, 2556 &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf"))); 2557 2558 std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>> 2559 OpcMap; 2560 std::map<unsigned, std::vector<OperandInfo>> Operands; 2561 std::vector<unsigned> InstrLen; 2562 bool IsVarLenInst = Target.hasVariableLengthEncodings(); 2563 unsigned MaxInstLen = 0; 2564 2565 for (const auto &[NEI, NumberedEncoding] : enumerate(NumberedEncodings)) { 2566 const Record *EncodingDef = NumberedEncoding.EncodingDef; 2567 const CodeGenInstruction *Inst = NumberedEncoding.Inst; 2568 const Record *Def = Inst->TheDef; 2569 unsigned Size = EncodingDef->getValueAsInt("Size"); 2570 if (Def->getValueAsString("Namespace") == "TargetOpcode" || 2571 Def->getValueAsBit("isPseudo") || 2572 Def->getValueAsBit("isAsmParserOnly") || 2573 Def->getValueAsBit("isCodeGenOnly")) { 2574 NumEncodingsLackingDisasm++; 2575 continue; 2576 } 2577 2578 if (NEI < NumberedInstructions.size()) 2579 NumInstructions++; 2580 NumEncodings++; 2581 2582 if (!Size && !IsVarLenInst) 2583 continue; 2584 2585 if (IsVarLenInst) 2586 InstrLen.resize(NumberedInstructions.size(), 0); 2587 2588 if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, NEI, 2589 Operands, IsVarLenInst)) { 2590 if (IsVarLenInst) { 2591 MaxInstLen = std::max(MaxInstLen, Len); 2592 InstrLen[NEI] = Len; 2593 } 2594 std::string DecoderNamespace = 2595 std::string(EncodingDef->getValueAsString("DecoderNamespace")); 2596 if (!NumberedEncoding.HwModeName.empty()) 2597 DecoderNamespace += 2598 std::string("_") + NumberedEncoding.HwModeName.str(); 2599 OpcMap[std::pair(DecoderNamespace, Size)].emplace_back( 2600 NEI, Target.getInstrIntValue(Def)); 2601 } else { 2602 NumEncodingsOmitted++; 2603 } 2604 } 2605 2606 DecoderTableInfo TableInfo; 2607 for (const auto &Opc : OpcMap) { 2608 // Emit the decoder for this namespace+width combination. 2609 ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(), 2610 NumberedEncodings.size()); 2611 FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands, 2612 IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this); 2613 2614 // The decode table is cleared for each top level decoder function. The 2615 // predicates and decoders themselves, however, are shared across all 2616 // decoders to give more opportunities for uniqueing. 2617 TableInfo.Table.clear(); 2618 TableInfo.FixupStack.clear(); 2619 TableInfo.Table.reserve(16384); 2620 TableInfo.FixupStack.emplace_back(); 2621 FC.emitTableEntries(TableInfo); 2622 // Any NumToSkip fixups in the top level scope can resolve to the 2623 // OPC_Fail at the end of the table. 2624 assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!"); 2625 // Resolve any NumToSkip fixups in the current scope. 2626 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 2627 TableInfo.Table.size()); 2628 TableInfo.FixupStack.clear(); 2629 2630 TableInfo.Table.push_back(MCD::OPC_Fail); 2631 2632 // Print the table to the output stream. 2633 emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), Opc.first.first, 2634 Opc.second); 2635 } 2636 2637 // For variable instruction, we emit a instruction length table 2638 // to let the decoder know how long the instructions are. 2639 // You can see example usage in M68k's disassembler. 2640 if (IsVarLenInst) 2641 emitInstrLenTable(OS, InstrLen); 2642 // Emit the predicate function. 2643 emitPredicateFunction(OS, TableInfo.Predicates, 0); 2644 2645 // Emit the decoder function. 2646 emitDecoderFunction(OS, TableInfo.Decoders, 0); 2647 2648 // Emit the main entry point for the decoder, decodeInstruction(). 2649 emitDecodeInstruction(OS, IsVarLenInst); 2650 2651 OS << "\n} // end namespace llvm\n"; 2652 } 2653 2654 namespace llvm { 2655 2656 void EmitDecoder(RecordKeeper &RK, raw_ostream &OS, 2657 const std::string &PredicateNamespace) { 2658 DecoderEmitter(RK, PredicateNamespace).run(OS); 2659 } 2660 2661 } // end namespace llvm 2662