1 //===---------------- DecoderEmitter.cpp - Decoder Generator --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // It contains the tablegen backend that emits the decoder functions for 10 // targets with fixed/variable length instruction set. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeGenInstruction.h" 15 #include "CodeGenTarget.h" 16 #include "InfoByHwMode.h" 17 #include "VarLenCodeEmitterGen.h" 18 #include "llvm/ADT/APInt.h" 19 #include "llvm/ADT/ArrayRef.h" 20 #include "llvm/ADT/CachedHashString.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/ADT/SmallString.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/ADT/StringExtras.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/MC/MCDecoderOps.h" 28 #include "llvm/Support/Casting.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/FormattedStream.h" 32 #include "llvm/Support/LEB128.h" 33 #include "llvm/Support/raw_ostream.h" 34 #include "llvm/TableGen/Error.h" 35 #include "llvm/TableGen/Record.h" 36 #include <algorithm> 37 #include <cassert> 38 #include <cstddef> 39 #include <cstdint> 40 #include <map> 41 #include <memory> 42 #include <set> 43 #include <string> 44 #include <utility> 45 #include <vector> 46 47 using namespace llvm; 48 49 #define DEBUG_TYPE "decoder-emitter" 50 51 namespace { 52 53 STATISTIC(NumEncodings, "Number of encodings considered"); 54 STATISTIC(NumEncodingsLackingDisasm, "Number of encodings without disassembler info"); 55 STATISTIC(NumInstructions, "Number of instructions considered"); 56 STATISTIC(NumEncodingsSupported, "Number of encodings supported"); 57 STATISTIC(NumEncodingsOmitted, "Number of encodings omitted"); 58 59 struct EncodingField { 60 unsigned Base, Width, Offset; 61 EncodingField(unsigned B, unsigned W, unsigned O) 62 : Base(B), Width(W), Offset(O) { } 63 }; 64 65 struct OperandInfo { 66 std::vector<EncodingField> Fields; 67 std::string Decoder; 68 bool HasCompleteDecoder; 69 uint64_t InitValue; 70 71 OperandInfo(std::string D, bool HCD) 72 : Decoder(std::move(D)), HasCompleteDecoder(HCD), InitValue(0) {} 73 74 void addField(unsigned Base, unsigned Width, unsigned Offset) { 75 Fields.push_back(EncodingField(Base, Width, Offset)); 76 } 77 78 unsigned numFields() const { return Fields.size(); } 79 80 typedef std::vector<EncodingField>::const_iterator const_iterator; 81 82 const_iterator begin() const { return Fields.begin(); } 83 const_iterator end() const { return Fields.end(); } 84 }; 85 86 typedef std::vector<uint8_t> DecoderTable; 87 typedef uint32_t DecoderFixup; 88 typedef std::vector<DecoderFixup> FixupList; 89 typedef std::vector<FixupList> FixupScopeList; 90 typedef SmallSetVector<CachedHashString, 16> PredicateSet; 91 typedef SmallSetVector<CachedHashString, 16> DecoderSet; 92 struct DecoderTableInfo { 93 DecoderTable Table; 94 FixupScopeList FixupStack; 95 PredicateSet Predicates; 96 DecoderSet Decoders; 97 }; 98 99 struct EncodingAndInst { 100 const Record *EncodingDef; 101 const CodeGenInstruction *Inst; 102 StringRef HwModeName; 103 104 EncodingAndInst(const Record *EncodingDef, const CodeGenInstruction *Inst, 105 StringRef HwModeName = "") 106 : EncodingDef(EncodingDef), Inst(Inst), HwModeName(HwModeName) {} 107 }; 108 109 struct EncodingIDAndOpcode { 110 unsigned EncodingID; 111 unsigned Opcode; 112 113 EncodingIDAndOpcode() : EncodingID(0), Opcode(0) {} 114 EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode) 115 : EncodingID(EncodingID), Opcode(Opcode) {} 116 }; 117 118 raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) { 119 if (Value.EncodingDef != Value.Inst->TheDef) 120 OS << Value.EncodingDef->getName() << ":"; 121 OS << Value.Inst->TheDef->getName(); 122 return OS; 123 } 124 125 class DecoderEmitter { 126 RecordKeeper &RK; 127 std::vector<EncodingAndInst> NumberedEncodings; 128 129 public: 130 DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace) 131 : RK(R), Target(R), PredicateNamespace(std::move(PredicateNamespace)) {} 132 133 // Emit the decoder state machine table. 134 void emitTable(formatted_raw_ostream &o, DecoderTable &Table, 135 unsigned Indentation, unsigned BitWidth, 136 StringRef Namespace) const; 137 void emitInstrLenTable(formatted_raw_ostream &OS, 138 std::vector<unsigned> &InstrLen) const; 139 void emitPredicateFunction(formatted_raw_ostream &OS, 140 PredicateSet &Predicates, 141 unsigned Indentation) const; 142 void emitDecoderFunction(formatted_raw_ostream &OS, 143 DecoderSet &Decoders, 144 unsigned Indentation) const; 145 146 // run - Output the code emitter 147 void run(raw_ostream &o); 148 149 private: 150 CodeGenTarget Target; 151 152 public: 153 std::string PredicateNamespace; 154 }; 155 156 } // end anonymous namespace 157 158 // The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system 159 // for a bit value. 160 // 161 // BIT_UNFILTERED is used as the init value for a filter position. It is used 162 // only for filter processings. 163 typedef enum { 164 BIT_TRUE, // '1' 165 BIT_FALSE, // '0' 166 BIT_UNSET, // '?' 167 BIT_UNFILTERED // unfiltered 168 } bit_value_t; 169 170 static bool ValueSet(bit_value_t V) { 171 return (V == BIT_TRUE || V == BIT_FALSE); 172 } 173 174 static bool ValueNotSet(bit_value_t V) { 175 return (V == BIT_UNSET); 176 } 177 178 static int Value(bit_value_t V) { 179 return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1); 180 } 181 182 static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) { 183 if (BitInit *bit = dyn_cast<BitInit>(bits.getBit(index))) 184 return bit->getValue() ? BIT_TRUE : BIT_FALSE; 185 186 // The bit is uninitialized. 187 return BIT_UNSET; 188 } 189 190 // Prints the bit value for each position. 191 static void dumpBits(raw_ostream &o, const BitsInit &bits) { 192 for (unsigned index = bits.getNumBits(); index > 0; --index) { 193 switch (bitFromBits(bits, index - 1)) { 194 case BIT_TRUE: 195 o << "1"; 196 break; 197 case BIT_FALSE: 198 o << "0"; 199 break; 200 case BIT_UNSET: 201 o << "_"; 202 break; 203 default: 204 llvm_unreachable("unexpected return value from bitFromBits"); 205 } 206 } 207 } 208 209 static BitsInit &getBitsField(const Record &def, StringRef str) { 210 const RecordVal *RV = def.getValue(str); 211 if (BitsInit *Bits = dyn_cast<BitsInit>(RV->getValue())) 212 return *Bits; 213 214 // variable length instruction 215 VarLenInst VLI = VarLenInst(cast<DagInit>(RV->getValue()), RV); 216 SmallVector<Init *, 16> Bits; 217 218 for (auto &SI : VLI) { 219 if (const BitsInit *BI = dyn_cast<BitsInit>(SI.Value)) { 220 for (unsigned Idx = 0U; Idx < BI->getNumBits(); ++Idx) { 221 Bits.push_back(BI->getBit(Idx)); 222 } 223 } else if (const BitInit *BI = dyn_cast<BitInit>(SI.Value)) { 224 Bits.push_back(const_cast<BitInit *>(BI)); 225 } else { 226 for (unsigned Idx = 0U; Idx < SI.BitWidth; ++Idx) 227 Bits.push_back(UnsetInit::get(def.getRecords())); 228 } 229 } 230 231 return *BitsInit::get(def.getRecords(), Bits); 232 } 233 234 // Representation of the instruction to work on. 235 typedef std::vector<bit_value_t> insn_t; 236 237 namespace { 238 239 static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -1ULL; 240 241 class FilterChooser; 242 243 /// Filter - Filter works with FilterChooser to produce the decoding tree for 244 /// the ISA. 245 /// 246 /// It is useful to think of a Filter as governing the switch stmts of the 247 /// decoding tree in a certain level. Each case stmt delegates to an inferior 248 /// FilterChooser to decide what further decoding logic to employ, or in another 249 /// words, what other remaining bits to look at. The FilterChooser eventually 250 /// chooses a best Filter to do its job. 251 /// 252 /// This recursive scheme ends when the number of Opcodes assigned to the 253 /// FilterChooser becomes 1 or if there is a conflict. A conflict happens when 254 /// the Filter/FilterChooser combo does not know how to distinguish among the 255 /// Opcodes assigned. 256 /// 257 /// An example of a conflict is 258 /// 259 /// Conflict: 260 /// 111101000.00........00010000.... 261 /// 111101000.00........0001........ 262 /// 1111010...00........0001........ 263 /// 1111010...00.................... 264 /// 1111010......................... 265 /// 1111............................ 266 /// ................................ 267 /// VST4q8a 111101000_00________00010000____ 268 /// VST4q8b 111101000_00________00010000____ 269 /// 270 /// The Debug output shows the path that the decoding tree follows to reach the 271 /// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced 272 /// even registers, while VST4q8b is a vst4 to double-spaced odd registers. 273 /// 274 /// The encoding info in the .td files does not specify this meta information, 275 /// which could have been used by the decoder to resolve the conflict. The 276 /// decoder could try to decode the even/odd register numbering and assign to 277 /// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a" 278 /// version and return the Opcode since the two have the same Asm format string. 279 class Filter { 280 protected: 281 const FilterChooser *Owner;// points to the FilterChooser who owns this filter 282 unsigned StartBit; // the starting bit position 283 unsigned NumBits; // number of bits to filter 284 bool Mixed; // a mixed region contains both set and unset bits 285 286 // Map of well-known segment value to the set of uid's with that value. 287 std::map<uint64_t, std::vector<EncodingIDAndOpcode>> 288 FilteredInstructions; 289 290 // Set of uid's with non-constant segment values. 291 std::vector<EncodingIDAndOpcode> VariableInstructions; 292 293 // Map of well-known segment value to its delegate. 294 std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap; 295 296 // Number of instructions which fall under FilteredInstructions category. 297 unsigned NumFiltered; 298 299 // Keeps track of the last opcode in the filtered bucket. 300 EncodingIDAndOpcode LastOpcFiltered; 301 302 public: 303 Filter(Filter &&f); 304 Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed); 305 306 ~Filter() = default; 307 308 unsigned getNumFiltered() const { return NumFiltered; } 309 310 EncodingIDAndOpcode getSingletonOpc() const { 311 assert(NumFiltered == 1); 312 return LastOpcFiltered; 313 } 314 315 // Return the filter chooser for the group of instructions without constant 316 // segment values. 317 const FilterChooser &getVariableFC() const { 318 assert(NumFiltered == 1); 319 assert(FilterChooserMap.size() == 1); 320 return *(FilterChooserMap.find(NO_FIXED_SEGMENTS_SENTINEL)->second); 321 } 322 323 // Divides the decoding task into sub tasks and delegates them to the 324 // inferior FilterChooser's. 325 // 326 // A special case arises when there's only one entry in the filtered 327 // instructions. In order to unambiguously decode the singleton, we need to 328 // match the remaining undecoded encoding bits against the singleton. 329 void recurse(); 330 331 // Emit table entries to decode instructions given a segment or segments of 332 // bits. 333 void emitTableEntry(DecoderTableInfo &TableInfo) const; 334 335 // Returns the number of fanout produced by the filter. More fanout implies 336 // the filter distinguishes more categories of instructions. 337 unsigned usefulness() const; 338 }; // end class Filter 339 340 } // end anonymous namespace 341 342 // These are states of our finite state machines used in FilterChooser's 343 // filterProcessor() which produces the filter candidates to use. 344 typedef enum { 345 ATTR_NONE, 346 ATTR_FILTERED, 347 ATTR_ALL_SET, 348 ATTR_ALL_UNSET, 349 ATTR_MIXED 350 } bitAttr_t; 351 352 /// FilterChooser - FilterChooser chooses the best filter among a set of Filters 353 /// in order to perform the decoding of instructions at the current level. 354 /// 355 /// Decoding proceeds from the top down. Based on the well-known encoding bits 356 /// of instructions available, FilterChooser builds up the possible Filters that 357 /// can further the task of decoding by distinguishing among the remaining 358 /// candidate instructions. 359 /// 360 /// Once a filter has been chosen, it is called upon to divide the decoding task 361 /// into sub-tasks and delegates them to its inferior FilterChoosers for further 362 /// processings. 363 /// 364 /// It is useful to think of a Filter as governing the switch stmts of the 365 /// decoding tree. And each case is delegated to an inferior FilterChooser to 366 /// decide what further remaining bits to look at. 367 namespace { 368 369 class FilterChooser { 370 protected: 371 friend class Filter; 372 373 // Vector of codegen instructions to choose our filter. 374 ArrayRef<EncodingAndInst> AllInstructions; 375 376 // Vector of uid's for this filter chooser to work on. 377 // The first member of the pair is the opcode id being decoded, the second is 378 // the opcode id that should be emitted. 379 const std::vector<EncodingIDAndOpcode> &Opcodes; 380 381 // Lookup table for the operand decoding of instructions. 382 const std::map<unsigned, std::vector<OperandInfo>> &Operands; 383 384 // Vector of candidate filters. 385 std::vector<Filter> Filters; 386 387 // Array of bit values passed down from our parent. 388 // Set to all BIT_UNFILTERED's for Parent == NULL. 389 std::vector<bit_value_t> FilterBitValues; 390 391 // Links to the FilterChooser above us in the decoding tree. 392 const FilterChooser *Parent; 393 394 // Index of the best filter from Filters. 395 int BestIndex; 396 397 // Width of instructions 398 unsigned BitWidth; 399 400 // Parent emitter 401 const DecoderEmitter *Emitter; 402 403 public: 404 FilterChooser(ArrayRef<EncodingAndInst> Insts, 405 const std::vector<EncodingIDAndOpcode> &IDs, 406 const std::map<unsigned, std::vector<OperandInfo>> &Ops, 407 unsigned BW, const DecoderEmitter *E) 408 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), 409 FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1), 410 BitWidth(BW), Emitter(E) { 411 doFilter(); 412 } 413 414 FilterChooser(ArrayRef<EncodingAndInst> Insts, 415 const std::vector<EncodingIDAndOpcode> &IDs, 416 const std::map<unsigned, std::vector<OperandInfo>> &Ops, 417 const std::vector<bit_value_t> &ParentFilterBitValues, 418 const FilterChooser &parent) 419 : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), 420 FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1), 421 BitWidth(parent.BitWidth), Emitter(parent.Emitter) { 422 doFilter(); 423 } 424 425 FilterChooser(const FilterChooser &) = delete; 426 void operator=(const FilterChooser &) = delete; 427 428 unsigned getBitWidth() const { return BitWidth; } 429 430 protected: 431 // Populates the insn given the uid. 432 void insnWithID(insn_t &Insn, unsigned Opcode) const { 433 BitsInit &Bits = getBitsField(*AllInstructions[Opcode].EncodingDef, "Inst"); 434 Insn.resize(BitWidth > Bits.getNumBits() ? BitWidth : Bits.getNumBits(), 435 BIT_UNSET); 436 // We may have a SoftFail bitmask, which specifies a mask where an encoding 437 // may differ from the value in "Inst" and yet still be valid, but the 438 // disassembler should return SoftFail instead of Success. 439 // 440 // This is used for marking UNPREDICTABLE instructions in the ARM world. 441 const RecordVal *RV = 442 AllInstructions[Opcode].EncodingDef->getValue("SoftFail"); 443 const BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr; 444 for (unsigned i = 0; i < Bits.getNumBits(); ++i) { 445 if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE) 446 Insn[i] = BIT_UNSET; 447 else 448 Insn[i] = bitFromBits(Bits, i); 449 } 450 } 451 452 // Emit the name of the encoding/instruction pair. 453 void emitNameWithID(raw_ostream &OS, unsigned Opcode) const { 454 const Record *EncodingDef = AllInstructions[Opcode].EncodingDef; 455 const Record *InstDef = AllInstructions[Opcode].Inst->TheDef; 456 if (EncodingDef != InstDef) 457 OS << EncodingDef->getName() << ":"; 458 OS << InstDef->getName(); 459 } 460 461 // Populates the field of the insn given the start position and the number of 462 // consecutive bits to scan for. 463 // 464 // Returns false if there exists any uninitialized bit value in the range. 465 // Returns true, otherwise. 466 bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit, 467 unsigned NumBits) const; 468 469 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 470 /// filter array as a series of chars. 471 void dumpFilterArray(raw_ostream &o, 472 const std::vector<bit_value_t> & filter) const; 473 474 /// dumpStack - dumpStack traverses the filter chooser chain and calls 475 /// dumpFilterArray on each filter chooser up to the top level one. 476 void dumpStack(raw_ostream &o, const char *prefix) const; 477 478 Filter &bestFilter() { 479 assert(BestIndex != -1 && "BestIndex not set"); 480 return Filters[BestIndex]; 481 } 482 483 bool PositionFiltered(unsigned i) const { 484 return ValueSet(FilterBitValues[i]); 485 } 486 487 // Calculates the island(s) needed to decode the instruction. 488 // This returns a lit of undecoded bits of an instructions, for example, 489 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 490 // decoded bits in order to verify that the instruction matches the Opcode. 491 unsigned getIslands(std::vector<unsigned> &StartBits, 492 std::vector<unsigned> &EndBits, 493 std::vector<uint64_t> &FieldVals, 494 const insn_t &Insn) const; 495 496 // Emits code to check the Predicates member of an instruction are true. 497 // Returns true if predicate matches were emitted, false otherwise. 498 bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation, 499 unsigned Opc) const; 500 bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, 501 raw_ostream &OS) const; 502 503 bool doesOpcodeNeedPredicate(unsigned Opc) const; 504 unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const; 505 void emitPredicateTableEntry(DecoderTableInfo &TableInfo, 506 unsigned Opc) const; 507 508 void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, 509 unsigned Opc) const; 510 511 // Emits table entries to decode the singleton. 512 void emitSingletonTableEntry(DecoderTableInfo &TableInfo, 513 EncodingIDAndOpcode Opc) const; 514 515 // Emits code to decode the singleton, and then to decode the rest. 516 void emitSingletonTableEntry(DecoderTableInfo &TableInfo, 517 const Filter &Best) const; 518 519 void emitBinaryParser(raw_ostream &o, unsigned &Indentation, 520 const OperandInfo &OpInfo, 521 bool &OpHasCompleteDecoder) const; 522 523 void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc, 524 bool &HasCompleteDecoder) const; 525 unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc, 526 bool &HasCompleteDecoder) const; 527 528 // Assign a single filter and run with it. 529 void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed); 530 531 // reportRegion is a helper function for filterProcessor to mark a region as 532 // eligible for use as a filter region. 533 void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex, 534 bool AllowMixed); 535 536 // FilterProcessor scans the well-known encoding bits of the instructions and 537 // builds up a list of candidate filters. It chooses the best filter and 538 // recursively descends down the decoding tree. 539 bool filterProcessor(bool AllowMixed, bool Greedy = true); 540 541 // Decides on the best configuration of filter(s) to use in order to decode 542 // the instructions. A conflict of instructions may occur, in which case we 543 // dump the conflict set to the standard error. 544 void doFilter(); 545 546 public: 547 // emitTableEntries - Emit state machine entries to decode our share of 548 // instructions. 549 void emitTableEntries(DecoderTableInfo &TableInfo) const; 550 }; 551 552 } // end anonymous namespace 553 554 /////////////////////////// 555 // // 556 // Filter Implementation // 557 // // 558 /////////////////////////// 559 560 Filter::Filter(Filter &&f) 561 : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed), 562 FilteredInstructions(std::move(f.FilteredInstructions)), 563 VariableInstructions(std::move(f.VariableInstructions)), 564 FilterChooserMap(std::move(f.FilterChooserMap)), NumFiltered(f.NumFiltered), 565 LastOpcFiltered(f.LastOpcFiltered) { 566 } 567 568 Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, 569 bool mixed) 570 : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) { 571 assert(StartBit + NumBits - 1 < Owner->BitWidth); 572 573 NumFiltered = 0; 574 LastOpcFiltered = {0, 0}; 575 576 for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) { 577 insn_t Insn; 578 579 // Populates the insn given the uid. 580 Owner->insnWithID(Insn, Owner->Opcodes[i].EncodingID); 581 582 uint64_t Field; 583 // Scans the segment for possibly well-specified encoding bits. 584 bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits); 585 586 if (ok) { 587 // The encoding bits are well-known. Lets add the uid of the 588 // instruction into the bucket keyed off the constant field value. 589 LastOpcFiltered = Owner->Opcodes[i]; 590 FilteredInstructions[Field].push_back(LastOpcFiltered); 591 ++NumFiltered; 592 } else { 593 // Some of the encoding bit(s) are unspecified. This contributes to 594 // one additional member of "Variable" instructions. 595 VariableInstructions.push_back(Owner->Opcodes[i]); 596 } 597 } 598 599 assert((FilteredInstructions.size() + VariableInstructions.size() > 0) 600 && "Filter returns no instruction categories"); 601 } 602 603 // Divides the decoding task into sub tasks and delegates them to the 604 // inferior FilterChooser's. 605 // 606 // A special case arises when there's only one entry in the filtered 607 // instructions. In order to unambiguously decode the singleton, we need to 608 // match the remaining undecoded encoding bits against the singleton. 609 void Filter::recurse() { 610 // Starts by inheriting our parent filter chooser's filter bit values. 611 std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues); 612 613 if (!VariableInstructions.empty()) { 614 // Conservatively marks each segment position as BIT_UNSET. 615 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) 616 BitValueArray[StartBit + bitIndex] = BIT_UNSET; 617 618 // Delegates to an inferior filter chooser for further processing on this 619 // group of instructions whose segment values are variable. 620 FilterChooserMap.insert(std::make_pair(NO_FIXED_SEGMENTS_SENTINEL, 621 std::make_unique<FilterChooser>(Owner->AllInstructions, 622 VariableInstructions, Owner->Operands, BitValueArray, *Owner))); 623 } 624 625 // No need to recurse for a singleton filtered instruction. 626 // See also Filter::emit*(). 627 if (getNumFiltered() == 1) { 628 assert(FilterChooserMap.size() == 1); 629 return; 630 } 631 632 // Otherwise, create sub choosers. 633 for (const auto &Inst : FilteredInstructions) { 634 635 // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. 636 for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) { 637 if (Inst.first & (1ULL << bitIndex)) 638 BitValueArray[StartBit + bitIndex] = BIT_TRUE; 639 else 640 BitValueArray[StartBit + bitIndex] = BIT_FALSE; 641 } 642 643 // Delegates to an inferior filter chooser for further processing on this 644 // category of instructions. 645 FilterChooserMap.insert(std::make_pair( 646 Inst.first, std::make_unique<FilterChooser>( 647 Owner->AllInstructions, Inst.second, 648 Owner->Operands, BitValueArray, *Owner))); 649 } 650 } 651 652 static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, 653 uint32_t DestIdx) { 654 // Any NumToSkip fixups in the current scope can resolve to the 655 // current location. 656 for (FixupList::const_reverse_iterator I = Fixups.rbegin(), 657 E = Fixups.rend(); 658 I != E; ++I) { 659 // Calculate the distance from the byte following the fixup entry byte 660 // to the destination. The Target is calculated from after the 16-bit 661 // NumToSkip entry itself, so subtract two from the displacement here 662 // to account for that. 663 uint32_t FixupIdx = *I; 664 uint32_t Delta = DestIdx - FixupIdx - 3; 665 // Our NumToSkip entries are 24-bits. Make sure our table isn't too 666 // big. 667 assert(Delta < (1u << 24)); 668 Table[FixupIdx] = (uint8_t)Delta; 669 Table[FixupIdx + 1] = (uint8_t)(Delta >> 8); 670 Table[FixupIdx + 2] = (uint8_t)(Delta >> 16); 671 } 672 } 673 674 // Emit table entries to decode instructions given a segment or segments 675 // of bits. 676 void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { 677 TableInfo.Table.push_back(MCD::OPC_ExtractField); 678 TableInfo.Table.push_back(StartBit); 679 TableInfo.Table.push_back(NumBits); 680 681 // A new filter entry begins a new scope for fixup resolution. 682 TableInfo.FixupStack.emplace_back(); 683 684 DecoderTable &Table = TableInfo.Table; 685 686 size_t PrevFilter = 0; 687 bool HasFallthrough = false; 688 for (auto &Filter : FilterChooserMap) { 689 // Field value -1 implies a non-empty set of variable instructions. 690 // See also recurse(). 691 if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) { 692 HasFallthrough = true; 693 694 // Each scope should always have at least one filter value to check 695 // for. 696 assert(PrevFilter != 0 && "empty filter set!"); 697 FixupList &CurScope = TableInfo.FixupStack.back(); 698 // Resolve any NumToSkip fixups in the current scope. 699 resolveTableFixups(Table, CurScope, Table.size()); 700 CurScope.clear(); 701 PrevFilter = 0; // Don't re-process the filter's fallthrough. 702 } else { 703 Table.push_back(MCD::OPC_FilterValue); 704 // Encode and emit the value to filter against. 705 uint8_t Buffer[16]; 706 unsigned Len = encodeULEB128(Filter.first, Buffer); 707 Table.insert(Table.end(), Buffer, Buffer + Len); 708 // Reserve space for the NumToSkip entry. We'll backpatch the value 709 // later. 710 PrevFilter = Table.size(); 711 Table.push_back(0); 712 Table.push_back(0); 713 Table.push_back(0); 714 } 715 716 // We arrive at a category of instructions with the same segment value. 717 // Now delegate to the sub filter chooser for further decodings. 718 // The case may fallthrough, which happens if the remaining well-known 719 // encoding bits do not match exactly. 720 Filter.second->emitTableEntries(TableInfo); 721 722 // Now that we've emitted the body of the handler, update the NumToSkip 723 // of the filter itself to be able to skip forward when false. Subtract 724 // two as to account for the width of the NumToSkip field itself. 725 if (PrevFilter) { 726 uint32_t NumToSkip = Table.size() - PrevFilter - 3; 727 assert(NumToSkip < (1u << 24) && "disassembler decoding table too large!"); 728 Table[PrevFilter] = (uint8_t)NumToSkip; 729 Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8); 730 Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16); 731 } 732 } 733 734 // Any remaining unresolved fixups bubble up to the parent fixup scope. 735 assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!"); 736 FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1; 737 FixupScopeList::iterator Dest = Source - 1; 738 llvm::append_range(*Dest, *Source); 739 TableInfo.FixupStack.pop_back(); 740 741 // If there is no fallthrough, then the final filter should get fixed 742 // up according to the enclosing scope rather than the current position. 743 if (!HasFallthrough) 744 TableInfo.FixupStack.back().push_back(PrevFilter); 745 } 746 747 // Returns the number of fanout produced by the filter. More fanout implies 748 // the filter distinguishes more categories of instructions. 749 unsigned Filter::usefulness() const { 750 if (!VariableInstructions.empty()) 751 return FilteredInstructions.size(); 752 else 753 return FilteredInstructions.size() + 1; 754 } 755 756 ////////////////////////////////// 757 // // 758 // Filterchooser Implementation // 759 // // 760 ////////////////////////////////// 761 762 // Emit the decoder state machine table. 763 void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table, 764 unsigned Indentation, unsigned BitWidth, 765 StringRef Namespace) const { 766 OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace 767 << BitWidth << "[] = {\n"; 768 769 Indentation += 2; 770 771 // FIXME: We may be able to use the NumToSkip values to recover 772 // appropriate indentation levels. 773 DecoderTable::const_iterator I = Table.begin(); 774 DecoderTable::const_iterator E = Table.end(); 775 while (I != E) { 776 assert (I < E && "incomplete decode table entry!"); 777 778 uint64_t Pos = I - Table.begin(); 779 OS << "/* " << Pos << " */"; 780 OS.PadToColumn(12); 781 782 switch (*I) { 783 default: 784 PrintFatalError("invalid decode table opcode"); 785 case MCD::OPC_ExtractField: { 786 ++I; 787 unsigned Start = *I++; 788 unsigned Len = *I++; 789 OS.indent(Indentation) << "MCD::OPC_ExtractField, " << Start << ", " 790 << Len << ", // Inst{"; 791 if (Len > 1) 792 OS << (Start + Len - 1) << "-"; 793 OS << Start << "} ...\n"; 794 break; 795 } 796 case MCD::OPC_FilterValue: { 797 ++I; 798 OS.indent(Indentation) << "MCD::OPC_FilterValue, "; 799 // The filter value is ULEB128 encoded. 800 while (*I >= 128) 801 OS << (unsigned)*I++ << ", "; 802 OS << (unsigned)*I++ << ", "; 803 804 // 24-bit numtoskip value. 805 uint8_t Byte = *I++; 806 uint32_t NumToSkip = Byte; 807 OS << (unsigned)Byte << ", "; 808 Byte = *I++; 809 OS << (unsigned)Byte << ", "; 810 NumToSkip |= Byte << 8; 811 Byte = *I++; 812 OS << utostr(Byte) << ", "; 813 NumToSkip |= Byte << 16; 814 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 815 break; 816 } 817 case MCD::OPC_CheckField: { 818 ++I; 819 unsigned Start = *I++; 820 unsigned Len = *I++; 821 OS.indent(Indentation) << "MCD::OPC_CheckField, " << Start << ", " 822 << Len << ", ";// << Val << ", " << NumToSkip << ",\n"; 823 // ULEB128 encoded field value. 824 for (; *I >= 128; ++I) 825 OS << (unsigned)*I << ", "; 826 OS << (unsigned)*I++ << ", "; 827 // 24-bit numtoskip value. 828 uint8_t Byte = *I++; 829 uint32_t NumToSkip = Byte; 830 OS << (unsigned)Byte << ", "; 831 Byte = *I++; 832 OS << (unsigned)Byte << ", "; 833 NumToSkip |= Byte << 8; 834 Byte = *I++; 835 OS << utostr(Byte) << ", "; 836 NumToSkip |= Byte << 16; 837 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 838 break; 839 } 840 case MCD::OPC_CheckPredicate: { 841 ++I; 842 OS.indent(Indentation) << "MCD::OPC_CheckPredicate, "; 843 for (; *I >= 128; ++I) 844 OS << (unsigned)*I << ", "; 845 OS << (unsigned)*I++ << ", "; 846 847 // 24-bit numtoskip value. 848 uint8_t Byte = *I++; 849 uint32_t NumToSkip = Byte; 850 OS << (unsigned)Byte << ", "; 851 Byte = *I++; 852 OS << (unsigned)Byte << ", "; 853 NumToSkip |= Byte << 8; 854 Byte = *I++; 855 OS << utostr(Byte) << ", "; 856 NumToSkip |= Byte << 16; 857 OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 858 break; 859 } 860 case MCD::OPC_Decode: 861 case MCD::OPC_TryDecode: { 862 bool IsTry = *I == MCD::OPC_TryDecode; 863 ++I; 864 // Extract the ULEB128 encoded Opcode to a buffer. 865 uint8_t Buffer[16], *p = Buffer; 866 while ((*p++ = *I++) >= 128) 867 assert((p - Buffer) <= (ptrdiff_t)sizeof(Buffer) 868 && "ULEB128 value too large!"); 869 // Decode the Opcode value. 870 unsigned Opc = decodeULEB128(Buffer); 871 OS.indent(Indentation) << "MCD::OPC_" << (IsTry ? "Try" : "") 872 << "Decode, "; 873 for (p = Buffer; *p >= 128; ++p) 874 OS << (unsigned)*p << ", "; 875 OS << (unsigned)*p << ", "; 876 877 // Decoder index. 878 for (; *I >= 128; ++I) 879 OS << (unsigned)*I << ", "; 880 OS << (unsigned)*I++ << ", "; 881 882 if (!IsTry) { 883 OS << "// Opcode: " << NumberedEncodings[Opc] << "\n"; 884 break; 885 } 886 887 // Fallthrough for OPC_TryDecode. 888 889 // 24-bit numtoskip value. 890 uint8_t Byte = *I++; 891 uint32_t NumToSkip = Byte; 892 OS << (unsigned)Byte << ", "; 893 Byte = *I++; 894 OS << (unsigned)Byte << ", "; 895 NumToSkip |= Byte << 8; 896 Byte = *I++; 897 OS << utostr(Byte) << ", "; 898 NumToSkip |= Byte << 16; 899 900 OS << "// Opcode: " << NumberedEncodings[Opc] 901 << ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; 902 break; 903 } 904 case MCD::OPC_SoftFail: { 905 ++I; 906 OS.indent(Indentation) << "MCD::OPC_SoftFail"; 907 // Positive mask 908 uint64_t Value = 0; 909 unsigned Shift = 0; 910 do { 911 OS << ", " << (unsigned)*I; 912 Value += (*I & 0x7f) << Shift; 913 Shift += 7; 914 } while (*I++ >= 128); 915 if (Value > 127) { 916 OS << " /* 0x"; 917 OS.write_hex(Value); 918 OS << " */"; 919 } 920 // Negative mask 921 Value = 0; 922 Shift = 0; 923 do { 924 OS << ", " << (unsigned)*I; 925 Value += (*I & 0x7f) << Shift; 926 Shift += 7; 927 } while (*I++ >= 128); 928 if (Value > 127) { 929 OS << " /* 0x"; 930 OS.write_hex(Value); 931 OS << " */"; 932 } 933 OS << ",\n"; 934 break; 935 } 936 case MCD::OPC_Fail: { 937 ++I; 938 OS.indent(Indentation) << "MCD::OPC_Fail,\n"; 939 break; 940 } 941 } 942 } 943 OS.indent(Indentation) << "0\n"; 944 945 Indentation -= 2; 946 947 OS.indent(Indentation) << "};\n\n"; 948 } 949 950 void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS, 951 std::vector<unsigned> &InstrLen) const { 952 OS << "static const uint8_t InstrLenTable[] = {\n"; 953 for (unsigned &Len : InstrLen) { 954 OS << Len << ",\n"; 955 } 956 OS << "};\n\n"; 957 } 958 959 void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS, 960 PredicateSet &Predicates, 961 unsigned Indentation) const { 962 // The predicate function is just a big switch statement based on the 963 // input predicate index. 964 OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, " 965 << "const FeatureBitset &Bits) {\n"; 966 Indentation += 2; 967 if (!Predicates.empty()) { 968 OS.indent(Indentation) << "switch (Idx) {\n"; 969 OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; 970 unsigned Index = 0; 971 for (const auto &Predicate : Predicates) { 972 OS.indent(Indentation) << "case " << Index++ << ":\n"; 973 OS.indent(Indentation+2) << "return (" << Predicate << ");\n"; 974 } 975 OS.indent(Indentation) << "}\n"; 976 } else { 977 // No case statement to emit 978 OS.indent(Indentation) << "llvm_unreachable(\"Invalid index!\");\n"; 979 } 980 Indentation -= 2; 981 OS.indent(Indentation) << "}\n\n"; 982 } 983 984 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS, 985 DecoderSet &Decoders, 986 unsigned Indentation) const { 987 // The decoder function is just a big switch statement based on the 988 // input decoder index. 989 OS.indent(Indentation) << "template <typename InsnType>\n"; 990 OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S," 991 << " unsigned Idx, InsnType insn, MCInst &MI,\n"; 992 OS.indent(Indentation) 993 << " uint64_t " 994 << "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n"; 995 Indentation += 2; 996 OS.indent(Indentation) << "DecodeComplete = true;\n"; 997 // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits 998 // It would be better for emitBinaryParser to use a 64-bit tmp whenever 999 // possible but fall back to an InsnType-sized tmp for truly large fields. 1000 OS.indent(Indentation) << "using TmpType = " 1001 "std::conditional_t<std::is_integral<InsnType>::" 1002 "value, InsnType, uint64_t>;\n"; 1003 OS.indent(Indentation) << "TmpType tmp;\n"; 1004 OS.indent(Indentation) << "switch (Idx) {\n"; 1005 OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; 1006 unsigned Index = 0; 1007 for (const auto &Decoder : Decoders) { 1008 OS.indent(Indentation) << "case " << Index++ << ":\n"; 1009 OS << Decoder; 1010 OS.indent(Indentation+2) << "return S;\n"; 1011 } 1012 OS.indent(Indentation) << "}\n"; 1013 Indentation -= 2; 1014 OS.indent(Indentation) << "}\n\n"; 1015 } 1016 1017 // Populates the field of the insn given the start position and the number of 1018 // consecutive bits to scan for. 1019 // 1020 // Returns false if and on the first uninitialized bit value encountered. 1021 // Returns true, otherwise. 1022 bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn, 1023 unsigned StartBit, unsigned NumBits) const { 1024 Field = 0; 1025 1026 for (unsigned i = 0; i < NumBits; ++i) { 1027 if (Insn[StartBit + i] == BIT_UNSET) 1028 return false; 1029 1030 if (Insn[StartBit + i] == BIT_TRUE) 1031 Field = Field | (1ULL << i); 1032 } 1033 1034 return true; 1035 } 1036 1037 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given 1038 /// filter array as a series of chars. 1039 void FilterChooser::dumpFilterArray(raw_ostream &o, 1040 const std::vector<bit_value_t> &filter) const { 1041 for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) { 1042 switch (filter[bitIndex - 1]) { 1043 case BIT_UNFILTERED: 1044 o << "."; 1045 break; 1046 case BIT_UNSET: 1047 o << "_"; 1048 break; 1049 case BIT_TRUE: 1050 o << "1"; 1051 break; 1052 case BIT_FALSE: 1053 o << "0"; 1054 break; 1055 } 1056 } 1057 } 1058 1059 /// dumpStack - dumpStack traverses the filter chooser chain and calls 1060 /// dumpFilterArray on each filter chooser up to the top level one. 1061 void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) const { 1062 const FilterChooser *current = this; 1063 1064 while (current) { 1065 o << prefix; 1066 dumpFilterArray(o, current->FilterBitValues); 1067 o << '\n'; 1068 current = current->Parent; 1069 } 1070 } 1071 1072 // Calculates the island(s) needed to decode the instruction. 1073 // This returns a list of undecoded bits of an instructions, for example, 1074 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be 1075 // decoded bits in order to verify that the instruction matches the Opcode. 1076 unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits, 1077 std::vector<unsigned> &EndBits, 1078 std::vector<uint64_t> &FieldVals, 1079 const insn_t &Insn) const { 1080 unsigned Num, BitNo; 1081 Num = BitNo = 0; 1082 1083 uint64_t FieldVal = 0; 1084 1085 // 0: Init 1086 // 1: Water (the bit value does not affect decoding) 1087 // 2: Island (well-known bit value needed for decoding) 1088 int State = 0; 1089 1090 for (unsigned i = 0; i < BitWidth; ++i) { 1091 int64_t Val = Value(Insn[i]); 1092 bool Filtered = PositionFiltered(i); 1093 switch (State) { 1094 default: llvm_unreachable("Unreachable code!"); 1095 case 0: 1096 case 1: 1097 if (Filtered || Val == -1) 1098 State = 1; // Still in Water 1099 else { 1100 State = 2; // Into the Island 1101 BitNo = 0; 1102 StartBits.push_back(i); 1103 FieldVal = Val; 1104 } 1105 break; 1106 case 2: 1107 if (Filtered || Val == -1) { 1108 State = 1; // Into the Water 1109 EndBits.push_back(i - 1); 1110 FieldVals.push_back(FieldVal); 1111 ++Num; 1112 } else { 1113 State = 2; // Still in Island 1114 ++BitNo; 1115 FieldVal = FieldVal | Val << BitNo; 1116 } 1117 break; 1118 } 1119 } 1120 // If we are still in Island after the loop, do some housekeeping. 1121 if (State == 2) { 1122 EndBits.push_back(BitWidth - 1); 1123 FieldVals.push_back(FieldVal); 1124 ++Num; 1125 } 1126 1127 assert(StartBits.size() == Num && EndBits.size() == Num && 1128 FieldVals.size() == Num); 1129 return Num; 1130 } 1131 1132 void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation, 1133 const OperandInfo &OpInfo, 1134 bool &OpHasCompleteDecoder) const { 1135 const std::string &Decoder = OpInfo.Decoder; 1136 1137 bool UseInsertBits = OpInfo.numFields() != 1 || OpInfo.InitValue != 0; 1138 1139 if (UseInsertBits) { 1140 o.indent(Indentation) << "tmp = 0x"; 1141 o.write_hex(OpInfo.InitValue); 1142 o << ";\n"; 1143 } 1144 1145 for (const EncodingField &EF : OpInfo) { 1146 o.indent(Indentation); 1147 if (UseInsertBits) 1148 o << "insertBits(tmp, "; 1149 else 1150 o << "tmp = "; 1151 o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << ')'; 1152 if (UseInsertBits) 1153 o << ", " << EF.Offset << ", " << EF.Width << ')'; 1154 else if (EF.Offset != 0) 1155 o << " << " << EF.Offset; 1156 o << ";\n"; 1157 } 1158 1159 if (Decoder != "") { 1160 OpHasCompleteDecoder = OpInfo.HasCompleteDecoder; 1161 o.indent(Indentation) << "if (!Check(S, " << Decoder 1162 << "(MI, tmp, Address, Decoder))) { " 1163 << (OpHasCompleteDecoder ? "" 1164 : "DecodeComplete = false; ") 1165 << "return MCDisassembler::Fail; }\n"; 1166 } else { 1167 OpHasCompleteDecoder = true; 1168 o.indent(Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n"; 1169 } 1170 } 1171 1172 void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation, 1173 unsigned Opc, bool &HasCompleteDecoder) const { 1174 HasCompleteDecoder = true; 1175 1176 for (const auto &Op : Operands.find(Opc)->second) { 1177 // If a custom instruction decoder was specified, use that. 1178 if (Op.numFields() == 0 && !Op.Decoder.empty()) { 1179 HasCompleteDecoder = Op.HasCompleteDecoder; 1180 OS.indent(Indentation) 1181 << "if (!Check(S, " << Op.Decoder 1182 << "(MI, insn, Address, Decoder))) { " 1183 << (HasCompleteDecoder ? "" : "DecodeComplete = false; ") 1184 << "return MCDisassembler::Fail; }\n"; 1185 break; 1186 } 1187 1188 bool OpHasCompleteDecoder; 1189 emitBinaryParser(OS, Indentation, Op, OpHasCompleteDecoder); 1190 if (!OpHasCompleteDecoder) 1191 HasCompleteDecoder = false; 1192 } 1193 } 1194 1195 unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, 1196 unsigned Opc, 1197 bool &HasCompleteDecoder) const { 1198 // Build up the predicate string. 1199 SmallString<256> Decoder; 1200 // FIXME: emitDecoder() function can take a buffer directly rather than 1201 // a stream. 1202 raw_svector_ostream S(Decoder); 1203 unsigned I = 4; 1204 emitDecoder(S, I, Opc, HasCompleteDecoder); 1205 1206 // Using the full decoder string as the key value here is a bit 1207 // heavyweight, but is effective. If the string comparisons become a 1208 // performance concern, we can implement a mangling of the predicate 1209 // data easily enough with a map back to the actual string. That's 1210 // overkill for now, though. 1211 1212 // Make sure the predicate is in the table. 1213 Decoders.insert(CachedHashString(Decoder)); 1214 // Now figure out the index for when we write out the table. 1215 DecoderSet::const_iterator P = find(Decoders, Decoder.str()); 1216 return (unsigned)(P - Decoders.begin()); 1217 } 1218 1219 // If ParenIfBinOp is true, print a surrounding () if Val uses && or ||. 1220 bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp, 1221 raw_ostream &OS) const { 1222 if (auto *D = dyn_cast<DefInit>(&Val)) { 1223 if (!D->getDef()->isSubClassOf("SubtargetFeature")) 1224 return true; 1225 OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString() 1226 << "]"; 1227 return false; 1228 } 1229 if (auto *D = dyn_cast<DagInit>(&Val)) { 1230 std::string Op = D->getOperator()->getAsString(); 1231 if (Op == "not" && D->getNumArgs() == 1) { 1232 OS << '!'; 1233 return emitPredicateMatchAux(*D->getArg(0), true, OS); 1234 } 1235 if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) { 1236 bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true); 1237 if (Paren) 1238 OS << '('; 1239 ListSeparator LS(Op == "any_of" ? " || " : " && "); 1240 for (auto *Arg : D->getArgs()) { 1241 OS << LS; 1242 if (emitPredicateMatchAux(*Arg, ParenIfBinOp, OS)) 1243 return true; 1244 } 1245 if (Paren) 1246 OS << ')'; 1247 return false; 1248 } 1249 } 1250 return true; 1251 } 1252 1253 bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation, 1254 unsigned Opc) const { 1255 ListInit *Predicates = 1256 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); 1257 bool IsFirstEmission = true; 1258 for (unsigned i = 0; i < Predicates->size(); ++i) { 1259 Record *Pred = Predicates->getElementAsRecord(i); 1260 if (!Pred->getValue("AssemblerMatcherPredicate")) 1261 continue; 1262 1263 if (!isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue())) 1264 continue; 1265 1266 if (!IsFirstEmission) 1267 o << " && "; 1268 if (emitPredicateMatchAux(*Pred->getValueAsDag("AssemblerCondDag"), 1269 Predicates->size() > 1, o)) 1270 PrintFatalError(Pred->getLoc(), "Invalid AssemblerCondDag!"); 1271 IsFirstEmission = false; 1272 } 1273 return !Predicates->empty(); 1274 } 1275 1276 bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const { 1277 ListInit *Predicates = 1278 AllInstructions[Opc].EncodingDef->getValueAsListInit("Predicates"); 1279 for (unsigned i = 0; i < Predicates->size(); ++i) { 1280 Record *Pred = Predicates->getElementAsRecord(i); 1281 if (!Pred->getValue("AssemblerMatcherPredicate")) 1282 continue; 1283 1284 if (isa<DagInit>(Pred->getValue("AssemblerCondDag")->getValue())) 1285 return true; 1286 } 1287 return false; 1288 } 1289 1290 unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo, 1291 StringRef Predicate) const { 1292 // Using the full predicate string as the key value here is a bit 1293 // heavyweight, but is effective. If the string comparisons become a 1294 // performance concern, we can implement a mangling of the predicate 1295 // data easily enough with a map back to the actual string. That's 1296 // overkill for now, though. 1297 1298 // Make sure the predicate is in the table. 1299 TableInfo.Predicates.insert(CachedHashString(Predicate)); 1300 // Now figure out the index for when we write out the table. 1301 PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate); 1302 return (unsigned)(P - TableInfo.Predicates.begin()); 1303 } 1304 1305 void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo, 1306 unsigned Opc) const { 1307 if (!doesOpcodeNeedPredicate(Opc)) 1308 return; 1309 1310 // Build up the predicate string. 1311 SmallString<256> Predicate; 1312 // FIXME: emitPredicateMatch() functions can take a buffer directly rather 1313 // than a stream. 1314 raw_svector_ostream PS(Predicate); 1315 unsigned I = 0; 1316 emitPredicateMatch(PS, I, Opc); 1317 1318 // Figure out the index into the predicate table for the predicate just 1319 // computed. 1320 unsigned PIdx = getPredicateIndex(TableInfo, PS.str()); 1321 SmallString<16> PBytes; 1322 raw_svector_ostream S(PBytes); 1323 encodeULEB128(PIdx, S); 1324 1325 TableInfo.Table.push_back(MCD::OPC_CheckPredicate); 1326 // Predicate index 1327 for (unsigned i = 0, e = PBytes.size(); i != e; ++i) 1328 TableInfo.Table.push_back(PBytes[i]); 1329 // Push location for NumToSkip backpatching. 1330 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1331 TableInfo.Table.push_back(0); 1332 TableInfo.Table.push_back(0); 1333 TableInfo.Table.push_back(0); 1334 } 1335 1336 void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo, 1337 unsigned Opc) const { 1338 const RecordVal *RV = AllInstructions[Opc].EncodingDef->getValue("SoftFail"); 1339 BitsInit *SFBits = RV ? dyn_cast<BitsInit>(RV->getValue()) : nullptr; 1340 1341 if (!SFBits) return; 1342 BitsInit *InstBits = 1343 AllInstructions[Opc].EncodingDef->getValueAsBitsInit("Inst"); 1344 1345 APInt PositiveMask(BitWidth, 0ULL); 1346 APInt NegativeMask(BitWidth, 0ULL); 1347 for (unsigned i = 0; i < BitWidth; ++i) { 1348 bit_value_t B = bitFromBits(*SFBits, i); 1349 bit_value_t IB = bitFromBits(*InstBits, i); 1350 1351 if (B != BIT_TRUE) continue; 1352 1353 switch (IB) { 1354 case BIT_FALSE: 1355 // The bit is meant to be false, so emit a check to see if it is true. 1356 PositiveMask.setBit(i); 1357 break; 1358 case BIT_TRUE: 1359 // The bit is meant to be true, so emit a check to see if it is false. 1360 NegativeMask.setBit(i); 1361 break; 1362 default: 1363 // The bit is not set; this must be an error! 1364 errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " 1365 << AllInstructions[Opc] << " is set but Inst{" << i 1366 << "} is unset!\n" 1367 << " - You can only mark a bit as SoftFail if it is fully defined" 1368 << " (1/0 - not '?') in Inst\n"; 1369 return; 1370 } 1371 } 1372 1373 bool NeedPositiveMask = PositiveMask.getBoolValue(); 1374 bool NeedNegativeMask = NegativeMask.getBoolValue(); 1375 1376 if (!NeedPositiveMask && !NeedNegativeMask) 1377 return; 1378 1379 TableInfo.Table.push_back(MCD::OPC_SoftFail); 1380 1381 SmallString<16> MaskBytes; 1382 raw_svector_ostream S(MaskBytes); 1383 if (NeedPositiveMask) { 1384 encodeULEB128(PositiveMask.getZExtValue(), S); 1385 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) 1386 TableInfo.Table.push_back(MaskBytes[i]); 1387 } else 1388 TableInfo.Table.push_back(0); 1389 if (NeedNegativeMask) { 1390 MaskBytes.clear(); 1391 encodeULEB128(NegativeMask.getZExtValue(), S); 1392 for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) 1393 TableInfo.Table.push_back(MaskBytes[i]); 1394 } else 1395 TableInfo.Table.push_back(0); 1396 } 1397 1398 // Emits table entries to decode the singleton. 1399 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, 1400 EncodingIDAndOpcode Opc) const { 1401 std::vector<unsigned> StartBits; 1402 std::vector<unsigned> EndBits; 1403 std::vector<uint64_t> FieldVals; 1404 insn_t Insn; 1405 insnWithID(Insn, Opc.EncodingID); 1406 1407 // Look for islands of undecoded bits of the singleton. 1408 getIslands(StartBits, EndBits, FieldVals, Insn); 1409 1410 unsigned Size = StartBits.size(); 1411 1412 // Emit the predicate table entry if one is needed. 1413 emitPredicateTableEntry(TableInfo, Opc.EncodingID); 1414 1415 // Check any additional encoding fields needed. 1416 for (unsigned I = Size; I != 0; --I) { 1417 unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1; 1418 TableInfo.Table.push_back(MCD::OPC_CheckField); 1419 TableInfo.Table.push_back(StartBits[I-1]); 1420 TableInfo.Table.push_back(NumBits); 1421 uint8_t Buffer[16], *p; 1422 encodeULEB128(FieldVals[I-1], Buffer); 1423 for (p = Buffer; *p >= 128 ; ++p) 1424 TableInfo.Table.push_back(*p); 1425 TableInfo.Table.push_back(*p); 1426 // Push location for NumToSkip backpatching. 1427 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1428 // The fixup is always 24-bits, so go ahead and allocate the space 1429 // in the table so all our relative position calculations work OK even 1430 // before we fully resolve the real value here. 1431 TableInfo.Table.push_back(0); 1432 TableInfo.Table.push_back(0); 1433 TableInfo.Table.push_back(0); 1434 } 1435 1436 // Check for soft failure of the match. 1437 emitSoftFailTableEntry(TableInfo, Opc.EncodingID); 1438 1439 bool HasCompleteDecoder; 1440 unsigned DIdx = 1441 getDecoderIndex(TableInfo.Decoders, Opc.EncodingID, HasCompleteDecoder); 1442 1443 // Produce OPC_Decode or OPC_TryDecode opcode based on the information 1444 // whether the instruction decoder is complete or not. If it is complete 1445 // then it handles all possible values of remaining variable/unfiltered bits 1446 // and for any value can determine if the bitpattern is a valid instruction 1447 // or not. This means OPC_Decode will be the final step in the decoding 1448 // process. If it is not complete, then the Fail return code from the 1449 // decoder method indicates that additional processing should be done to see 1450 // if there is any other instruction that also matches the bitpattern and 1451 // can decode it. 1452 TableInfo.Table.push_back(HasCompleteDecoder ? MCD::OPC_Decode : 1453 MCD::OPC_TryDecode); 1454 NumEncodingsSupported++; 1455 uint8_t Buffer[16], *p; 1456 encodeULEB128(Opc.Opcode, Buffer); 1457 for (p = Buffer; *p >= 128 ; ++p) 1458 TableInfo.Table.push_back(*p); 1459 TableInfo.Table.push_back(*p); 1460 1461 SmallString<16> Bytes; 1462 raw_svector_ostream S(Bytes); 1463 encodeULEB128(DIdx, S); 1464 1465 // Decoder index 1466 for (unsigned i = 0, e = Bytes.size(); i != e; ++i) 1467 TableInfo.Table.push_back(Bytes[i]); 1468 1469 if (!HasCompleteDecoder) { 1470 // Push location for NumToSkip backpatching. 1471 TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); 1472 // Allocate the space for the fixup. 1473 TableInfo.Table.push_back(0); 1474 TableInfo.Table.push_back(0); 1475 TableInfo.Table.push_back(0); 1476 } 1477 } 1478 1479 // Emits table entries to decode the singleton, and then to decode the rest. 1480 void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, 1481 const Filter &Best) const { 1482 EncodingIDAndOpcode Opc = Best.getSingletonOpc(); 1483 1484 // complex singletons need predicate checks from the first singleton 1485 // to refer forward to the variable filterchooser that follows. 1486 TableInfo.FixupStack.emplace_back(); 1487 1488 emitSingletonTableEntry(TableInfo, Opc); 1489 1490 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 1491 TableInfo.Table.size()); 1492 TableInfo.FixupStack.pop_back(); 1493 1494 Best.getVariableFC().emitTableEntries(TableInfo); 1495 } 1496 1497 // Assign a single filter and run with it. Top level API client can initialize 1498 // with a single filter to start the filtering process. 1499 void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit, 1500 bool mixed) { 1501 Filters.clear(); 1502 Filters.emplace_back(*this, startBit, numBit, true); 1503 BestIndex = 0; // Sole Filter instance to choose from. 1504 bestFilter().recurse(); 1505 } 1506 1507 // reportRegion is a helper function for filterProcessor to mark a region as 1508 // eligible for use as a filter region. 1509 void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit, 1510 unsigned BitIndex, bool AllowMixed) { 1511 if (RA == ATTR_MIXED && AllowMixed) 1512 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, true); 1513 else if (RA == ATTR_ALL_SET && !AllowMixed) 1514 Filters.emplace_back(*this, StartBit, BitIndex - StartBit, false); 1515 } 1516 1517 // FilterProcessor scans the well-known encoding bits of the instructions and 1518 // builds up a list of candidate filters. It chooses the best filter and 1519 // recursively descends down the decoding tree. 1520 bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { 1521 Filters.clear(); 1522 BestIndex = -1; 1523 unsigned numInstructions = Opcodes.size(); 1524 1525 assert(numInstructions && "Filter created with no instructions"); 1526 1527 // No further filtering is necessary. 1528 if (numInstructions == 1) 1529 return true; 1530 1531 // Heuristics. See also doFilter()'s "Heuristics" comment when num of 1532 // instructions is 3. 1533 if (AllowMixed && !Greedy) { 1534 assert(numInstructions == 3); 1535 1536 for (auto Opcode : Opcodes) { 1537 std::vector<unsigned> StartBits; 1538 std::vector<unsigned> EndBits; 1539 std::vector<uint64_t> FieldVals; 1540 insn_t Insn; 1541 1542 insnWithID(Insn, Opcode.EncodingID); 1543 1544 // Look for islands of undecoded bits of any instruction. 1545 if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) { 1546 // Found an instruction with island(s). Now just assign a filter. 1547 runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true); 1548 return true; 1549 } 1550 } 1551 } 1552 1553 unsigned BitIndex; 1554 1555 // We maintain BIT_WIDTH copies of the bitAttrs automaton. 1556 // The automaton consumes the corresponding bit from each 1557 // instruction. 1558 // 1559 // Input symbols: 0, 1, and _ (unset). 1560 // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED. 1561 // Initial state: NONE. 1562 // 1563 // (NONE) ------- [01] -> (ALL_SET) 1564 // (NONE) ------- _ ----> (ALL_UNSET) 1565 // (ALL_SET) ---- [01] -> (ALL_SET) 1566 // (ALL_SET) ---- _ ----> (MIXED) 1567 // (ALL_UNSET) -- [01] -> (MIXED) 1568 // (ALL_UNSET) -- _ ----> (ALL_UNSET) 1569 // (MIXED) ------ . ----> (MIXED) 1570 // (FILTERED)---- . ----> (FILTERED) 1571 1572 std::vector<bitAttr_t> bitAttrs; 1573 1574 // FILTERED bit positions provide no entropy and are not worthy of pursuing. 1575 // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position. 1576 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) 1577 if (FilterBitValues[BitIndex] == BIT_TRUE || 1578 FilterBitValues[BitIndex] == BIT_FALSE) 1579 bitAttrs.push_back(ATTR_FILTERED); 1580 else 1581 bitAttrs.push_back(ATTR_NONE); 1582 1583 for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) { 1584 insn_t insn; 1585 1586 insnWithID(insn, Opcodes[InsnIndex].EncodingID); 1587 1588 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { 1589 switch (bitAttrs[BitIndex]) { 1590 case ATTR_NONE: 1591 if (insn[BitIndex] == BIT_UNSET) 1592 bitAttrs[BitIndex] = ATTR_ALL_UNSET; 1593 else 1594 bitAttrs[BitIndex] = ATTR_ALL_SET; 1595 break; 1596 case ATTR_ALL_SET: 1597 if (insn[BitIndex] == BIT_UNSET) 1598 bitAttrs[BitIndex] = ATTR_MIXED; 1599 break; 1600 case ATTR_ALL_UNSET: 1601 if (insn[BitIndex] != BIT_UNSET) 1602 bitAttrs[BitIndex] = ATTR_MIXED; 1603 break; 1604 case ATTR_MIXED: 1605 case ATTR_FILTERED: 1606 break; 1607 } 1608 } 1609 } 1610 1611 // The regionAttr automaton consumes the bitAttrs automatons' state, 1612 // lowest-to-highest. 1613 // 1614 // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed) 1615 // States: NONE, ALL_SET, MIXED 1616 // Initial state: NONE 1617 // 1618 // (NONE) ----- F --> (NONE) 1619 // (NONE) ----- S --> (ALL_SET) ; and set region start 1620 // (NONE) ----- U --> (NONE) 1621 // (NONE) ----- M --> (MIXED) ; and set region start 1622 // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region 1623 // (ALL_SET) -- S --> (ALL_SET) 1624 // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region 1625 // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region 1626 // (MIXED) ---- F --> (NONE) ; and report a MIXED region 1627 // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region 1628 // (MIXED) ---- U --> (NONE) ; and report a MIXED region 1629 // (MIXED) ---- M --> (MIXED) 1630 1631 bitAttr_t RA = ATTR_NONE; 1632 unsigned StartBit = 0; 1633 1634 for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { 1635 bitAttr_t bitAttr = bitAttrs[BitIndex]; 1636 1637 assert(bitAttr != ATTR_NONE && "Bit without attributes"); 1638 1639 switch (RA) { 1640 case ATTR_NONE: 1641 switch (bitAttr) { 1642 case ATTR_FILTERED: 1643 break; 1644 case ATTR_ALL_SET: 1645 StartBit = BitIndex; 1646 RA = ATTR_ALL_SET; 1647 break; 1648 case ATTR_ALL_UNSET: 1649 break; 1650 case ATTR_MIXED: 1651 StartBit = BitIndex; 1652 RA = ATTR_MIXED; 1653 break; 1654 default: 1655 llvm_unreachable("Unexpected bitAttr!"); 1656 } 1657 break; 1658 case ATTR_ALL_SET: 1659 switch (bitAttr) { 1660 case ATTR_FILTERED: 1661 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1662 RA = ATTR_NONE; 1663 break; 1664 case ATTR_ALL_SET: 1665 break; 1666 case ATTR_ALL_UNSET: 1667 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1668 RA = ATTR_NONE; 1669 break; 1670 case ATTR_MIXED: 1671 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1672 StartBit = BitIndex; 1673 RA = ATTR_MIXED; 1674 break; 1675 default: 1676 llvm_unreachable("Unexpected bitAttr!"); 1677 } 1678 break; 1679 case ATTR_MIXED: 1680 switch (bitAttr) { 1681 case ATTR_FILTERED: 1682 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1683 StartBit = BitIndex; 1684 RA = ATTR_NONE; 1685 break; 1686 case ATTR_ALL_SET: 1687 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1688 StartBit = BitIndex; 1689 RA = ATTR_ALL_SET; 1690 break; 1691 case ATTR_ALL_UNSET: 1692 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1693 RA = ATTR_NONE; 1694 break; 1695 case ATTR_MIXED: 1696 break; 1697 default: 1698 llvm_unreachable("Unexpected bitAttr!"); 1699 } 1700 break; 1701 case ATTR_ALL_UNSET: 1702 llvm_unreachable("regionAttr state machine has no ATTR_UNSET state"); 1703 case ATTR_FILTERED: 1704 llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state"); 1705 } 1706 } 1707 1708 // At the end, if we're still in ALL_SET or MIXED states, report a region 1709 switch (RA) { 1710 case ATTR_NONE: 1711 break; 1712 case ATTR_FILTERED: 1713 break; 1714 case ATTR_ALL_SET: 1715 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1716 break; 1717 case ATTR_ALL_UNSET: 1718 break; 1719 case ATTR_MIXED: 1720 reportRegion(RA, StartBit, BitIndex, AllowMixed); 1721 break; 1722 } 1723 1724 // We have finished with the filter processings. Now it's time to choose 1725 // the best performing filter. 1726 BestIndex = 0; 1727 bool AllUseless = true; 1728 unsigned BestScore = 0; 1729 1730 for (unsigned i = 0, e = Filters.size(); i != e; ++i) { 1731 unsigned Usefulness = Filters[i].usefulness(); 1732 1733 if (Usefulness) 1734 AllUseless = false; 1735 1736 if (Usefulness > BestScore) { 1737 BestIndex = i; 1738 BestScore = Usefulness; 1739 } 1740 } 1741 1742 if (!AllUseless) 1743 bestFilter().recurse(); 1744 1745 return !AllUseless; 1746 } // end of FilterChooser::filterProcessor(bool) 1747 1748 // Decides on the best configuration of filter(s) to use in order to decode 1749 // the instructions. A conflict of instructions may occur, in which case we 1750 // dump the conflict set to the standard error. 1751 void FilterChooser::doFilter() { 1752 unsigned Num = Opcodes.size(); 1753 assert(Num && "FilterChooser created with no instructions"); 1754 1755 // Try regions of consecutive known bit values first. 1756 if (filterProcessor(false)) 1757 return; 1758 1759 // Then regions of mixed bits (both known and unitialized bit values allowed). 1760 if (filterProcessor(true)) 1761 return; 1762 1763 // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where 1764 // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a 1765 // well-known encoding pattern. In such case, we backtrack and scan for the 1766 // the very first consecutive ATTR_ALL_SET region and assign a filter to it. 1767 if (Num == 3 && filterProcessor(true, false)) 1768 return; 1769 1770 // If we come to here, the instruction decoding has failed. 1771 // Set the BestIndex to -1 to indicate so. 1772 BestIndex = -1; 1773 } 1774 1775 // emitTableEntries - Emit state machine entries to decode our share of 1776 // instructions. 1777 void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { 1778 if (Opcodes.size() == 1) { 1779 // There is only one instruction in the set, which is great! 1780 // Call emitSingletonDecoder() to see whether there are any remaining 1781 // encodings bits. 1782 emitSingletonTableEntry(TableInfo, Opcodes[0]); 1783 return; 1784 } 1785 1786 // Choose the best filter to do the decodings! 1787 if (BestIndex != -1) { 1788 const Filter &Best = Filters[BestIndex]; 1789 if (Best.getNumFiltered() == 1) 1790 emitSingletonTableEntry(TableInfo, Best); 1791 else 1792 Best.emitTableEntry(TableInfo); 1793 return; 1794 } 1795 1796 // We don't know how to decode these instructions! Dump the 1797 // conflict set and bail. 1798 1799 // Print out useful conflict information for postmortem analysis. 1800 errs() << "Decoding Conflict:\n"; 1801 1802 dumpStack(errs(), "\t\t"); 1803 1804 for (auto Opcode : Opcodes) { 1805 errs() << '\t'; 1806 emitNameWithID(errs(), Opcode.EncodingID); 1807 errs() << " "; 1808 dumpBits( 1809 errs(), 1810 getBitsField(*AllInstructions[Opcode.EncodingID].EncodingDef, "Inst")); 1811 errs() << '\n'; 1812 } 1813 } 1814 1815 static std::string findOperandDecoderMethod(Record *Record) { 1816 std::string Decoder; 1817 1818 RecordVal *DecoderString = Record->getValue("DecoderMethod"); 1819 StringInit *String = DecoderString ? 1820 dyn_cast<StringInit>(DecoderString->getValue()) : nullptr; 1821 if (String) { 1822 Decoder = std::string(String->getValue()); 1823 if (!Decoder.empty()) 1824 return Decoder; 1825 } 1826 1827 if (Record->isSubClassOf("RegisterOperand")) 1828 Record = Record->getValueAsDef("RegClass"); 1829 1830 if (Record->isSubClassOf("RegisterClass")) { 1831 Decoder = "Decode" + Record->getName().str() + "RegisterClass"; 1832 } else if (Record->isSubClassOf("PointerLikeRegClass")) { 1833 Decoder = "DecodePointerLikeRegClass" + 1834 utostr(Record->getValueAsInt("RegClassKind")); 1835 } 1836 1837 return Decoder; 1838 } 1839 1840 OperandInfo getOpInfo(Record *TypeRecord) { 1841 std::string Decoder = findOperandDecoderMethod(TypeRecord); 1842 1843 RecordVal *HasCompleteDecoderVal = TypeRecord->getValue("hasCompleteDecoder"); 1844 BitInit *HasCompleteDecoderBit = 1845 HasCompleteDecoderVal 1846 ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) 1847 : nullptr; 1848 bool HasCompleteDecoder = 1849 HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true; 1850 1851 return OperandInfo(Decoder, HasCompleteDecoder); 1852 } 1853 1854 void parseVarLenInstOperand(const Record &Def, 1855 std::vector<OperandInfo> &Operands, 1856 const CodeGenInstruction &CGI) { 1857 1858 const RecordVal *RV = Def.getValue("Inst"); 1859 VarLenInst VLI(cast<DagInit>(RV->getValue()), RV); 1860 SmallVector<int> TiedTo; 1861 1862 for (unsigned Idx = 0; Idx < CGI.Operands.size(); ++Idx) { 1863 auto &Op = CGI.Operands[Idx]; 1864 if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > 0) 1865 for (auto *Arg : Op.MIOperandInfo->getArgs()) 1866 Operands.push_back(getOpInfo(cast<DefInit>(Arg)->getDef())); 1867 else 1868 Operands.push_back(getOpInfo(Op.Rec)); 1869 1870 int TiedReg = Op.getTiedRegister(); 1871 TiedTo.push_back(-1); 1872 if (TiedReg != -1) { 1873 TiedTo[Idx] = TiedReg; 1874 TiedTo[TiedReg] = Idx; 1875 } 1876 } 1877 1878 unsigned CurrBitPos = 0; 1879 for (auto &EncodingSegment : VLI) { 1880 unsigned Offset = 0; 1881 StringRef OpName; 1882 1883 if (const StringInit *SI = dyn_cast<StringInit>(EncodingSegment.Value)) { 1884 OpName = SI->getValue(); 1885 } else if (const DagInit *DI = dyn_cast<DagInit>(EncodingSegment.Value)) { 1886 OpName = cast<StringInit>(DI->getArg(0))->getValue(); 1887 Offset = cast<IntInit>(DI->getArg(2))->getValue(); 1888 } 1889 1890 if (!OpName.empty()) { 1891 auto OpSubOpPair = 1892 const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName( 1893 OpName); 1894 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(OpSubOpPair); 1895 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset); 1896 if (!EncodingSegment.CustomDecoder.empty()) 1897 Operands[OpIdx].Decoder = EncodingSegment.CustomDecoder.str(); 1898 1899 int TiedReg = TiedTo[OpSubOpPair.first]; 1900 if (TiedReg != -1) { 1901 unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber( 1902 std::make_pair(TiedReg, OpSubOpPair.second)); 1903 Operands[OpIdx].addField(CurrBitPos, EncodingSegment.BitWidth, Offset); 1904 } 1905 } 1906 1907 CurrBitPos += EncodingSegment.BitWidth; 1908 } 1909 } 1910 1911 static void debugDumpRecord(const Record &Rec) { 1912 // Dump the record, so we can see what's going on... 1913 std::string E; 1914 raw_string_ostream S(E); 1915 S << "Dumping record for previous error:\n"; 1916 S << Rec; 1917 PrintNote(E); 1918 } 1919 1920 /// For an operand field named OpName: populate OpInfo.InitValue with the 1921 /// constant-valued bit values, and OpInfo.Fields with the ranges of bits to 1922 /// insert from the decoded instruction. 1923 static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits, 1924 std::map<std::string, std::string> &TiedNames, 1925 StringRef OpName, OperandInfo &OpInfo) { 1926 // Some bits of the operand may be required to be 1 depending on the 1927 // instruction's encoding. Collect those bits. 1928 if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName)) 1929 if (const BitsInit *OpBits = dyn_cast<BitsInit>(EncodedValue->getValue())) 1930 for (unsigned I = 0; I < OpBits->getNumBits(); ++I) 1931 if (const BitInit *OpBit = dyn_cast<BitInit>(OpBits->getBit(I))) 1932 if (OpBit->getValue()) 1933 OpInfo.InitValue |= 1ULL << I; 1934 1935 for (unsigned I = 0, J = 0; I != Bits.getNumBits(); I = J) { 1936 VarInit *Var; 1937 unsigned Offset = 0; 1938 for (; J != Bits.getNumBits(); ++J) { 1939 VarBitInit *BJ = dyn_cast<VarBitInit>(Bits.getBit(J)); 1940 if (BJ) { 1941 Var = dyn_cast<VarInit>(BJ->getBitVar()); 1942 if (I == J) 1943 Offset = BJ->getBitNum(); 1944 else if (BJ->getBitNum() != Offset + J - I) 1945 break; 1946 } else { 1947 Var = dyn_cast<VarInit>(Bits.getBit(J)); 1948 } 1949 if (!Var || (Var->getName() != OpName && 1950 Var->getName() != TiedNames[std::string(OpName)])) 1951 break; 1952 } 1953 if (I == J) 1954 ++J; 1955 else 1956 OpInfo.addField(I, J - I, Offset); 1957 } 1958 } 1959 1960 static unsigned 1961 populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, 1962 const CodeGenInstruction &CGI, unsigned Opc, 1963 std::map<unsigned, std::vector<OperandInfo>> &Operands, 1964 bool IsVarLenInst) { 1965 const Record &Def = *CGI.TheDef; 1966 // If all the bit positions are not specified; do not decode this instruction. 1967 // We are bound to fail! For proper disassembly, the well-known encoding bits 1968 // of the instruction must be fully specified. 1969 1970 BitsInit &Bits = getBitsField(EncodingDef, "Inst"); 1971 if (Bits.allInComplete()) 1972 return 0; 1973 1974 std::vector<OperandInfo> InsnOperands; 1975 1976 // If the instruction has specified a custom decoding hook, use that instead 1977 // of trying to auto-generate the decoder. 1978 StringRef InstDecoder = EncodingDef.getValueAsString("DecoderMethod"); 1979 if (InstDecoder != "") { 1980 bool HasCompleteInstDecoder = EncodingDef.getValueAsBit("hasCompleteDecoder"); 1981 InsnOperands.push_back( 1982 OperandInfo(std::string(InstDecoder), HasCompleteInstDecoder)); 1983 Operands[Opc] = InsnOperands; 1984 return Bits.getNumBits(); 1985 } 1986 1987 // Generate a description of the operand of the instruction that we know 1988 // how to decode automatically. 1989 // FIXME: We'll need to have a way to manually override this as needed. 1990 1991 // Gather the outputs/inputs of the instruction, so we can find their 1992 // positions in the encoding. This assumes for now that they appear in the 1993 // MCInst in the order that they're listed. 1994 std::vector<std::pair<Init*, StringRef>> InOutOperands; 1995 DagInit *Out = Def.getValueAsDag("OutOperandList"); 1996 DagInit *In = Def.getValueAsDag("InOperandList"); 1997 for (unsigned i = 0; i < Out->getNumArgs(); ++i) 1998 InOutOperands.push_back( 1999 std::make_pair(Out->getArg(i), Out->getArgNameStr(i))); 2000 for (unsigned i = 0; i < In->getNumArgs(); ++i) 2001 InOutOperands.push_back( 2002 std::make_pair(In->getArg(i), In->getArgNameStr(i))); 2003 2004 // Search for tied operands, so that we can correctly instantiate 2005 // operands that are not explicitly represented in the encoding. 2006 std::map<std::string, std::string> TiedNames; 2007 for (unsigned i = 0; i < CGI.Operands.size(); ++i) { 2008 auto &Op = CGI.Operands[i]; 2009 for (unsigned j = 0; j < Op.Constraints.size(); ++j) { 2010 const CGIOperandList::ConstraintInfo &CI = Op.Constraints[j]; 2011 if (CI.isTied()) { 2012 int tiedTo = CI.getTiedOperand(); 2013 std::pair<unsigned, unsigned> SO = 2014 CGI.Operands.getSubOperandNumber(tiedTo); 2015 std::string TiedName = CGI.Operands[SO.first].SubOpNames[SO.second]; 2016 if (TiedName.empty()) 2017 TiedName = CGI.Operands[SO.first].Name; 2018 std::string MyName = Op.SubOpNames[j]; 2019 if (MyName.empty()) 2020 MyName = Op.Name; 2021 2022 TiedNames[MyName] = TiedName; 2023 TiedNames[TiedName] = MyName; 2024 } 2025 } 2026 } 2027 2028 if (IsVarLenInst) { 2029 parseVarLenInstOperand(EncodingDef, InsnOperands, CGI); 2030 } else { 2031 std::map<std::string, std::vector<OperandInfo>> NumberedInsnOperands; 2032 std::set<std::string> NumberedInsnOperandsNoTie; 2033 bool SupportPositionalDecoding = 2034 Target.getInstructionSet()->getValueAsBit( 2035 "useDeprecatedPositionallyEncodedOperands") && 2036 Target.getInstructionSet()->getValueAsBit( 2037 "decodePositionallyEncodedOperands"); 2038 if (SupportPositionalDecoding) { 2039 const std::vector<RecordVal> &Vals = Def.getValues(); 2040 unsigned NumberedOp = 0; 2041 2042 std::set<unsigned> NamedOpIndices; 2043 if (Target.getInstructionSet()->getValueAsBit( 2044 "noNamedPositionallyEncodedOperands")) 2045 // Collect the set of operand indices that might correspond to named 2046 // operand, and skip these when assigning operands based on position. 2047 for (unsigned i = 0, e = Vals.size(); i != e; ++i) { 2048 unsigned OpIdx; 2049 if (!CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx)) 2050 continue; 2051 2052 NamedOpIndices.insert(OpIdx); 2053 } 2054 2055 for (unsigned i = 0, e = Vals.size(); i != e; ++i) { 2056 // Ignore fixed fields in the record, we're looking for values like: 2057 // bits<5> RST = { ?, ?, ?, ?, ? }; 2058 if (Vals[i].isNonconcreteOK() || Vals[i].getValue()->isComplete()) 2059 continue; 2060 2061 // Determine if Vals[i] actually contributes to the Inst encoding. 2062 unsigned bi = 0; 2063 for (; bi < Bits.getNumBits(); ++bi) { 2064 VarInit *Var = nullptr; 2065 VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi)); 2066 if (BI) 2067 Var = dyn_cast<VarInit>(BI->getBitVar()); 2068 else 2069 Var = dyn_cast<VarInit>(Bits.getBit(bi)); 2070 2071 if (Var && Var->getName() == Vals[i].getName()) 2072 break; 2073 } 2074 2075 if (bi == Bits.getNumBits()) 2076 continue; 2077 2078 // Skip variables that correspond to explicitly-named operands. 2079 unsigned OpIdx; 2080 std::pair<unsigned, unsigned> SubOp; 2081 if (CGI.Operands.hasSubOperandAlias(Vals[i].getName(), SubOp) || 2082 CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx)) 2083 continue; 2084 2085 // Get the bit range for this operand: 2086 unsigned bitStart = bi++, bitWidth = 1; 2087 for (; bi < Bits.getNumBits(); ++bi) { 2088 VarInit *Var = nullptr; 2089 VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi)); 2090 if (BI) 2091 Var = dyn_cast<VarInit>(BI->getBitVar()); 2092 else 2093 Var = dyn_cast<VarInit>(Bits.getBit(bi)); 2094 2095 if (!Var) 2096 break; 2097 2098 if (Var->getName() != Vals[i].getName()) 2099 break; 2100 2101 ++bitWidth; 2102 } 2103 2104 unsigned NumberOps = CGI.Operands.size(); 2105 while (NumberedOp < NumberOps && 2106 (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) || 2107 (!NamedOpIndices.empty() && 2108 NamedOpIndices.count( 2109 CGI.Operands.getSubOperandNumber(NumberedOp).first)))) 2110 ++NumberedOp; 2111 2112 OpIdx = NumberedOp++; 2113 2114 // OpIdx now holds the ordered operand number of Vals[i]. 2115 std::pair<unsigned, unsigned> SO = 2116 CGI.Operands.getSubOperandNumber(OpIdx); 2117 const std::string &Name = CGI.Operands[SO.first].Name; 2118 2119 LLVM_DEBUG(dbgs() << "Numbered operand mapping for " << Def.getName() 2120 << ": " << Name << "(" << SO.first << ", " 2121 << SO.second << ") => " << Vals[i].getName() << "\n"); 2122 2123 std::string Decoder; 2124 Record *TypeRecord = CGI.Operands[SO.first].Rec; 2125 2126 RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod"); 2127 StringInit *String = 2128 DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) 2129 : nullptr; 2130 if (String && String->getValue() != "") 2131 Decoder = std::string(String->getValue()); 2132 2133 if (Decoder == "" && CGI.Operands[SO.first].MIOperandInfo && 2134 CGI.Operands[SO.first].MIOperandInfo->getNumArgs()) { 2135 Init *Arg = CGI.Operands[SO.first].MIOperandInfo->getArg(SO.second); 2136 if (DefInit *DI = cast<DefInit>(Arg)) 2137 TypeRecord = DI->getDef(); 2138 } 2139 2140 bool isReg = false; 2141 if (TypeRecord->isSubClassOf("RegisterOperand")) 2142 TypeRecord = TypeRecord->getValueAsDef("RegClass"); 2143 if (TypeRecord->isSubClassOf("RegisterClass")) { 2144 Decoder = "Decode" + TypeRecord->getName().str() + "RegisterClass"; 2145 isReg = true; 2146 } else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) { 2147 Decoder = "DecodePointerLikeRegClass" + 2148 utostr(TypeRecord->getValueAsInt("RegClassKind")); 2149 isReg = true; 2150 } 2151 2152 DecoderString = TypeRecord->getValue("DecoderMethod"); 2153 String = DecoderString ? dyn_cast<StringInit>(DecoderString->getValue()) 2154 : nullptr; 2155 if (!isReg && String && String->getValue() != "") 2156 Decoder = std::string(String->getValue()); 2157 2158 RecordVal *HasCompleteDecoderVal = 2159 TypeRecord->getValue("hasCompleteDecoder"); 2160 BitInit *HasCompleteDecoderBit = 2161 HasCompleteDecoderVal 2162 ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue()) 2163 : nullptr; 2164 bool HasCompleteDecoder = 2165 HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true; 2166 2167 OperandInfo OpInfo(Decoder, HasCompleteDecoder); 2168 OpInfo.addField(bitStart, bitWidth, 0); 2169 2170 NumberedInsnOperands[Name].push_back(OpInfo); 2171 2172 // FIXME: For complex operands with custom decoders we can't handle tied 2173 // sub-operands automatically. Skip those here and assume that this is 2174 // fixed up elsewhere. 2175 if (CGI.Operands[SO.first].MIOperandInfo && 2176 CGI.Operands[SO.first].MIOperandInfo->getNumArgs() > 1 && String && 2177 String->getValue() != "") 2178 NumberedInsnOperandsNoTie.insert(Name); 2179 } 2180 } 2181 2182 // For each operand, see if we can figure out where it is encoded. 2183 for (const auto &Op : InOutOperands) { 2184 Init *OpInit = Op.first; 2185 StringRef OpName = Op.second; 2186 2187 if (SupportPositionalDecoding) { 2188 if (!NumberedInsnOperands[std::string(OpName)].empty()) { 2189 llvm::append_range(InsnOperands, 2190 NumberedInsnOperands[std::string(OpName)]); 2191 continue; 2192 } 2193 if (!NumberedInsnOperands[TiedNames[std::string(OpName)]].empty()) { 2194 if (!NumberedInsnOperandsNoTie.count( 2195 TiedNames[std::string(OpName)])) { 2196 // Figure out to which (sub)operand we're tied. 2197 unsigned i = 2198 CGI.Operands.getOperandNamed(TiedNames[std::string(OpName)]); 2199 int tiedTo = CGI.Operands[i].getTiedRegister(); 2200 if (tiedTo == -1) { 2201 i = CGI.Operands.getOperandNamed(OpName); 2202 tiedTo = CGI.Operands[i].getTiedRegister(); 2203 } 2204 2205 if (tiedTo != -1) { 2206 std::pair<unsigned, unsigned> SO = 2207 CGI.Operands.getSubOperandNumber(tiedTo); 2208 2209 InsnOperands.push_back( 2210 NumberedInsnOperands[TiedNames[std::string(OpName)]] 2211 [SO.second]); 2212 } 2213 } 2214 continue; 2215 } 2216 } 2217 2218 // We're ready to find the instruction encoding locations for this operand. 2219 2220 // First, find the operand type ("OpInit"), and sub-op names 2221 // ("SubArgDag") if present. 2222 DagInit *SubArgDag = dyn_cast<DagInit>(OpInit); 2223 if (SubArgDag) 2224 OpInit = SubArgDag->getOperator(); 2225 Record *OpTypeRec = cast<DefInit>(OpInit)->getDef(); 2226 // Lookup the sub-operands from the operand type record (note that only 2227 // Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp). 2228 DagInit *SubOps = OpTypeRec->isSubClassOf("Operand") 2229 ? OpTypeRec->getValueAsDag("MIOperandInfo") 2230 : nullptr; 2231 2232 // Lookup the decoder method and construct a new OperandInfo to hold our result. 2233 OperandInfo OpInfo = getOpInfo(OpTypeRec); 2234 2235 // If we have named sub-operands... 2236 if (SubArgDag) { 2237 // Then there should not be a custom decoder specified on the top-level 2238 // type. 2239 if (!OpInfo.Decoder.empty()) { 2240 PrintError(EncodingDef.getLoc(), 2241 "DecoderEmitter: operand \"" + OpName + "\" has type \"" + 2242 OpInit->getAsString() + 2243 "\" with a custom DecoderMethod, but also named " 2244 "sub-operands."); 2245 continue; 2246 } 2247 2248 // Decode each of the sub-ops separately. 2249 assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs()); 2250 for (unsigned i = 0; i < SubOps->getNumArgs(); ++i) { 2251 StringRef SubOpName = SubArgDag->getArgNameStr(i); 2252 OperandInfo SubOpInfo = 2253 getOpInfo(cast<DefInit>(SubOps->getArg(i))->getDef()); 2254 2255 addOneOperandFields(EncodingDef, Bits, TiedNames, SubOpName, 2256 SubOpInfo); 2257 InsnOperands.push_back(SubOpInfo); 2258 } 2259 continue; 2260 } 2261 2262 // Otherwise, if we have an operand with sub-operands, but they aren't 2263 // named... 2264 if (SubOps && OpInfo.Decoder.empty()) { 2265 // If it's a single sub-operand, and no custom decoder, use the decoder 2266 // from the one sub-operand. 2267 if (SubOps->getNumArgs() == 1) 2268 OpInfo = getOpInfo(cast<DefInit>(SubOps->getArg(0))->getDef()); 2269 2270 // If we have multiple sub-ops, there'd better have a custom 2271 // decoder. (Otherwise we don't know how to populate them properly...) 2272 if (SubOps->getNumArgs() > 1) { 2273 PrintError(EncodingDef.getLoc(), 2274 "DecoderEmitter: operand \"" + OpName + 2275 "\" uses MIOperandInfo with multiple ops, but doesn't " 2276 "have a custom decoder!"); 2277 debugDumpRecord(EncodingDef); 2278 continue; 2279 } 2280 } 2281 2282 addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo); 2283 // FIXME: it should be an error not to find a definition for a given 2284 // operand, rather than just failing to add it to the resulting 2285 // instruction! (This is a longstanding bug, which will be addressed in an 2286 // upcoming change.) 2287 if (OpInfo.numFields() > 0) 2288 InsnOperands.push_back(OpInfo); 2289 } 2290 } 2291 Operands[Opc] = InsnOperands; 2292 2293 #if 0 2294 LLVM_DEBUG({ 2295 // Dumps the instruction encoding bits. 2296 dumpBits(errs(), Bits); 2297 2298 errs() << '\n'; 2299 2300 // Dumps the list of operand info. 2301 for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) { 2302 const CGIOperandList::OperandInfo &Info = CGI.Operands[i]; 2303 const std::string &OperandName = Info.Name; 2304 const Record &OperandDef = *Info.Rec; 2305 2306 errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n"; 2307 } 2308 }); 2309 #endif 2310 2311 return Bits.getNumBits(); 2312 } 2313 2314 // emitFieldFromInstruction - Emit the templated helper function 2315 // fieldFromInstruction(). 2316 // On Windows we make sure that this function is not inlined when 2317 // using the VS compiler. It has a bug which causes the function 2318 // to be optimized out in some circumstances. See llvm.org/pr38292 2319 static void emitFieldFromInstruction(formatted_raw_ostream &OS) { 2320 OS << "// Helper functions for extracting fields from encoded instructions.\n" 2321 << "// InsnType must either be integral or an APInt-like object that " 2322 "must:\n" 2323 << "// * be default-constructible and copy-constructible\n" 2324 << "// * be constructible from an APInt (this can be private)\n" 2325 << "// * Support insertBits(bits, startBit, numBits)\n" 2326 << "// * Support extractBitsAsZExtValue(numBits, startBit)\n" 2327 << "// * Support the ~, &, ==, and != operators with other objects of " 2328 "the same type\n" 2329 << "// * Support the != and bitwise & with uint64_t\n" 2330 << "// * Support put (<<) to raw_ostream&\n" 2331 << "template <typename InsnType>\n" 2332 << "#if defined(_MSC_VER) && !defined(__clang__)\n" 2333 << "__declspec(noinline)\n" 2334 << "#endif\n" 2335 << "static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>\n" 2336 << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n" 2337 << " unsigned numBits) {\n" 2338 << " assert(startBit + numBits <= 64 && \"Cannot support >64-bit " 2339 "extractions!\");\n" 2340 << " assert(startBit + numBits <= (sizeof(InsnType) * 8) &&\n" 2341 << " \"Instruction field out of bounds!\");\n" 2342 << " InsnType fieldMask;\n" 2343 << " if (numBits == sizeof(InsnType) * 8)\n" 2344 << " fieldMask = (InsnType)(-1LL);\n" 2345 << " else\n" 2346 << " fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n" 2347 << " return (insn & fieldMask) >> startBit;\n" 2348 << "}\n" 2349 << "\n" 2350 << "template <typename InsnType>\n" 2351 << "static std::enable_if_t<!std::is_integral<InsnType>::value, " 2352 "uint64_t>\n" 2353 << "fieldFromInstruction(const InsnType &insn, unsigned startBit,\n" 2354 << " unsigned numBits) {\n" 2355 << " return insn.extractBitsAsZExtValue(numBits, startBit);\n" 2356 << "}\n\n"; 2357 } 2358 2359 // emitInsertBits - Emit the templated helper function insertBits(). 2360 static void emitInsertBits(formatted_raw_ostream &OS) { 2361 OS << "// Helper function for inserting bits extracted from an encoded " 2362 "instruction into\n" 2363 << "// a field.\n" 2364 << "template <typename InsnType>\n" 2365 << "static std::enable_if_t<std::is_integral<InsnType>::value>\n" 2366 << "insertBits(InsnType &field, InsnType bits, unsigned startBit, " 2367 "unsigned numBits) {\n" 2368 << " assert(startBit + numBits <= sizeof field * 8);\n" 2369 << " field |= (InsnType)bits << startBit;\n" 2370 << "}\n" 2371 << "\n" 2372 << "template <typename InsnType>\n" 2373 << "static std::enable_if_t<!std::is_integral<InsnType>::value>\n" 2374 << "insertBits(InsnType &field, uint64_t bits, unsigned startBit, " 2375 "unsigned numBits) {\n" 2376 << " field.insertBits(bits, startBit, numBits);\n" 2377 << "}\n\n"; 2378 } 2379 2380 // emitDecodeInstruction - Emit the templated helper function 2381 // decodeInstruction(). 2382 static void emitDecodeInstruction(formatted_raw_ostream &OS, 2383 bool IsVarLenInst) { 2384 OS << "template <typename InsnType>\n" 2385 << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], " 2386 "MCInst &MI,\n" 2387 << " InsnType insn, uint64_t " 2388 "Address,\n" 2389 << " const MCDisassembler *DisAsm,\n" 2390 << " const MCSubtargetInfo &STI"; 2391 if (IsVarLenInst) { 2392 OS << ",\n" 2393 << " llvm::function_ref<void(APInt " 2394 "&," 2395 << " uint64_t)> makeUp"; 2396 } 2397 OS << ") {\n" 2398 << " const FeatureBitset &Bits = STI.getFeatureBits();\n" 2399 << "\n" 2400 << " const uint8_t *Ptr = DecodeTable;\n" 2401 << " uint64_t CurFieldValue = 0;\n" 2402 << " DecodeStatus S = MCDisassembler::Success;\n" 2403 << " while (true) {\n" 2404 << " ptrdiff_t Loc = Ptr - DecodeTable;\n" 2405 << " switch (*Ptr) {\n" 2406 << " default:\n" 2407 << " errs() << Loc << \": Unexpected decode table opcode!\\n\";\n" 2408 << " return MCDisassembler::Fail;\n" 2409 << " case MCD::OPC_ExtractField: {\n" 2410 << " unsigned Start = *++Ptr;\n" 2411 << " unsigned Len = *++Ptr;\n" 2412 << " ++Ptr;\n"; 2413 if (IsVarLenInst) 2414 OS << " makeUp(insn, Start + Len);\n"; 2415 OS << " CurFieldValue = fieldFromInstruction(insn, Start, Len);\n" 2416 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << " 2417 "\", \"\n" 2418 << " << Len << \"): \" << CurFieldValue << \"\\n\");\n" 2419 << " break;\n" 2420 << " }\n" 2421 << " case MCD::OPC_FilterValue: {\n" 2422 << " // Decode the field value.\n" 2423 << " unsigned Len;\n" 2424 << " uint64_t Val = decodeULEB128(++Ptr, &Len);\n" 2425 << " Ptr += Len;\n" 2426 << " // NumToSkip is a plain 24-bit integer.\n" 2427 << " unsigned NumToSkip = *Ptr++;\n" 2428 << " NumToSkip |= (*Ptr++) << 8;\n" 2429 << " NumToSkip |= (*Ptr++) << 16;\n" 2430 << "\n" 2431 << " // Perform the filter operation.\n" 2432 << " if (Val != CurFieldValue)\n" 2433 << " Ptr += NumToSkip;\n" 2434 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << " 2435 "\", \" << NumToSkip\n" 2436 << " << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" " 2437 ": \"PASS:\")\n" 2438 << " << \" continuing at \" << (Ptr - DecodeTable) << " 2439 "\"\\n\");\n" 2440 << "\n" 2441 << " break;\n" 2442 << " }\n" 2443 << " case MCD::OPC_CheckField: {\n" 2444 << " unsigned Start = *++Ptr;\n" 2445 << " unsigned Len = *++Ptr;\n"; 2446 if (IsVarLenInst) 2447 OS << " makeUp(insn, Start + Len);\n"; 2448 OS << " uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);\n" 2449 << " // Decode the field value.\n" 2450 << " unsigned PtrLen = 0;\n" 2451 << " uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);\n" 2452 << " Ptr += PtrLen;\n" 2453 << " // NumToSkip is a plain 24-bit integer.\n" 2454 << " unsigned NumToSkip = *Ptr++;\n" 2455 << " NumToSkip |= (*Ptr++) << 8;\n" 2456 << " NumToSkip |= (*Ptr++) << 16;\n" 2457 << "\n" 2458 << " // If the actual and expected values don't match, skip.\n" 2459 << " if (ExpectedValue != FieldValue)\n" 2460 << " Ptr += NumToSkip;\n" 2461 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << " 2462 "\", \"\n" 2463 << " << Len << \", \" << ExpectedValue << \", \" << " 2464 "NumToSkip\n" 2465 << " << \"): FieldValue = \" << FieldValue << \", " 2466 "ExpectedValue = \"\n" 2467 << " << ExpectedValue << \": \"\n" 2468 << " << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : " 2469 "\"FAIL\\n\"));\n" 2470 << " break;\n" 2471 << " }\n" 2472 << " case MCD::OPC_CheckPredicate: {\n" 2473 << " unsigned Len;\n" 2474 << " // Decode the Predicate Index value.\n" 2475 << " unsigned PIdx = decodeULEB128(++Ptr, &Len);\n" 2476 << " Ptr += Len;\n" 2477 << " // NumToSkip is a plain 24-bit integer.\n" 2478 << " unsigned NumToSkip = *Ptr++;\n" 2479 << " NumToSkip |= (*Ptr++) << 8;\n" 2480 << " NumToSkip |= (*Ptr++) << 16;\n" 2481 << " // Check the predicate.\n" 2482 << " bool Pred;\n" 2483 << " if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n" 2484 << " Ptr += NumToSkip;\n" 2485 << " (void)Pred;\n" 2486 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx " 2487 "<< \"): \"\n" 2488 << " << (Pred ? \"PASS\\n\" : \"FAIL\\n\"));\n" 2489 << "\n" 2490 << " break;\n" 2491 << " }\n" 2492 << " case MCD::OPC_Decode: {\n" 2493 << " unsigned Len;\n" 2494 << " // Decode the Opcode value.\n" 2495 << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n" 2496 << " Ptr += Len;\n" 2497 << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n" 2498 << " Ptr += Len;\n" 2499 << "\n" 2500 << " MI.clear();\n" 2501 << " MI.setOpcode(Opc);\n" 2502 << " bool DecodeComplete;\n"; 2503 if (IsVarLenInst) { 2504 OS << " Len = InstrLenTable[Opc];\n" 2505 << " makeUp(insn, Len);\n"; 2506 } 2507 OS << " S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, " 2508 "DecodeComplete);\n" 2509 << " assert(DecodeComplete);\n" 2510 << "\n" 2511 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n" 2512 << " << \", using decoder \" << DecodeIdx << \": \"\n" 2513 << " << (S != MCDisassembler::Fail ? \"PASS\" : " 2514 "\"FAIL\") << \"\\n\");\n" 2515 << " return S;\n" 2516 << " }\n" 2517 << " case MCD::OPC_TryDecode: {\n" 2518 << " unsigned Len;\n" 2519 << " // Decode the Opcode value.\n" 2520 << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n" 2521 << " Ptr += Len;\n" 2522 << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n" 2523 << " Ptr += Len;\n" 2524 << " // NumToSkip is a plain 24-bit integer.\n" 2525 << " unsigned NumToSkip = *Ptr++;\n" 2526 << " NumToSkip |= (*Ptr++) << 8;\n" 2527 << " NumToSkip |= (*Ptr++) << 16;\n" 2528 << "\n" 2529 << " // Perform the decode operation.\n" 2530 << " MCInst TmpMI;\n" 2531 << " TmpMI.setOpcode(Opc);\n" 2532 << " bool DecodeComplete;\n" 2533 << " S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, " 2534 "DecodeComplete);\n" 2535 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_TryDecode: opcode \" << " 2536 "Opc\n" 2537 << " << \", using decoder \" << DecodeIdx << \": \");\n" 2538 << "\n" 2539 << " if (DecodeComplete) {\n" 2540 << " // Decoding complete.\n" 2541 << " LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? \"PASS\" : " 2542 "\"FAIL\") << \"\\n\");\n" 2543 << " MI = TmpMI;\n" 2544 << " return S;\n" 2545 << " } else {\n" 2546 << " assert(S == MCDisassembler::Fail);\n" 2547 << " // If the decoding was incomplete, skip.\n" 2548 << " Ptr += NumToSkip;\n" 2549 << " LLVM_DEBUG(dbgs() << \"FAIL: continuing at \" << (Ptr - " 2550 "DecodeTable) << \"\\n\");\n" 2551 << " // Reset decode status. This also drops a SoftFail status " 2552 "that could be\n" 2553 << " // set before the decode attempt.\n" 2554 << " S = MCDisassembler::Success;\n" 2555 << " }\n" 2556 << " break;\n" 2557 << " }\n" 2558 << " case MCD::OPC_SoftFail: {\n" 2559 << " // Decode the mask values.\n" 2560 << " unsigned Len;\n" 2561 << " uint64_t PositiveMask = decodeULEB128(++Ptr, &Len);\n" 2562 << " Ptr += Len;\n" 2563 << " uint64_t NegativeMask = decodeULEB128(Ptr, &Len);\n" 2564 << " Ptr += Len;\n" 2565 << " bool Fail = (insn & PositiveMask) != 0 || (~insn & " 2566 "NegativeMask) != 0;\n" 2567 << " if (Fail)\n" 2568 << " S = MCDisassembler::SoftFail;\n" 2569 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? " 2570 "\"FAIL\\n\" : \"PASS\\n\"));\n" 2571 << " break;\n" 2572 << " }\n" 2573 << " case MCD::OPC_Fail: {\n" 2574 << " LLVM_DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n" 2575 << " return MCDisassembler::Fail;\n" 2576 << " }\n" 2577 << " }\n" 2578 << " }\n" 2579 << " llvm_unreachable(\"bogosity detected in disassembler state " 2580 "machine!\");\n" 2581 << "}\n\n"; 2582 } 2583 2584 // Helper to propagate SoftFail status. Returns false if the status is Fail; 2585 // callers are expected to early-exit in that condition. (Note, the '&' operator 2586 // is correct to propagate the values of this enum; see comment on 'enum 2587 // DecodeStatus'.) 2588 static void emitCheck(formatted_raw_ostream &OS) { 2589 OS << "static bool Check(DecodeStatus &Out, DecodeStatus In) {\n" 2590 << " Out = static_cast<DecodeStatus>(Out & In);\n" 2591 << " return Out != MCDisassembler::Fail;\n" 2592 << "}\n\n"; 2593 } 2594 2595 // Emits disassembler code for instruction decoding. 2596 void DecoderEmitter::run(raw_ostream &o) { 2597 formatted_raw_ostream OS(o); 2598 OS << "#include \"llvm/MC/MCInst.h\"\n"; 2599 OS << "#include \"llvm/MC/MCSubtargetInfo.h\"\n"; 2600 OS << "#include \"llvm/MC/SubtargetFeature.h\"\n"; 2601 OS << "#include \"llvm/Support/DataTypes.h\"\n"; 2602 OS << "#include \"llvm/Support/Debug.h\"\n"; 2603 OS << "#include \"llvm/Support/LEB128.h\"\n"; 2604 OS << "#include \"llvm/Support/raw_ostream.h\"\n"; 2605 OS << "#include <assert.h>\n"; 2606 OS << '\n'; 2607 OS << "namespace llvm {\n\n"; 2608 2609 emitFieldFromInstruction(OS); 2610 emitInsertBits(OS); 2611 emitCheck(OS); 2612 2613 Target.reverseBitsForLittleEndianEncoding(); 2614 2615 // Parameterize the decoders based on namespace and instruction width. 2616 std::set<StringRef> HwModeNames; 2617 const auto &NumberedInstructions = Target.getInstructionsByEnumValue(); 2618 NumberedEncodings.reserve(NumberedInstructions.size()); 2619 DenseMap<Record *, unsigned> IndexOfInstruction; 2620 // First, collect all HwModes referenced by the target. 2621 for (const auto &NumberedInstruction : NumberedInstructions) { 2622 IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size(); 2623 2624 if (const RecordVal *RV = 2625 NumberedInstruction->TheDef->getValue("EncodingInfos")) { 2626 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 2627 const CodeGenHwModes &HWM = Target.getHwModes(); 2628 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 2629 for (auto &KV : EBM) 2630 HwModeNames.insert(HWM.getMode(KV.first).Name); 2631 } 2632 } 2633 } 2634 2635 // If HwModeNames is empty, add the empty string so we always have one HwMode. 2636 if (HwModeNames.empty()) 2637 HwModeNames.insert(""); 2638 2639 for (const auto &NumberedInstruction : NumberedInstructions) { 2640 IndexOfInstruction[NumberedInstruction->TheDef] = NumberedEncodings.size(); 2641 2642 if (const RecordVal *RV = 2643 NumberedInstruction->TheDef->getValue("EncodingInfos")) { 2644 if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) { 2645 const CodeGenHwModes &HWM = Target.getHwModes(); 2646 EncodingInfoByHwMode EBM(DI->getDef(), HWM); 2647 for (auto &KV : EBM) { 2648 NumberedEncodings.emplace_back(KV.second, NumberedInstruction, 2649 HWM.getMode(KV.first).Name); 2650 HwModeNames.insert(HWM.getMode(KV.first).Name); 2651 } 2652 continue; 2653 } 2654 } 2655 // This instruction is encoded the same on all HwModes. Emit it for all 2656 // HwModes. 2657 for (StringRef HwModeName : HwModeNames) 2658 NumberedEncodings.emplace_back(NumberedInstruction->TheDef, 2659 NumberedInstruction, HwModeName); 2660 } 2661 for (const auto &NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding")) 2662 NumberedEncodings.emplace_back( 2663 NumberedAlias, 2664 &Target.getInstruction(NumberedAlias->getValueAsDef("AliasOf"))); 2665 2666 std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>> 2667 OpcMap; 2668 std::map<unsigned, std::vector<OperandInfo>> Operands; 2669 std::vector<unsigned> InstrLen; 2670 2671 bool IsVarLenInst = 2672 any_of(NumberedInstructions, [](const CodeGenInstruction *CGI) { 2673 RecordVal *RV = CGI->TheDef->getValue("Inst"); 2674 return RV && isa<DagInit>(RV->getValue()); 2675 }); 2676 unsigned MaxInstLen = 0; 2677 2678 for (unsigned i = 0; i < NumberedEncodings.size(); ++i) { 2679 const Record *EncodingDef = NumberedEncodings[i].EncodingDef; 2680 const CodeGenInstruction *Inst = NumberedEncodings[i].Inst; 2681 const Record *Def = Inst->TheDef; 2682 unsigned Size = EncodingDef->getValueAsInt("Size"); 2683 if (Def->getValueAsString("Namespace") == "TargetOpcode" || 2684 Def->getValueAsBit("isPseudo") || 2685 Def->getValueAsBit("isAsmParserOnly") || 2686 Def->getValueAsBit("isCodeGenOnly")) { 2687 NumEncodingsLackingDisasm++; 2688 continue; 2689 } 2690 2691 if (i < NumberedInstructions.size()) 2692 NumInstructions++; 2693 NumEncodings++; 2694 2695 if (!Size && !IsVarLenInst) 2696 continue; 2697 2698 if (IsVarLenInst) 2699 InstrLen.resize(NumberedInstructions.size(), 0); 2700 2701 if (unsigned Len = populateInstruction(Target, *EncodingDef, *Inst, i, 2702 Operands, IsVarLenInst)) { 2703 if (IsVarLenInst) { 2704 MaxInstLen = std::max(MaxInstLen, Len); 2705 InstrLen[i] = Len; 2706 } 2707 std::string DecoderNamespace = 2708 std::string(EncodingDef->getValueAsString("DecoderNamespace")); 2709 if (!NumberedEncodings[i].HwModeName.empty()) 2710 DecoderNamespace += 2711 std::string("_") + NumberedEncodings[i].HwModeName.str(); 2712 OpcMap[std::make_pair(DecoderNamespace, Size)].emplace_back( 2713 i, IndexOfInstruction.find(Def)->second); 2714 } else { 2715 NumEncodingsOmitted++; 2716 } 2717 } 2718 2719 DecoderTableInfo TableInfo; 2720 for (const auto &Opc : OpcMap) { 2721 // Emit the decoder for this namespace+width combination. 2722 ArrayRef<EncodingAndInst> NumberedEncodingsRef( 2723 NumberedEncodings.data(), NumberedEncodings.size()); 2724 FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands, 2725 IsVarLenInst ? MaxInstLen : 8 * Opc.first.second, this); 2726 2727 // The decode table is cleared for each top level decoder function. The 2728 // predicates and decoders themselves, however, are shared across all 2729 // decoders to give more opportunities for uniqueing. 2730 TableInfo.Table.clear(); 2731 TableInfo.FixupStack.clear(); 2732 TableInfo.Table.reserve(16384); 2733 TableInfo.FixupStack.emplace_back(); 2734 FC.emitTableEntries(TableInfo); 2735 // Any NumToSkip fixups in the top level scope can resolve to the 2736 // OPC_Fail at the end of the table. 2737 assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!"); 2738 // Resolve any NumToSkip fixups in the current scope. 2739 resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), 2740 TableInfo.Table.size()); 2741 TableInfo.FixupStack.clear(); 2742 2743 TableInfo.Table.push_back(MCD::OPC_Fail); 2744 2745 // Print the table to the output stream. 2746 emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), Opc.first.first); 2747 } 2748 2749 // For variable instruction, we emit a instruction length table 2750 // to let the decoder know how long the instructions are. 2751 // You can see example usage in M68k's disassembler. 2752 if (IsVarLenInst) 2753 emitInstrLenTable(OS, InstrLen); 2754 // Emit the predicate function. 2755 emitPredicateFunction(OS, TableInfo.Predicates, 0); 2756 2757 // Emit the decoder function. 2758 emitDecoderFunction(OS, TableInfo.Decoders, 0); 2759 2760 // Emit the main entry point for the decoder, decodeInstruction(). 2761 emitDecodeInstruction(OS, IsVarLenInst); 2762 2763 OS << "\n} // end namespace llvm\n"; 2764 } 2765 2766 namespace llvm { 2767 2768 void EmitDecoder(RecordKeeper &RK, raw_ostream &OS, 2769 const std::string &PredicateNamespace) { 2770 DecoderEmitter(RK, PredicateNamespace).run(OS); 2771 } 2772 2773 } // end namespace llvm 2774