//===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting the memory fold tables of
// the X86 backend instructions.
//
//===----------------------------------------------------------------------===//

#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "X86RecognizableInstr.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/X86FoldTablesUtils.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <set>

using namespace llvm;
using namespace X86Disassembler;

namespace {
// Represents an entry in the manual mapped instructions set.
// RegInstStr/MemInstStr are the record names of the register and memory forms;
// Strategy carries the TB_* flags to apply when the pair is added to a table.
struct ManualMapEntry {
  const char *RegInstStr;
  const char *MemInstStr;
  uint16_t Strategy;
};

// List of instructions requiring explicitly aligned memory.
// Matched by substring against the instruction's record name.
const char *ExplicitAlign[] = {
    "MOVDQA",    "MOVAPS",     "MOVAPD",     "MOVNTPS",  "MOVNTPD",
    "MOVNTDQ",   "MOVNTDQA",   "SHA1MSG1",   "SHA1MSG2", "SHA1NEXTE",
    "SHA1RNDS4", "SHA256MSG1", "SHA256MSG2", "SHA256RNDS2"};

// List of instructions NOT requiring explicit memory alignment.
42 const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD", 43 "PCMPESTRM", "PCMPESTRI", "PCMPISTRM", 44 "PCMPISTRI"}; 45 46 const ManualMapEntry ManualMapSet[] = { 47 #define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS}, 48 #include "X86ManualFoldTables.def" 49 }; 50 51 const std::set<StringRef> NoFoldSet = { 52 #define NOFOLD(INSN) #INSN, 53 #include "X86ManualFoldTables.def" 54 }; 55 56 static bool isExplicitAlign(const CodeGenInstruction *Inst) { 57 return any_of(ExplicitAlign, [Inst](const char *InstStr) { 58 return Inst->TheDef->getName().contains(InstStr); 59 }); 60 } 61 62 static bool isExplicitUnalign(const CodeGenInstruction *Inst) { 63 return any_of(ExplicitUnalign, [Inst](const char *InstStr) { 64 return Inst->TheDef->getName().contains(InstStr); 65 }); 66 } 67 68 class X86FoldTablesEmitter { 69 RecordKeeper &Records; 70 CodeGenTarget Target; 71 72 // Represents an entry in the folding table 73 class X86FoldTableEntry { 74 const CodeGenInstruction *RegInst; 75 const CodeGenInstruction *MemInst; 76 77 public: 78 bool NoReverse = false; 79 bool NoForward = false; 80 bool FoldLoad = false; 81 bool FoldStore = false; 82 enum BcastType { 83 BCAST_NONE, 84 BCAST_W, 85 BCAST_D, 86 BCAST_Q, 87 BCAST_SS, 88 BCAST_SD, 89 BCAST_SH, 90 }; 91 BcastType BroadcastKind = BCAST_NONE; 92 93 Align Alignment; 94 95 X86FoldTableEntry() = default; 96 X86FoldTableEntry(const CodeGenInstruction *RegInst, 97 const CodeGenInstruction *MemInst) 98 : RegInst(RegInst), MemInst(MemInst) {} 99 100 void print(formatted_raw_ostream &OS) const { 101 OS.indent(2); 102 OS << "{X86::" << RegInst->TheDef->getName() << ", "; 103 OS << "X86::" << MemInst->TheDef->getName() << ", "; 104 105 std::string Attrs; 106 if (FoldLoad) 107 Attrs += "TB_FOLDED_LOAD|"; 108 if (FoldStore) 109 Attrs += "TB_FOLDED_STORE|"; 110 if (NoReverse) 111 Attrs += "TB_NO_REVERSE|"; 112 if (NoForward) 113 Attrs += "TB_NO_FORWARD|"; 114 if (Alignment != Align(1)) 115 Attrs += "TB_ALIGN_" + 
std::to_string(Alignment.value()) + "|"; 116 switch (BroadcastKind) { 117 case BCAST_NONE: 118 break; 119 case BCAST_W: 120 Attrs += "TB_BCAST_W|"; 121 break; 122 case BCAST_D: 123 Attrs += "TB_BCAST_D|"; 124 break; 125 case BCAST_Q: 126 Attrs += "TB_BCAST_Q|"; 127 break; 128 case BCAST_SS: 129 Attrs += "TB_BCAST_SS|"; 130 break; 131 case BCAST_SD: 132 Attrs += "TB_BCAST_SD|"; 133 break; 134 case BCAST_SH: 135 Attrs += "TB_BCAST_SH|"; 136 break; 137 } 138 139 StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|"); 140 if (SimplifiedAttrs.empty()) 141 SimplifiedAttrs = "0"; 142 143 OS << SimplifiedAttrs << "},\n"; 144 } 145 146 #ifndef NDEBUG 147 // Check that Uses and Defs are same after memory fold. 148 void checkCorrectness() const { 149 auto &RegInstRec = *RegInst->TheDef; 150 auto &MemInstRec = *MemInst->TheDef; 151 auto ListOfUsesReg = RegInstRec.getValueAsListOfDefs("Uses"); 152 auto ListOfUsesMem = MemInstRec.getValueAsListOfDefs("Uses"); 153 auto ListOfDefsReg = RegInstRec.getValueAsListOfDefs("Defs"); 154 auto ListOfDefsMem = MemInstRec.getValueAsListOfDefs("Defs"); 155 if (ListOfUsesReg != ListOfUsesMem || ListOfDefsReg != ListOfDefsMem) 156 report_fatal_error("Uses/Defs couldn't be changed after folding " + 157 RegInstRec.getName() + " to " + 158 MemInstRec.getName()); 159 } 160 #endif 161 }; 162 163 // NOTE: We check the fold tables are sorted in X86InstrFoldTables.cpp by the 164 // enum of the instruction, which is computed in 165 // CodeGenTarget::ComputeInstrsByEnum. So we should use the same comparator 166 // here. 167 // FIXME: Could we share the code with CodeGenTarget::ComputeInstrsByEnum? 
168 struct CompareInstrsByEnum { 169 bool operator()(const CodeGenInstruction *LHS, 170 const CodeGenInstruction *RHS) const { 171 assert(LHS && RHS && "LHS and RHS shouldn't be nullptr"); 172 const auto &D1 = *LHS->TheDef; 173 const auto &D2 = *RHS->TheDef; 174 return std::make_tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) < 175 std::make_tuple(!D2.getValueAsBit("isPseudo"), D2.getName()); 176 } 177 }; 178 179 typedef std::map<const CodeGenInstruction *, X86FoldTableEntry, 180 CompareInstrsByEnum> 181 FoldTable; 182 // Table2Addr - Holds instructions which their memory form performs 183 // load+store. 184 // 185 // Table#i - Holds instructions which the their memory form 186 // performs a load OR a store, and their #i'th operand is folded. 187 // 188 // BroadcastTable#i - Holds instructions which the their memory form performs 189 // a broadcast load and their #i'th operand is folded. 190 FoldTable Table2Addr; 191 FoldTable Table0; 192 FoldTable Table1; 193 FoldTable Table2; 194 FoldTable Table3; 195 FoldTable Table4; 196 FoldTable BroadcastTable1; 197 FoldTable BroadcastTable2; 198 FoldTable BroadcastTable3; 199 FoldTable BroadcastTable4; 200 201 public: 202 X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {} 203 204 // run - Generate the 6 X86 memory fold tables. 205 void run(raw_ostream &OS); 206 207 private: 208 // Decides to which table to add the entry with the given instructions. 209 // S sets the strategy of adding the TB_NO_REVERSE flag. 210 void updateTables(const CodeGenInstruction *RegInst, 211 const CodeGenInstruction *MemInst, uint16_t S = 0, 212 bool IsManual = false, bool IsBroadcast = false); 213 214 // Generates X86FoldTableEntry with the given instructions and fill it with 215 // the appropriate flags, then adds it to a memory fold table. 
216 void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInst, 217 const CodeGenInstruction *MemInst, uint16_t S, 218 unsigned FoldedIdx, bool IsManual); 219 // Generates X86FoldTableEntry with the given instructions and adds it to a 220 // broadcast table. 221 void addBroadcastEntry(FoldTable &Table, const CodeGenInstruction *RegInst, 222 const CodeGenInstruction *MemInst); 223 224 // Print the given table as a static const C++ array of type 225 // X86FoldTableEntry. 226 void printTable(const FoldTable &Table, StringRef TableName, 227 formatted_raw_ostream &OS) { 228 OS << "static const X86FoldTableEntry " << TableName << "[] = {\n"; 229 230 for (auto &E : Table) 231 E.second.print(OS); 232 233 OS << "};\n\n"; 234 } 235 }; 236 237 // Return true if one of the instruction's operands is a RST register class 238 static bool hasRSTRegClass(const CodeGenInstruction *Inst) { 239 return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) { 240 return OpIn.Rec->getName() == "RST" || OpIn.Rec->getName() == "RSTi"; 241 }); 242 } 243 244 // Return true if one of the instruction's operands is a ptr_rc_tailcall 245 static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) { 246 return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) { 247 return OpIn.Rec->getName() == "ptr_rc_tailcall"; 248 }); 249 } 250 251 static uint8_t byteFromBitsInit(const BitsInit *B) { 252 unsigned N = B->getNumBits(); 253 assert(N <= 8 && "Field is too large for uint8_t!"); 254 255 uint8_t Value = 0; 256 for (unsigned I = 0; I != N; ++I) { 257 BitInit *Bit = cast<BitInit>(B->getBit(I)); 258 Value |= Bit->getValue() << I; 259 } 260 return Value; 261 } 262 263 static bool mayFoldFromForm(uint8_t Form) { 264 switch (Form) { 265 default: 266 return Form >= X86Local::MRM0r && Form <= X86Local::MRM7r; 267 case X86Local::MRMXr: 268 case X86Local::MRMXrCC: 269 case X86Local::MRMDestReg: 270 case X86Local::MRMSrcReg: 271 case X86Local::MRMSrcReg4VOp3: 
272 case X86Local::MRMSrcRegOp4: 273 case X86Local::MRMSrcRegCC: 274 return true; 275 } 276 } 277 278 static bool mayFoldToForm(uint8_t Form) { 279 switch (Form) { 280 default: 281 return Form >= X86Local::MRM0m && Form <= X86Local::MRM7m; 282 case X86Local::MRMXm: 283 case X86Local::MRMXmCC: 284 case X86Local::MRMDestMem: 285 case X86Local::MRMSrcMem: 286 case X86Local::MRMSrcMem4VOp3: 287 case X86Local::MRMSrcMemOp4: 288 case X86Local::MRMSrcMemCC: 289 return true; 290 } 291 } 292 293 static bool mayFoldFromLeftToRight(uint8_t LHS, uint8_t RHS) { 294 switch (LHS) { 295 default: 296 llvm_unreachable("Unexpected Form!"); 297 case X86Local::MRM0r: 298 return RHS == X86Local::MRM0m; 299 case X86Local::MRM1r: 300 return RHS == X86Local::MRM1m; 301 case X86Local::MRM2r: 302 return RHS == X86Local::MRM2m; 303 case X86Local::MRM3r: 304 return RHS == X86Local::MRM3m; 305 case X86Local::MRM4r: 306 return RHS == X86Local::MRM4m; 307 case X86Local::MRM5r: 308 return RHS == X86Local::MRM5m; 309 case X86Local::MRM6r: 310 return RHS == X86Local::MRM6m; 311 case X86Local::MRM7r: 312 return RHS == X86Local::MRM7m; 313 case X86Local::MRMXr: 314 return RHS == X86Local::MRMXm; 315 case X86Local::MRMXrCC: 316 return RHS == X86Local::MRMXmCC; 317 case X86Local::MRMDestReg: 318 return RHS == X86Local::MRMDestMem; 319 case X86Local::MRMSrcReg: 320 return RHS == X86Local::MRMSrcMem; 321 case X86Local::MRMSrcReg4VOp3: 322 return RHS == X86Local::MRMSrcMem4VOp3; 323 case X86Local::MRMSrcRegOp4: 324 return RHS == X86Local::MRMSrcMemOp4; 325 case X86Local::MRMSrcRegCC: 326 return RHS == X86Local::MRMSrcMemCC; 327 } 328 } 329 330 static bool isNOREXRegClass(const Record *Op) { 331 return Op->getName().contains("_NOREX"); 332 } 333 334 // Function object - Operator() returns true if the given Reg instruction 335 // matches the Mem instruction of this object. 
class IsMatch {
  const CodeGenInstruction *MemInst; // The memory form to match against.
  const X86Disassembler::RecognizableInstrBase MemRI;
  bool IsBroadcast;       // True when matching for the broadcast tables.
  const unsigned Variant; // AsmWriter variant used to compare mnemonics.

public:
  IsMatch(const CodeGenInstruction *Inst, bool IsBroadcast, unsigned V)
      : MemInst(Inst), MemRI(*MemInst), IsBroadcast(IsBroadcast), Variant(V) {}

  // Returns true if RegInst is the register form corresponding to the memory
  // form instruction this object was constructed with.
  bool operator()(const CodeGenInstruction *RegInst) {
    X86Disassembler::RecognizableInstrBase RegRI(*RegInst);
    const Record *RegRec = RegInst->TheDef;
    const Record *MemRec = MemInst->TheDef;

    // EVEX_B means different things for memory and register forms.
    // register form: rounding control or SAE
    // memory form: broadcast
    if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B))
      return false;
    // EVEX_B indicates NDD for MAP4 instructions
    if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B) &&
        RegRI.OpMap != X86Local::T_MAP4)
      return false;

    if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form))
      return false;

    // X86 encoding is crazy, e.g
    //
    //   f3 0f c7 30   vmxon (%rax)
    //   f3 0f c7 f0   senduipi %rax
    //
    // These two instructions have similar encoding fields but are unrelated,
    // so the mnemonics must match as well.
    if (X86Disassembler::getMnemonic(MemInst, Variant) !=
        X86Disassembler::getMnemonic(RegInst, Variant))
      return false;

    // Return false if any of the following encoding fields does not match.
    if (std::make_tuple(RegRI.Encoding, RegRI.Opcode, RegRI.OpPrefix,
                        RegRI.OpMap, RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W,
                        RegRI.HasVEX_4V, RegRI.HasVEX_L, RegRI.IgnoresVEX_L,
                        RegRI.IgnoresW, RegRI.HasEVEX_K, RegRI.HasEVEX_KZ,
                        RegRI.HasEVEX_L2, RegRI.HasEVEX_NF,
                        RegRec->getValueAsBit("hasEVEX_RC"),
                        RegRec->getValueAsBit("hasLockPrefix"),
                        RegRec->getValueAsBit("hasNoTrackPrefix")) !=
        std::make_tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix,
                        MemRI.OpMap, MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W,
                        MemRI.HasVEX_4V, MemRI.HasVEX_L, MemRI.IgnoresVEX_L,
                        MemRI.IgnoresW, MemRI.HasEVEX_K, MemRI.HasEVEX_KZ,
                        MemRI.HasEVEX_L2, MemRI.HasEVEX_NF,
                        MemRec->getValueAsBit("hasEVEX_RC"),
                        MemRec->getValueAsBit("hasLockPrefix"),
                        MemRec->getValueAsBit("hasNoTrackPrefix")))
      return false;

    // Make sure the sizes of the operands of both instructions suit each
    // other. This is needed for instructions with intrinsic version (_Int),
    // where the only difference is the size of the operands.
    // For example: VUCOMISDZrm and VUCOMISDrm_Int
    // Also for instructions whose EVEX version was upgraded to work with
    // k-registers. For example VPCMPEQBrm (xmm output register) and
    // VPCMPEQBZ128rm (k register output register).
    unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();
    unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();
    unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
    unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();

    // Instructions with one output in their memory form use the memory folded
    // operand as source and destination (Read-Modify-Write), so the register
    // form's operand list is shifted by one relative to the memory form's.
    unsigned RegStartIdx =
        (MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 1 : 0;

    bool FoundFoldedOp = false;
    for (unsigned I = 0, E = MemInst->Operands.size(); I != E; I++) {
      Record *MemOpRec = MemInst->Operands[I].Rec;
      Record *RegOpRec = RegInst->Operands[I + RegStartIdx].Rec;

      if (MemOpRec == RegOpRec)
        continue;

      if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec) &&
          ((getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec)) ||
           (isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec))))
        return false;

      if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec) &&
          (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec)))
        return false;

      if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec) &&
          (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type")))
        return false;

      // Only one operand can be folded.
      if (FoundFoldedOp)
        return false;

      assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec));
      FoundFoldedOp = true;
    }

    return FoundFoldedOp;
  }
};

} // end anonymous namespace

// Builds an X86FoldTableEntry for the (RegInst, MemInst) pair, derives the
// TB_* flags (load/store direction, no-reverse restrictions, alignment) and
// inserts it into Table. S supplies caller-provided flags; FoldedIdx is the
// operand index being folded; IsManual bypasses the heuristics entirely.
void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
                                             const CodeGenInstruction *RegInst,
                                             const CodeGenInstruction *MemInst,
                                             uint16_t S, unsigned FoldedIdx,
                                             bool IsManual) {

  assert((IsManual || Table.find(RegInst) == Table.end()) &&
         "Override entry unexpectedly");
  X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);
  Record *RegRec = RegInst->TheDef;
  Record *MemRec = MemInst->TheDef;

  // Seed the entry from the caller-provided strategy flags.
  Result.NoReverse = S & TB_NO_REVERSE;
  Result.NoForward = S & TB_NO_FORWARD;
  Result.FoldLoad = S & TB_FOLDED_LOAD;
  Result.FoldStore = S & TB_FOLDED_STORE;
  Result.Alignment = Align(1ULL << ((S & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT));
  // Manual entries take the flags exactly as specified; skip the heuristics.
  if (IsManual) {
    Table[RegInst] = Result;
    return;
  }

  // Only table0 entries should explicitly specify a load or store flag.
  if (&Table == &Table0) {
    unsigned MemInOpsNum = MemRec->getValueAsDag("InOperandList")->getNumArgs();
    unsigned RegInOpsNum = RegRec->getValueAsDag("InOperandList")->getNumArgs();
    // If the instruction writes to the folded operand, it will appear as an
    // output in the register form instruction and as an input in the memory
    // form instruction.
    // If the instruction reads from the folded operand, it will appear as an
    // input in both forms.
    if (MemInOpsNum == RegInOpsNum)
      Result.FoldLoad = true;
    else
      Result.FoldStore = true;
  }

  Record *RegOpRec = RegInst->Operands[FoldedIdx].Rec;
  Record *MemOpRec = MemInst->Operands[FoldedIdx].Rec;

  // Unfolding code generates a load/store instruction according to the size of
  // the register in the register form instruction.
  // If the register's size is greater than the memory's operand size, do not
  // allow unfolding.

  // The unfolded load size will be based on the register size. If that's
  // bigger than the memory operand size, the unfolded load will load more
  // memory and potentially cause a memory fault.
  if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
    Result.NoReverse = true;

  // Check no-kz version's isMoveReg: for masked ("rk"/"rkz") variants, look
  // at the unmasked base record to decide whether this is a register move.
  StringRef RegInstName = RegRec->getName();
  unsigned DropLen =
      RegInstName.ends_with("rkz") ? 2 : (RegInstName.ends_with("rk") ? 1 : 0);
  Record *BaseDef =
      DropLen ? Records.getDef(RegInstName.drop_back(DropLen)) : nullptr;
  bool IsMoveReg =
      BaseDef ? Target.getInstruction(BaseDef).isMoveReg : RegInst->isMoveReg;
  // A masked load can not be unfolded to a full load, otherwise it would
  // access unexpected memory. A simple store can not be unfolded.
  if (IsMoveReg && (BaseDef || Result.FoldStore))
    Result.NoReverse = true;

  uint8_t Enc = byteFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits"));
  if (isExplicitAlign(RegInst)) {
    // The instruction requires explicitly aligned memory.
    BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize");
    Result.Alignment = Align(byteFromBitsInit(VectSize));
  } else if (!Enc && !isExplicitUnalign(RegInst) &&
             getMemOperandSize(MemOpRec) > 64) {
    // Instructions with XOP/VEX/EVEX encoding do not require alignment while
    // SSE packed vector instructions require a 16 byte alignment.
    Result.Alignment = Align(16);
  }
  // Expand is only ever created as a masked instruction. It is not safe to
  // unfold a masked expand because we don't know if it came from an expand
  // load intrinsic or folding a plain load. If it is from an expand load
  // intrinsic, unfolding to a plain load would read more elements and could
  // trigger a fault.
  if (RegRec->getName().contains("EXPAND"))
    Result.NoReverse = true;

  Table[RegInst] = Result;
}

// Builds a broadcast-fold entry for (RegInst, MemInst): the broadcast element
// width is derived from the first recognized *mem operand type in the memory
// form's input operand list.
void X86FoldTablesEmitter::addBroadcastEntry(
    FoldTable &Table, const CodeGenInstruction *RegInst,
    const CodeGenInstruction *MemInst) {

  assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly");
  X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);

  DagInit *In = MemInst->TheDef->getValueAsDag("InOperandList");
  for (unsigned I = 0, E = In->getNumArgs(); I != E; ++I) {
    Result.BroadcastKind =
        StringSwitch<X86FoldTableEntry::BcastType>(In->getArg(I)->getAsString())
            .Case("i16mem", X86FoldTableEntry::BCAST_W)
            .Case("i32mem", X86FoldTableEntry::BCAST_D)
            .Case("i64mem", X86FoldTableEntry::BCAST_Q)
            .Case("f16mem", X86FoldTableEntry::BCAST_SH)
            .Case("f32mem", X86FoldTableEntry::BCAST_SS)
            .Case("f64mem", X86FoldTableEntry::BCAST_SD)
            .Default(X86FoldTableEntry::BCAST_NONE);
    if (Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE)
      break;
  }
  assert(Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE &&
         "Unknown memory operand for broadcast");

  Table[RegInst] = Result;
}

// Classifies the (RegInst, MemInst) pair by comparing their operand lists and
// dispatches the entry to Table2Addr, Table0-4 or BroadcastTable1-4.
void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst,
                                        const CodeGenInstruction *MemInst,
                                        uint16_t S, bool IsManual,
                                        bool IsBroadcast) {

  Record *RegRec = RegInst->TheDef;
  Record *MemRec = MemInst->TheDef;
  unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();
  unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();
  unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
  unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();

  // Instructions which Read-Modify-Write should be added to Table2Addr.
  if (!MemOutSize && RegOutSize == 1 && MemInSize == RegInSize) {
    assert(!IsBroadcast && "Read-Modify-Write can not be broadcast");
    // X86 would not unfold Read-Modify-Write instructions so add
    // TB_NO_REVERSE.
    addEntryWithFlags(Table2Addr, RegInst, MemInst, S | TB_NO_REVERSE, 0,
                      IsManual);
    return;
  }

  if (MemInSize == RegInSize && MemOutSize == RegOutSize) {
    // Load-Folding cases.
    // If the i'th register form operand is a register and the i'th memory form
    // operand is a memory operand, add instructions to Table#i.
    for (unsigned I = RegOutSize, E = RegInst->Operands.size(); I < E; I++) {
      Record *RegOpRec = RegInst->Operands[I].Rec;
      Record *MemOpRec = MemInst->Operands[I].Rec;
      // PointerLikeRegClass: For instructions like TAILJMPr, TAILJMPr64,
      // TAILJMPr64_REX
      if ((isRegisterOperand(RegOpRec) ||
           RegOpRec->isSubClassOf("PointerLikeRegClass")) &&
          isMemoryOperand(MemOpRec)) {
        switch (I) {
        case 0:
          assert(!IsBroadcast && "BroadcastTable0 needs to be added");
          addEntryWithFlags(Table0, RegInst, MemInst, S, 0, IsManual);
          return;
        case 1:
          IsBroadcast
              ? addBroadcastEntry(BroadcastTable1, RegInst, MemInst)
              : addEntryWithFlags(Table1, RegInst, MemInst, S, 1, IsManual);
          return;
        case 2:
          IsBroadcast
              ? addBroadcastEntry(BroadcastTable2, RegInst, MemInst)
              : addEntryWithFlags(Table2, RegInst, MemInst, S, 2, IsManual);
          return;
        case 3:
          IsBroadcast
              ? addBroadcastEntry(BroadcastTable3, RegInst, MemInst)
              : addEntryWithFlags(Table3, RegInst, MemInst, S, 3, IsManual);
          return;
        case 4:
          IsBroadcast
              ? addBroadcastEntry(BroadcastTable4, RegInst, MemInst)
              : addEntryWithFlags(Table4, RegInst, MemInst, S, 4, IsManual);
          return;
        }
      }
    }
  } else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) {
    // Store-Folding cases.
    // If the memory form instruction performs a store, the *output*
    // register of the register form instructions disappear and instead a
    // memory *input* operand appears in the memory form instruction.
    // For example:
    //   MOVAPSrr => (outs VR128:$dst), (ins VR128:$src)
    //   MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src)
    Record *RegOpRec = RegInst->Operands[RegOutSize - 1].Rec;
    Record *MemOpRec = MemInst->Operands[RegOutSize - 1].Rec;
    if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) &&
        getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec))  {
      assert(!IsBroadcast && "Store can not be broadcast");
      addEntryWithFlags(Table0, RegInst, MemInst, S, 0, IsManual);
    }
  }
}

// Entry point: scan all X86 instructions, pair memory forms with their
// register forms, apply the manual map, then print all fold tables.
void X86FoldTablesEmitter::run(raw_ostream &O) {
  formatted_raw_ostream OS(O);

  // Holds all memory instructions
  std::vector<const CodeGenInstruction *> MemInsts;
  // Holds all register instructions - divided according to opcode.
  std::map<uint8_t, std::vector<const CodeGenInstruction *>> RegInsts;

  ArrayRef<const CodeGenInstruction *> NumberedInstructions =
      Target.getInstructionsByEnumValue();

  for (const CodeGenInstruction *Inst : NumberedInstructions) {
    const Record *Rec = Inst->TheDef;
    if (!Rec->isSubClassOf("X86Inst") || Rec->getValueAsBit("isAsmParserOnly"))
      continue;

    if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end())
      continue;

    // Promoted legacy instruction is in EVEX space, and has REX2-encoding
    // alternative. It's added due to HW design and never emitted by compiler.
    if (byteFromBitsInit(Rec->getValueAsBitsInit("OpMapBits")) ==
            X86Local::T_MAP4 &&
        byteFromBitsInit(Rec->getValueAsBitsInit("explicitOpPrefixBits")) ==
            X86Local::ExplicitEVEX)
      continue;

    // - Instructions including RST register class operands are not relevant
    //   for memory folding (for further details check the explanation in
    //   lib/Target/X86/X86InstrFPStack.td file).
    // - Some instructions (listed in the manual map above) use the register
    //   class ptr_rc_tailcall, which can be of a size 32 or 64, to ensure
    //   safe mapping of these instruction we manually map them and exclude
    //   them from the automation.
    if (hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst))
      continue;

    // Add all the memory form instructions to MemInsts, and all the register
    // form instructions to RegInsts[Opc], where Opc is the opcode of each
    // instruction. This helps reduce the runtime of the backend.
    const BitsInit *FormBits = Rec->getValueAsBitsInit("FormBits");
    uint8_t Form = byteFromBitsInit(FormBits);
    if (mayFoldToForm(Form))
      MemInsts.push_back(Inst);
    else if (mayFoldFromForm(Form)) {
      uint8_t Opc = byteFromBitsInit(Rec->getValueAsBitsInit("Opcode"));
      RegInsts[Opc].push_back(Inst);
    }
  }

  // Create a copy b/c the register instruction will be removed when a new
  // entry is added into memory fold tables.
  auto RegInstsForBroadcast = RegInsts;

  Record *AsmWriter = Target.getAsmWriter();
  unsigned Variant = AsmWriter->getValueAsInt("Variant");
  // Prefer the canonical record over its "_REV"/"_alt" assembly variants when
  // one exists.
  auto FixUp = [&](const CodeGenInstruction *RegInst) {
    StringRef RegInstName = RegInst->TheDef->getName();
    if (RegInstName.ends_with("_REV") || RegInstName.ends_with("_alt"))
      if (auto *RegAltRec = Records.getDef(RegInstName.drop_back(4)))
        RegInst = &Target.getInstruction(RegAltRec);
    return RegInst;
  };
  // For each memory form instruction, try to find its register form
  // instruction.
  for (const CodeGenInstruction *MemInst : MemInsts) {
    uint8_t Opc =
        byteFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode"));

    auto RegInstsIt = RegInsts.find(Opc);
    if (RegInstsIt == RegInsts.end())
      continue;

    // Two forms (memory & register) of the same instruction must have the same
    // opcode.
    std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second;

    // Memory fold tables
    auto Match =
        find_if(OpcRegInsts, IsMatch(MemInst, /*IsBroadcast=*/false, Variant));
    if (Match != OpcRegInsts.end()) {
      updateTables(FixUp(*Match), MemInst);
      OpcRegInsts.erase(Match);
    }

    // Broadcast tables
    StringRef MemInstName = MemInst->TheDef->getName();
    if (!MemInstName.contains("mb") && !MemInstName.contains("mib"))
      continue;
    RegInstsIt = RegInstsForBroadcast.find(Opc);
    assert(RegInstsIt != RegInstsForBroadcast.end() &&
           "Unexpected control flow");
    std::vector<const CodeGenInstruction *> &OpcRegInstsForBroadcast =
        RegInstsIt->second;
    Match = find_if(OpcRegInstsForBroadcast,
                    IsMatch(MemInst, /*IsBroadcast=*/true, Variant));
    if (Match != OpcRegInstsForBroadcast.end()) {
      updateTables(FixUp(*Match), MemInst, 0, /*IsManual=*/false,
                   /*IsBroadcast=*/true);
      OpcRegInstsForBroadcast.erase(Match);
    }
  }

  // Add the manually mapped instructions listed above.
  for (const ManualMapEntry &Entry : ManualMapSet) {
    Record *RegInstIter = Records.getDef(Entry.RegInstStr);
    Record *MemInstIter = Records.getDef(Entry.MemInstStr);

    updateTables(&(Target.getInstruction(RegInstIter)),
                 &(Target.getInstruction(MemInstIter)), Entry.Strategy, true);
  }

#ifndef NDEBUG
  // Debug-only verification that folding preserved implicit Uses/Defs.
  auto CheckMemFoldTable = [](const FoldTable &Table) -> void {
    for (const auto &Record : Table) {
      auto &FoldEntry = Record.second;
      FoldEntry.checkCorrectness();
    }
  };
  CheckMemFoldTable(Table2Addr);
  CheckMemFoldTable(Table0);
  CheckMemFoldTable(Table1);
  CheckMemFoldTable(Table2);
  CheckMemFoldTable(Table3);
  CheckMemFoldTable(Table4);
  CheckMemFoldTable(BroadcastTable1);
  CheckMemFoldTable(BroadcastTable2);
  CheckMemFoldTable(BroadcastTable3);
  CheckMemFoldTable(BroadcastTable4);
#endif
#define PRINT_TABLE(TABLE) printTable(TABLE, #TABLE, OS);
  // Print all tables.
  PRINT_TABLE(Table2Addr)
  PRINT_TABLE(Table0)
  PRINT_TABLE(Table1)
  PRINT_TABLE(Table2)
  PRINT_TABLE(Table3)
  PRINT_TABLE(Table4)
  PRINT_TABLE(BroadcastTable1)
  PRINT_TABLE(BroadcastTable2)
  PRINT_TABLE(BroadcastTable3)
  PRINT_TABLE(BroadcastTable4)
}

static TableGen::Emitter::OptClass<X86FoldTablesEmitter>
    X("gen-x86-fold-tables", "Generate X86 fold tables");