1 //===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This tablegen backend is responsible for emitting the memory fold tables of 10 // the X86 backend instructions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "Common/CodeGenInstruction.h" 15 #include "Common/CodeGenTarget.h" 16 #include "X86RecognizableInstr.h" 17 #include "llvm/ADT/StringSwitch.h" 18 #include "llvm/Support/X86FoldTablesUtils.h" 19 #include "llvm/TableGen/Record.h" 20 #include "llvm/TableGen/TableGenBackend.h" 21 #include <set> 22 23 using namespace llvm; 24 using namespace X86Disassembler; 25 26 namespace { 27 // Represents an entry in the manual mapped instructions set. 28 struct ManualMapEntry { 29 const char *RegInstStr; 30 const char *MemInstStr; 31 uint16_t Strategy; 32 }; 33 34 // List of instructions requiring explicitly aligned memory. 35 const char *ExplicitAlign[] = {"MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS", 36 "MOVNTPD", "MOVNTDQ", "MOVNTDQA"}; 37 38 // List of instructions NOT requiring explicit memory alignment. 39 const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD", 40 "PCMPESTRM", "PCMPESTRI", "PCMPISTRM", 41 "PCMPISTRI"}; 42 43 const ManualMapEntry ManualMapSet[] = { 44 #define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS}, 45 #include "X86ManualFoldTables.def" 46 }; 47 48 const std::set<StringRef> NoFoldSet = { 49 #define NOFOLD(INSN) #INSN, 50 #include "X86ManualFoldTables.def" 51 }; 52 53 static bool isExplicitAlign(const CodeGenInstruction *Inst) { 54 return any_of(ExplicitAlign, [Inst](const char *InstStr) { 55 return Inst->TheDef->getName().contains(InstStr); 56 }); 57 } 58 59 static bool isExplicitUnalign(const CodeGenInstruction *Inst) { 60 return any_of(ExplicitUnalign, [Inst](const char *InstStr) { 61 return Inst->TheDef->getName().contains(InstStr); 62 }); 63 } 64 65 class X86FoldTablesEmitter { 66 RecordKeeper &Records; 67 CodeGenTarget Target; 68 69 // Represents an entry in the folding table 70 class X86FoldTableEntry { 71 const CodeGenInstruction *RegInst; 72 const CodeGenInstruction *MemInst; 73 74 public: 75 bool NoReverse = false; 76 bool NoForward = false; 77 bool FoldLoad = false; 78 bool FoldStore = false; 79 enum BcastType { 80 BCAST_NONE, 81 BCAST_W, 82 BCAST_D, 83 BCAST_Q, 84 BCAST_SS, 85 BCAST_SD, 86 BCAST_SH, 87 }; 88 BcastType BroadcastKind = BCAST_NONE; 89 90 Align Alignment; 91 92 X86FoldTableEntry() = default; 93 X86FoldTableEntry(const CodeGenInstruction *RegInst, 94 const CodeGenInstruction *MemInst) 95 : RegInst(RegInst), MemInst(MemInst) {} 96 97 void print(raw_ostream &OS) const { 98 OS.indent(2); 99 OS << "{X86::" << RegInst->TheDef->getName() << ", "; 100 OS << "X86::" << MemInst->TheDef->getName() << ", "; 101 102 std::string Attrs; 103 if (FoldLoad) 104 Attrs += "TB_FOLDED_LOAD|"; 105 if (FoldStore) 106 Attrs += "TB_FOLDED_STORE|"; 107 if (NoReverse) 108 Attrs += "TB_NO_REVERSE|"; 109 if (NoForward) 110 Attrs += "TB_NO_FORWARD|"; 111 if (Alignment != Align(1)) 112 Attrs += "TB_ALIGN_" + std::to_string(Alignment.value()) + "|"; 113 switch (BroadcastKind) { 114 case BCAST_NONE: 115 break; 116 case BCAST_W: 117 Attrs += "TB_BCAST_W|"; 118 break; 119 case BCAST_D: 120 Attrs += "TB_BCAST_D|"; 121 break; 122 case BCAST_Q: 123 Attrs += "TB_BCAST_Q|"; 124 break; 125 case BCAST_SS: 126 Attrs += "TB_BCAST_SS|"; 127 break; 128 case BCAST_SD: 129 Attrs += "TB_BCAST_SD|"; 130 break; 131 case BCAST_SH: 132 Attrs += "TB_BCAST_SH|"; 133 break; 134 } 135 136 StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|"); 137 if (SimplifiedAttrs.empty()) 138 SimplifiedAttrs = "0"; 139 140 OS << SimplifiedAttrs << "},\n"; 141 } 142 143 #ifndef NDEBUG 144 // Check that Uses and Defs are same after memory fold. 145 void checkCorrectness() const { 146 auto &RegInstRec = *RegInst->TheDef; 147 auto &MemInstRec = *MemInst->TheDef; 148 auto ListOfUsesReg = RegInstRec.getValueAsListOfDefs("Uses"); 149 auto ListOfUsesMem = MemInstRec.getValueAsListOfDefs("Uses"); 150 auto ListOfDefsReg = RegInstRec.getValueAsListOfDefs("Defs"); 151 auto ListOfDefsMem = MemInstRec.getValueAsListOfDefs("Defs"); 152 if (ListOfUsesReg != ListOfUsesMem || ListOfDefsReg != ListOfDefsMem) 153 report_fatal_error("Uses/Defs couldn't be changed after folding " + 154 RegInstRec.getName() + " to " + 155 MemInstRec.getName()); 156 } 157 #endif 158 }; 159 160 // NOTE: We check the fold tables are sorted in X86InstrFoldTables.cpp by the 161 // enum of the instruction, which is computed in 162 // CodeGenTarget::ComputeInstrsByEnum. So we should use the same comparator 163 // here. 164 // FIXME: Could we share the code with CodeGenTarget::ComputeInstrsByEnum? 165 struct CompareInstrsByEnum { 166 bool operator()(const CodeGenInstruction *LHS, 167 const CodeGenInstruction *RHS) const { 168 assert(LHS && RHS && "LHS and RHS shouldn't be nullptr"); 169 const auto &D1 = *LHS->TheDef; 170 const auto &D2 = *RHS->TheDef; 171 return std::tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) < 172 std::tuple(!D2.getValueAsBit("isPseudo"), D2.getName()); 173 } 174 }; 175 176 typedef std::map<const CodeGenInstruction *, X86FoldTableEntry, 177 CompareInstrsByEnum> 178 FoldTable; 179 // Table2Addr - Holds instructions which their memory form performs 180 // load+store. 181 // 182 // Table#i - Holds instructions which the their memory form 183 // performs a load OR a store, and their #i'th operand is folded. 184 // 185 // BroadcastTable#i - Holds instructions which the their memory form performs 186 // a broadcast load and their #i'th operand is folded. 187 FoldTable Table2Addr; 188 FoldTable Table0; 189 FoldTable Table1; 190 FoldTable Table2; 191 FoldTable Table3; 192 FoldTable Table4; 193 FoldTable BroadcastTable1; 194 FoldTable BroadcastTable2; 195 FoldTable BroadcastTable3; 196 FoldTable BroadcastTable4; 197 198 public: 199 X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {} 200 201 // run - Generate the 6 X86 memory fold tables. 202 void run(raw_ostream &OS); 203 204 private: 205 // Decides to which table to add the entry with the given instructions. 206 // S sets the strategy of adding the TB_NO_REVERSE flag. 207 void updateTables(const CodeGenInstruction *RegInst, 208 const CodeGenInstruction *MemInst, uint16_t S = 0, 209 bool IsManual = false, bool IsBroadcast = false); 210 211 // Generates X86FoldTableEntry with the given instructions and fill it with 212 // the appropriate flags, then adds it to a memory fold table. 213 void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInst, 214 const CodeGenInstruction *MemInst, uint16_t S, 215 unsigned FoldedIdx, bool IsManual); 216 // Generates X86FoldTableEntry with the given instructions and adds it to a 217 // broadcast table. 218 void addBroadcastEntry(FoldTable &Table, const CodeGenInstruction *RegInst, 219 const CodeGenInstruction *MemInst); 220 221 // Print the given table as a static const C++ array of type 222 // X86FoldTableEntry. 223 void printTable(const FoldTable &Table, StringRef TableName, 224 raw_ostream &OS) { 225 OS << "static const X86FoldTableEntry " << TableName << "[] = {\n"; 226 227 for (auto &E : Table) 228 E.second.print(OS); 229 230 OS << "};\n\n"; 231 } 232 }; 233 234 // Return true if one of the instruction's operands is a RST register class 235 static bool hasRSTRegClass(const CodeGenInstruction *Inst) { 236 return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) { 237 return OpIn.Rec->getName() == "RST" || OpIn.Rec->getName() == "RSTi"; 238 }); 239 } 240 241 // Return true if one of the instruction's operands is a ptr_rc_tailcall 242 static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) { 243 return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) { 244 return OpIn.Rec->getName() == "ptr_rc_tailcall"; 245 }); 246 } 247 248 static uint8_t byteFromBitsInit(const BitsInit *B) { 249 unsigned N = B->getNumBits(); 250 assert(N <= 8 && "Field is too large for uint8_t!"); 251 252 uint8_t Value = 0; 253 for (unsigned I = 0; I != N; ++I) { 254 BitInit *Bit = cast<BitInit>(B->getBit(I)); 255 Value |= Bit->getValue() << I; 256 } 257 return Value; 258 } 259 260 static bool mayFoldFromForm(uint8_t Form) { 261 switch (Form) { 262 default: 263 return Form >= X86Local::MRM0r && Form <= X86Local::MRM7r; 264 case X86Local::MRMXr: 265 case X86Local::MRMXrCC: 266 case X86Local::MRMDestReg: 267 case X86Local::MRMSrcReg: 268 case X86Local::MRMSrcReg4VOp3: 269 case X86Local::MRMSrcRegOp4: 270 case X86Local::MRMSrcRegCC: 271 return true; 272 } 273 } 274 275 static bool mayFoldToForm(uint8_t Form) { 276 switch (Form) { 277 default: 278 return Form >= X86Local::MRM0m && Form <= X86Local::MRM7m; 279 case X86Local::MRMXm: 280 case X86Local::MRMXmCC: 281 case X86Local::MRMDestMem: 282 case X86Local::MRMSrcMem: 283 case X86Local::MRMSrcMem4VOp3: 284 case X86Local::MRMSrcMemOp4: 285 case X86Local::MRMSrcMemCC: 286 return true; 287 } 288 } 289 290 static bool mayFoldFromLeftToRight(uint8_t LHS, uint8_t RHS) { 291 switch (LHS) { 292 default: 293 llvm_unreachable("Unexpected Form!"); 294 case X86Local::MRM0r: 295 return RHS == X86Local::MRM0m; 296 case X86Local::MRM1r: 297 return RHS == X86Local::MRM1m; 298 case X86Local::MRM2r: 299 return RHS == X86Local::MRM2m; 300 case X86Local::MRM3r: 301 return RHS == X86Local::MRM3m; 302 case X86Local::MRM4r: 303 return RHS == X86Local::MRM4m; 304 case X86Local::MRM5r: 305 return RHS == X86Local::MRM5m; 306 case X86Local::MRM6r: 307 return RHS == X86Local::MRM6m; 308 case X86Local::MRM7r: 309 return RHS == X86Local::MRM7m; 310 case X86Local::MRMXr: 311 return RHS == X86Local::MRMXm; 312 case X86Local::MRMXrCC: 313 return RHS == X86Local::MRMXmCC; 314 case X86Local::MRMDestReg: 315 return RHS == X86Local::MRMDestMem; 316 case X86Local::MRMSrcReg: 317 return RHS == X86Local::MRMSrcMem; 318 case X86Local::MRMSrcReg4VOp3: 319 return RHS == X86Local::MRMSrcMem4VOp3; 320 case X86Local::MRMSrcRegOp4: 321 return RHS == X86Local::MRMSrcMemOp4; 322 case X86Local::MRMSrcRegCC: 323 return RHS == X86Local::MRMSrcMemCC; 324 } 325 } 326 327 static bool isNOREXRegClass(const Record *Op) { 328 return Op->getName().contains("_NOREX"); 329 } 330 331 // Function object - Operator() returns true if the given Reg instruction 332 // matches the Mem instruction of this object. 333 class IsMatch { 334 const CodeGenInstruction *MemInst; 335 const X86Disassembler::RecognizableInstrBase MemRI; 336 bool IsBroadcast; 337 const unsigned Variant; 338 339 public: 340 IsMatch(const CodeGenInstruction *Inst, bool IsBroadcast, unsigned V) 341 : MemInst(Inst), MemRI(*MemInst), IsBroadcast(IsBroadcast), Variant(V) {} 342 343 bool operator()(const CodeGenInstruction *RegInst) { 344 X86Disassembler::RecognizableInstrBase RegRI(*RegInst); 345 const Record *RegRec = RegInst->TheDef; 346 const Record *MemRec = MemInst->TheDef; 347 348 // EVEX_B means different things for memory and register forms. 349 // register form: rounding control or SAE 350 // memory form: broadcast 351 if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B)) 352 return false; 353 // EVEX_B indicates NDD for MAP4 instructions 354 if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B) && 355 RegRI.OpMap != X86Local::T_MAP4) 356 return false; 357 358 if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form)) 359 return false; 360 361 // X86 encoding is crazy, e.g 362 // 363 // f3 0f c7 30 vmxon (%rax) 364 // f3 0f c7 f0 senduipi %rax 365 // 366 // This two instruction have similiar encoding fields but are unrelated 367 if (X86Disassembler::getMnemonic(MemInst, Variant) != 368 X86Disassembler::getMnemonic(RegInst, Variant)) 369 return false; 370 371 // Return false if any of the following fields of does not match. 372 if (std::tuple(RegRI.Encoding, RegRI.Opcode, RegRI.OpPrefix, RegRI.OpMap, 373 RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W, RegRI.HasVEX_4V, 374 RegRI.HasVEX_L, RegRI.IgnoresVEX_L, RegRI.IgnoresW, 375 RegRI.HasEVEX_K, RegRI.HasEVEX_KZ, RegRI.HasEVEX_L2, 376 RegRI.HasEVEX_NF, RegRec->getValueAsBit("hasEVEX_RC"), 377 RegRec->getValueAsBit("hasLockPrefix"), 378 RegRec->getValueAsBit("hasNoTrackPrefix")) != 379 std::tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix, MemRI.OpMap, 380 MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W, MemRI.HasVEX_4V, 381 MemRI.HasVEX_L, MemRI.IgnoresVEX_L, MemRI.IgnoresW, 382 MemRI.HasEVEX_K, MemRI.HasEVEX_KZ, MemRI.HasEVEX_L2, 383 MemRI.HasEVEX_NF, MemRec->getValueAsBit("hasEVEX_RC"), 384 MemRec->getValueAsBit("hasLockPrefix"), 385 MemRec->getValueAsBit("hasNoTrackPrefix"))) 386 return false; 387 388 // Make sure the sizes of the operands of both instructions suit each other. 389 // This is needed for instructions with intrinsic version (_Int). 390 // Where the only difference is the size of the operands. 391 // For example: VUCOMISDZrm and VUCOMISDrm_Int 392 // Also for instructions that their EVEX version was upgraded to work with 393 // k-registers. For example VPCMPEQBrm (xmm output register) and 394 // VPCMPEQBZ128rm (k register output register). 395 unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs(); 396 unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs(); 397 unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs(); 398 unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs(); 399 400 // Instructions with one output in their memory form use the memory folded 401 // operand as source and destination (Read-Modify-Write). 402 unsigned RegStartIdx = 403 (MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 1 : 0; 404 405 bool FoundFoldedOp = false; 406 for (unsigned I = 0, E = MemInst->Operands.size(); I != E; I++) { 407 Record *MemOpRec = MemInst->Operands[I].Rec; 408 Record *RegOpRec = RegInst->Operands[I + RegStartIdx].Rec; 409 410 if (MemOpRec == RegOpRec) 411 continue; 412 413 if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec) && 414 ((getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec)) || 415 (isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec)))) 416 return false; 417 418 if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec) && 419 (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec))) 420 return false; 421 422 if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec) && 423 (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type"))) 424 return false; 425 426 // Only one operand can be folded. 427 if (FoundFoldedOp) 428 return false; 429 430 assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec)); 431 FoundFoldedOp = true; 432 } 433 434 return FoundFoldedOp; 435 } 436 }; 437 438 } // end anonymous namespace 439 440 void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table, 441 const CodeGenInstruction *RegInst, 442 const CodeGenInstruction *MemInst, 443 uint16_t S, unsigned FoldedIdx, 444 bool IsManual) { 445 446 assert((IsManual || Table.find(RegInst) == Table.end()) && 447 "Override entry unexpectedly"); 448 X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst); 449 Record *RegRec = RegInst->TheDef; 450 Result.NoReverse = S & TB_NO_REVERSE; 451 Result.NoForward = S & TB_NO_FORWARD; 452 Result.FoldLoad = S & TB_FOLDED_LOAD; 453 Result.FoldStore = S & TB_FOLDED_STORE; 454 Result.Alignment = Align(1ULL << ((S & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT)); 455 if (IsManual) { 456 Table[RegInst] = Result; 457 return; 458 } 459 460 Record *RegOpRec = RegInst->Operands[FoldedIdx].Rec; 461 Record *MemOpRec = MemInst->Operands[FoldedIdx].Rec; 462 463 // Unfolding code generates a load/store instruction according to the size of 464 // the register in the register form instruction. 465 // If the register's size is greater than the memory's operand size, do not 466 // allow unfolding. 467 468 // the unfolded load size will be based on the register size. If that’s bigger 469 // than the memory operand size, the unfolded load will load more memory and 470 // potentially cause a memory fault. 471 if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec)) 472 Result.NoReverse = true; 473 474 // Check no-kz version's isMoveReg 475 StringRef RegInstName = RegRec->getName(); 476 unsigned DropLen = 477 RegInstName.ends_with("rkz") ? 2 : (RegInstName.ends_with("rk") ? 1 : 0); 478 Record *BaseDef = 479 DropLen ? Records.getDef(RegInstName.drop_back(DropLen)) : nullptr; 480 bool IsMoveReg = 481 BaseDef ? Target.getInstruction(BaseDef).isMoveReg : RegInst->isMoveReg; 482 // A masked load can not be unfolded to a full load, otherwise it would access 483 // unexpected memory. A simple store can not be unfolded. 484 if (IsMoveReg && (BaseDef || Result.FoldStore)) 485 Result.NoReverse = true; 486 487 uint8_t Enc = byteFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits")); 488 if (isExplicitAlign(RegInst)) { 489 // The instruction require explicitly aligned memory. 490 BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize"); 491 Result.Alignment = Align(byteFromBitsInit(VectSize)); 492 } else if (!Enc && !isExplicitUnalign(RegInst) && 493 getMemOperandSize(MemOpRec) > 64) { 494 // Instructions with XOP/VEX/EVEX encoding do not require alignment while 495 // SSE packed vector instructions require a 16 byte alignment. 496 Result.Alignment = Align(16); 497 } 498 // Expand is only ever created as a masked instruction. It is not safe to 499 // unfold a masked expand because we don't know if it came from an expand load 500 // intrinsic or folding a plain load. If it is from a expand load intrinsic, 501 // Unfolding to plain load would read more elements and could trigger a fault. 502 if (RegRec->getName().contains("EXPAND")) 503 Result.NoReverse = true; 504 505 Table[RegInst] = Result; 506 } 507 508 void X86FoldTablesEmitter::addBroadcastEntry( 509 FoldTable &Table, const CodeGenInstruction *RegInst, 510 const CodeGenInstruction *MemInst) { 511 512 assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly"); 513 X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst); 514 515 DagInit *In = MemInst->TheDef->getValueAsDag("InOperandList"); 516 for (unsigned I = 0, E = In->getNumArgs(); I != E; ++I) { 517 Result.BroadcastKind = 518 StringSwitch<X86FoldTableEntry::BcastType>(In->getArg(I)->getAsString()) 519 .Case("i16mem", X86FoldTableEntry::BCAST_W) 520 .Case("i32mem", X86FoldTableEntry::BCAST_D) 521 .Case("i64mem", X86FoldTableEntry::BCAST_Q) 522 .Case("f16mem", X86FoldTableEntry::BCAST_SH) 523 .Case("f32mem", X86FoldTableEntry::BCAST_SS) 524 .Case("f64mem", X86FoldTableEntry::BCAST_SD) 525 .Default(X86FoldTableEntry::BCAST_NONE); 526 if (Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE) 527 break; 528 } 529 assert(Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE && 530 "Unknown memory operand for broadcast"); 531 532 Table[RegInst] = Result; 533 } 534 535 void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst, 536 const CodeGenInstruction *MemInst, 537 uint16_t S, bool IsManual, 538 bool IsBroadcast) { 539 540 Record *RegRec = RegInst->TheDef; 541 Record *MemRec = MemInst->TheDef; 542 unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs(); 543 unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs(); 544 unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs(); 545 unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs(); 546 547 // Instructions which Read-Modify-Write should be added to Table2Addr. 548 if (!MemOutSize && RegOutSize == 1 && MemInSize == RegInSize) { 549 assert(!IsBroadcast && "Read-Modify-Write can not be broadcast"); 550 // X86 would not unfold Read-Modify-Write instructions so add TB_NO_REVERSE. 551 addEntryWithFlags(Table2Addr, RegInst, MemInst, S | TB_NO_REVERSE, 0, 552 IsManual); 553 return; 554 } 555 556 // Only table0 entries should explicitly specify a load or store flag. 557 // If the instruction writes to the folded operand, it will appear as 558 // an output in the register form instruction and as an input in the 559 // memory form instruction. If the instruction reads from the folded 560 // operand, it will appear as in input in both forms. 561 if (MemInSize == RegInSize && MemOutSize == RegOutSize) { 562 // Load-Folding cases. 563 // If the i'th register form operand is a register and the i'th memory form 564 // operand is a memory operand, add instructions to Table#i. 565 for (unsigned I = RegOutSize, E = RegInst->Operands.size(); I < E; I++) { 566 Record *RegOpRec = RegInst->Operands[I].Rec; 567 Record *MemOpRec = MemInst->Operands[I].Rec; 568 // PointerLikeRegClass: For instructions like TAILJMPr, TAILJMPr64, 569 // TAILJMPr64_REX 570 if ((isRegisterOperand(RegOpRec) || 571 RegOpRec->isSubClassOf("PointerLikeRegClass")) && 572 isMemoryOperand(MemOpRec)) { 573 switch (I) { 574 case 0: 575 assert(!IsBroadcast && "BroadcastTable0 needs to be added"); 576 addEntryWithFlags(Table0, RegInst, MemInst, S | TB_FOLDED_LOAD, 0, 577 IsManual); 578 return; 579 case 1: 580 IsBroadcast 581 ? addBroadcastEntry(BroadcastTable1, RegInst, MemInst) 582 : addEntryWithFlags(Table1, RegInst, MemInst, S, 1, IsManual); 583 return; 584 case 2: 585 IsBroadcast 586 ? addBroadcastEntry(BroadcastTable2, RegInst, MemInst) 587 : addEntryWithFlags(Table2, RegInst, MemInst, S, 2, IsManual); 588 return; 589 case 3: 590 IsBroadcast 591 ? addBroadcastEntry(BroadcastTable3, RegInst, MemInst) 592 : addEntryWithFlags(Table3, RegInst, MemInst, S, 3, IsManual); 593 return; 594 case 4: 595 IsBroadcast 596 ? addBroadcastEntry(BroadcastTable4, RegInst, MemInst) 597 : addEntryWithFlags(Table4, RegInst, MemInst, S, 4, IsManual); 598 return; 599 } 600 } 601 } 602 } else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) { 603 // Store-Folding cases. 604 // If the memory form instruction performs a store, the *output* 605 // register of the register form instructions disappear and instead a 606 // memory *input* operand appears in the memory form instruction. 607 // For example: 608 // MOVAPSrr => (outs VR128:$dst), (ins VR128:$src) 609 // MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src) 610 Record *RegOpRec = RegInst->Operands[RegOutSize - 1].Rec; 611 Record *MemOpRec = MemInst->Operands[RegOutSize - 1].Rec; 612 if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) && 613 getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec)) { 614 assert(!IsBroadcast && "Store can not be broadcast"); 615 addEntryWithFlags(Table0, RegInst, MemInst, S | TB_FOLDED_STORE, 0, 616 IsManual); 617 } 618 } 619 } 620 621 void X86FoldTablesEmitter::run(raw_ostream &OS) { 622 // Holds all memory instructions 623 std::vector<const CodeGenInstruction *> MemInsts; 624 // Holds all register instructions - divided according to opcode. 625 std::map<uint8_t, std::vector<const CodeGenInstruction *>> RegInsts; 626 627 ArrayRef<const CodeGenInstruction *> NumberedInstructions = 628 Target.getInstructionsByEnumValue(); 629 630 for (const CodeGenInstruction *Inst : NumberedInstructions) { 631 const Record *Rec = Inst->TheDef; 632 if (!Rec->isSubClassOf("X86Inst") || Rec->getValueAsBit("isAsmParserOnly")) 633 continue; 634 635 if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end()) 636 continue; 637 638 // Promoted legacy instruction is in EVEX space, and has REX2-encoding 639 // alternative. It's added due to HW design and never emitted by compiler. 640 if (byteFromBitsInit(Rec->getValueAsBitsInit("OpMapBits")) == 641 X86Local::T_MAP4 && 642 byteFromBitsInit(Rec->getValueAsBitsInit("explicitOpPrefixBits")) == 643 X86Local::ExplicitEVEX) 644 continue; 645 646 // - Instructions including RST register class operands are not relevant 647 // for memory folding (for further details check the explanation in 648 // lib/Target/X86/X86InstrFPStack.td file). 649 // - Some instructions (listed in the manual map above) use the register 650 // class ptr_rc_tailcall, which can be of a size 32 or 64, to ensure 651 // safe mapping of these instruction we manually map them and exclude 652 // them from the automation. 653 if (hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst)) 654 continue; 655 656 // Add all the memory form instructions to MemInsts, and all the register 657 // form instructions to RegInsts[Opc], where Opc is the opcode of each 658 // instructions. this helps reducing the runtime of the backend. 659 const BitsInit *FormBits = Rec->getValueAsBitsInit("FormBits"); 660 uint8_t Form = byteFromBitsInit(FormBits); 661 if (mayFoldToForm(Form)) 662 MemInsts.push_back(Inst); 663 else if (mayFoldFromForm(Form)) { 664 uint8_t Opc = byteFromBitsInit(Rec->getValueAsBitsInit("Opcode")); 665 RegInsts[Opc].push_back(Inst); 666 } 667 } 668 669 // Create a copy b/c the register instruction will removed when a new entry is 670 // added into memory fold tables. 671 auto RegInstsForBroadcast = RegInsts; 672 673 Record *AsmWriter = Target.getAsmWriter(); 674 unsigned Variant = AsmWriter->getValueAsInt("Variant"); 675 auto FixUp = [&](const CodeGenInstruction *RegInst) { 676 StringRef RegInstName = RegInst->TheDef->getName(); 677 if (RegInstName.ends_with("_REV") || RegInstName.ends_with("_alt")) 678 if (auto *RegAltRec = Records.getDef(RegInstName.drop_back(4))) 679 RegInst = &Target.getInstruction(RegAltRec); 680 return RegInst; 681 }; 682 // For each memory form instruction, try to find its register form 683 // instruction. 684 for (const CodeGenInstruction *MemInst : MemInsts) { 685 uint8_t Opc = 686 byteFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode")); 687 688 auto RegInstsIt = RegInsts.find(Opc); 689 if (RegInstsIt == RegInsts.end()) 690 continue; 691 692 // Two forms (memory & register) of the same instruction must have the same 693 // opcode. 694 std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second; 695 696 // Memory fold tables 697 auto Match = 698 find_if(OpcRegInsts, IsMatch(MemInst, /*IsBroadcast=*/false, Variant)); 699 if (Match != OpcRegInsts.end()) { 700 updateTables(FixUp(*Match), MemInst); 701 OpcRegInsts.erase(Match); 702 } 703 704 // Broadcast tables 705 StringRef MemInstName = MemInst->TheDef->getName(); 706 if (!MemInstName.contains("mb") && !MemInstName.contains("mib")) 707 continue; 708 RegInstsIt = RegInstsForBroadcast.find(Opc); 709 assert(RegInstsIt != RegInstsForBroadcast.end() && 710 "Unexpected control flow"); 711 std::vector<const CodeGenInstruction *> &OpcRegInstsForBroadcast = 712 RegInstsIt->second; 713 Match = find_if(OpcRegInstsForBroadcast, 714 IsMatch(MemInst, /*IsBroadcast=*/true, Variant)); 715 if (Match != OpcRegInstsForBroadcast.end()) { 716 updateTables(FixUp(*Match), MemInst, 0, /*IsManual=*/false, 717 /*IsBroadcast=*/true); 718 OpcRegInstsForBroadcast.erase(Match); 719 } 720 } 721 722 // Add the manually mapped instructions listed above. 723 for (const ManualMapEntry &Entry : ManualMapSet) { 724 Record *RegInstIter = Records.getDef(Entry.RegInstStr); 725 Record *MemInstIter = Records.getDef(Entry.MemInstStr); 726 727 updateTables(&(Target.getInstruction(RegInstIter)), 728 &(Target.getInstruction(MemInstIter)), Entry.Strategy, true); 729 } 730 731 #ifndef NDEBUG 732 auto CheckMemFoldTable = [](const FoldTable &Table) -> void { 733 for (const auto &Record : Table) { 734 auto &FoldEntry = Record.second; 735 FoldEntry.checkCorrectness(); 736 } 737 }; 738 CheckMemFoldTable(Table2Addr); 739 CheckMemFoldTable(Table0); 740 CheckMemFoldTable(Table1); 741 CheckMemFoldTable(Table2); 742 CheckMemFoldTable(Table3); 743 CheckMemFoldTable(Table4); 744 CheckMemFoldTable(BroadcastTable1); 745 CheckMemFoldTable(BroadcastTable2); 746 CheckMemFoldTable(BroadcastTable3); 747 CheckMemFoldTable(BroadcastTable4); 748 #endif 749 #define PRINT_TABLE(TABLE) printTable(TABLE, #TABLE, OS); 750 // Print all tables. 751 PRINT_TABLE(Table2Addr) 752 PRINT_TABLE(Table0) 753 PRINT_TABLE(Table1) 754 PRINT_TABLE(Table2) 755 PRINT_TABLE(Table3) 756 PRINT_TABLE(Table4) 757 PRINT_TABLE(BroadcastTable1) 758 PRINT_TABLE(BroadcastTable2) 759 PRINT_TABLE(BroadcastTable3) 760 PRINT_TABLE(BroadcastTable4) 761 } 762 763 static TableGen::Emitter::OptClass<X86FoldTablesEmitter> 764 X("gen-x86-fold-tables", "Generate X86 fold tables"); 765