//===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend is responsible for emitting the memory fold tables of
// the X86 backend instructions.
//
//===----------------------------------------------------------------------===//

#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "X86RecognizableInstr.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/X86FoldTablesUtils.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <set>

using namespace llvm;
using namespace X86Disassembler;

namespace {
// Represents an entry in the manual mapped instructions set.
struct ManualMapEntry {
  const char *RegInstStr; // Name of the register-form instruction record.
  const char *MemInstStr; // Name of the matching memory-form record.
  uint16_t Strategy;      // TB_* flags to apply to the generated entry.
};

// List of instructions requiring explicitly aligned memory.
// Matched by substring against instruction record names (see
// isExplicitAlign below).
const char *ExplicitAlign[] = {
    "MOVDQA",    "MOVAPS",     "MOVAPD",     "MOVNTPS",  "MOVNTPD",
    "MOVNTDQ",   "MOVNTDQA",   "SHA1MSG1",   "SHA1MSG2", "SHA1NEXTE",
    "SHA1RNDS4", "SHA256MSG1", "SHA256MSG2", "SHA256RNDS2"};

// List of instructions NOT requiring explicit memory alignment.
const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD",
                                 "PCMPESTRM", "PCMPESTRI", "PCMPISTRM",
                                 "PCMPISTRI"};

// Instruction pairs that must be mapped by hand instead of being discovered
// automatically; populated from the ENTRY() lines of the .def file.
const ManualMapEntry ManualMapSet[] = {
#define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS},
#include "X86ManualFoldTables.def"
};

// Instructions that must never appear in any fold table; populated from the
// NOFOLD() lines of the .def file.
const std::set<StringRef> NoFoldSet = {
#define NOFOLD(INSN) #INSN,
#include "X86ManualFoldTables.def"
};

// Return true if the instruction's record name contains one of the
// ExplicitAlign substrings, i.e. its memory operand must be aligned.
static bool isExplicitAlign(const CodeGenInstruction *Inst) {
  return any_of(ExplicitAlign, [Inst](const char *InstStr) {
    return Inst->TheDef->getName().contains(InstStr);
  });
}

// Return true if the instruction's record name contains one of the
// ExplicitUnalign substrings, i.e. it explicitly tolerates unaligned memory.
static bool isExplicitUnalign(const CodeGenInstruction *Inst) {
  return any_of(ExplicitUnalign, [Inst](const char *InstStr) {
    return Inst->TheDef->getName().contains(InstStr);
  });
}

class X86FoldTablesEmitter {
  RecordKeeper &Records;
  CodeGenTarget Target;

  // Represents an entry in the folding table
  class X86FoldTableEntry {
    const CodeGenInstruction *RegInst;
    const CodeGenInstruction *MemInst;

  public:
    bool NoReverse = false; // Emit TB_NO_REVERSE: cannot unfold mem->reg.
    bool NoForward = false; // Emit TB_NO_FORWARD: cannot fold reg->mem.
    bool FoldLoad = false;  // Emit TB_FOLDED_LOAD (Table0 / manual only).
    bool FoldStore = false; // Emit TB_FOLDED_STORE (Table0 / manual only).
    // Element kind of a broadcast load, for broadcast-table entries.
    enum BcastType {
      BCAST_NONE,
      BCAST_D,
      BCAST_Q,
      BCAST_SS,
      BCAST_SD,
      BCAST_SH,
    };
    BcastType BroadcastKind = BCAST_NONE;

    // Required memory alignment; Align(1) means no TB_ALIGN_* flag emitted.
    Align Alignment;

    X86FoldTableEntry() = default;
    X86FoldTableEntry(const CodeGenInstruction *RegInst,
                      const CodeGenInstruction *MemInst)
        : RegInst(RegInst), MemInst(MemInst) {}

    // Print this entry as one row of the generated table:
    //   {X86::<RegOpc>, X86::<MemOpc>, <flag|flag|...>},
    void print(formatted_raw_ostream &OS) const {
      OS.indent(2);
      OS << "{X86::" << RegInst->TheDef->getName() << ", ";
      OS << "X86::" << MemInst->TheDef->getName() << ", ";

      // Build a '|'-joined flag string; the trailing '|' is trimmed below.
      std::string Attrs;
      if (FoldLoad)
        Attrs += "TB_FOLDED_LOAD|";
      if (FoldStore)
        Attrs += "TB_FOLDED_STORE|";
      if (NoReverse)
        Attrs += "TB_NO_REVERSE|";
      if (NoForward)
        Attrs += "TB_NO_FORWARD|";
      if (Alignment != Align(1))
        Attrs += "TB_ALIGN_" + std::to_string(Alignment.value()) + "|";
      switch (BroadcastKind) {
      case BCAST_NONE:
        break;
      case BCAST_D:
        Attrs += "TB_BCAST_D|";
        break;
      case BCAST_Q:
        Attrs += "TB_BCAST_Q|";
        break;
      case BCAST_SS:
        Attrs += "TB_BCAST_SS|";
        break;
      case BCAST_SD:
        Attrs += "TB_BCAST_SD|";
        break;
      case BCAST_SH:
        Attrs += "TB_BCAST_SH|";
        break;
      }

      StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|");
      if (SimplifiedAttrs.empty())
        SimplifiedAttrs = "0";

      OS << SimplifiedAttrs << "},\n";
    }

#ifndef NDEBUG
    // Check that Uses and Defs are same after memory fold.
    void checkCorrectness() const {
      auto &RegInstRec = *RegInst->TheDef;
      auto &MemInstRec = *MemInst->TheDef;
      auto ListOfUsesReg = RegInstRec.getValueAsListOfDefs("Uses");
      auto ListOfUsesMem = MemInstRec.getValueAsListOfDefs("Uses");
      auto ListOfDefsReg = RegInstRec.getValueAsListOfDefs("Defs");
      auto ListOfDefsMem = MemInstRec.getValueAsListOfDefs("Defs");
      if (ListOfUsesReg != ListOfUsesMem || ListOfDefsReg != ListOfDefsMem)
        report_fatal_error("Uses/Defs couldn't be changed after folding " +
                           RegInstRec.getName() + " to " +
                           MemInstRec.getName());
    }
#endif
  };

  // NOTE: We check the fold tables are sorted in X86InstrFoldTables.cpp by the
  // enum of the instruction, which is computed in
  // CodeGenTarget::ComputeInstrsByEnum. So we should use the same comparator
  // here.
  // FIXME: Could we share the code with CodeGenTarget::ComputeInstrsByEnum?
  struct CompareInstrsByEnum {
    bool operator()(const CodeGenInstruction *LHS,
                    const CodeGenInstruction *RHS) const {
      assert(LHS && RHS && "LHS and RHS shouldn't be nullptr");
      const auto &D1 = *LHS->TheDef;
      const auto &D2 = *RHS->TheDef;
      // Non-pseudo instructions sort before pseudos, then by record name —
      // mirroring the enum order computed by ComputeInstrsByEnum.
      return std::make_tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) <
             std::make_tuple(!D2.getValueAsBit("isPseudo"), D2.getName());
    }
  };

  typedef std::map<const CodeGenInstruction *, X86FoldTableEntry,
                   CompareInstrsByEnum>
      FoldTable;
  // Table2Addr - Holds instructions whose memory form performs a load+store.
  //
  // Table#i - Holds instructions whose memory form performs a load OR a
  // store, and whose #i'th operand is folded.
  //
  // BroadcastTable#i - Holds instructions whose memory form performs a
  // broadcast load and whose #i'th operand is folded.
  FoldTable Table2Addr;
  FoldTable Table0;
  FoldTable Table1;
  FoldTable Table2;
  FoldTable Table3;
  FoldTable Table4;
  FoldTable BroadcastTable1;
  FoldTable BroadcastTable2;
  FoldTable BroadcastTable3;
  FoldTable BroadcastTable4;

public:
  X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}

  // run - Generate the X86 memory fold tables (Table2Addr, Table0-Table4,
  // and BroadcastTable1-BroadcastTable4).
  void run(raw_ostream &OS);

private:
  // Decides to which table to add the entry with the given instructions.
  // S sets the strategy of adding the TB_NO_REVERSE flag.
  void updateTables(const CodeGenInstruction *RegInst,
                    const CodeGenInstruction *MemInst, uint16_t S = 0,
                    bool IsManual = false, bool IsBroadcast = false);

  // Generates X86FoldTableEntry with the given instructions and fill it with
  // the appropriate flags, then adds it to a memory fold table.
  void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInst,
                         const CodeGenInstruction *MemInst, uint16_t S,
                         unsigned FoldedIdx, bool IsManual);
  // Generates X86FoldTableEntry with the given instructions and adds it to a
  // broadcast table.
  void addBroadcastEntry(FoldTable &Table, const CodeGenInstruction *RegInst,
                         const CodeGenInstruction *MemInst);

  // Print the given table as a static const C++ array of type
  // X86FoldTableEntry.
  void printTable(const FoldTable &Table, StringRef TableName,
                  formatted_raw_ostream &OS) {
    OS << "static const X86FoldTableEntry " << TableName << "[] = {\n";

    for (auto &E : Table)
      E.second.print(OS);

    OS << "};\n\n";
  }
};

// Return true if one of the instruction's operands is a RST register class
static bool hasRSTRegClass(const CodeGenInstruction *Inst) {
  return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
    return OpIn.Rec->getName() == "RST" || OpIn.Rec->getName() == "RSTi";
  });
}

// Return true if one of the instruction's operands is a ptr_rc_tailcall
static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) {
  return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
    return OpIn.Rec->getName() == "ptr_rc_tailcall";
  });
}

// Decode a little-endian BitsInit field (at most 8 bits) into a uint8_t.
static uint8_t byteFromBitsInit(const BitsInit *B) {
  unsigned N = B->getNumBits();
  assert(N <= 8 && "Field is too large for uint8_t!");

  uint8_t Value = 0;
  for (unsigned I = 0; I != N; ++I) {
    BitInit *Bit = cast<BitInit>(B->getBit(I));
    Value |= Bit->getValue() << I;
  }
  return Value;
}

// Return true if an instruction with the given register-operand Form is a
// candidate for memory folding (i.e. has a potential memory counterpart).
static bool mayFoldFromForm(uint8_t Form) {
  switch (Form) {
  default:
    // MRM0r..MRM7r are contiguous in the form enum; treat the whole range
    // as foldable.
    return Form >= X86Local::MRM0r && Form <= X86Local::MRM7r;
  case X86Local::MRMXr:
  case X86Local::MRMXrCC:
  case X86Local::MRMDestReg:
  case X86Local::MRMSrcReg:
  case X86Local::MRMSrcReg4VOp3:
  case X86Local::MRMSrcRegOp4:
  case X86Local::MRMSrcRegCC:
    return true;
  }
}

// Return true if an instruction with the given memory-operand Form may be
// the memory counterpart of some register-form instruction.
static bool mayFoldToForm(uint8_t Form) {
  switch (Form) {
  default:
    // MRM0m..MRM7m are contiguous in the form enum; treat the whole range
    // as a potential fold target.
    return Form >= X86Local::MRM0m && Form <= X86Local::MRM7m;
  case X86Local::MRMXm:
  case X86Local::MRMXmCC:
  case X86Local::MRMDestMem:
  case X86Local::MRMSrcMem:
  case X86Local::MRMSrcMem4VOp3:
  case X86Local::MRMSrcMemOp4:
  case X86Local::MRMSrcMemCC:
    return true;
  }
}

// Return true if the register form LHS and memory form RHS are corresponding
// encodings, i.e. LHS's form can legally fold into RHS's form.
static bool mayFoldFromLeftToRight(uint8_t LHS, uint8_t RHS) {
  switch (LHS) {
  default:
    llvm_unreachable("Unexpected Form!");
  case X86Local::MRM0r:
    return RHS == X86Local::MRM0m;
  case X86Local::MRM1r:
    return RHS == X86Local::MRM1m;
  case X86Local::MRM2r:
    return RHS == X86Local::MRM2m;
  case X86Local::MRM3r:
    return RHS == X86Local::MRM3m;
  case X86Local::MRM4r:
    return RHS == X86Local::MRM4m;
  case X86Local::MRM5r:
    return RHS == X86Local::MRM5m;
  case X86Local::MRM6r:
    return RHS == X86Local::MRM6m;
  case X86Local::MRM7r:
    return RHS == X86Local::MRM7m;
  case X86Local::MRMXr:
    return RHS == X86Local::MRMXm;
  case X86Local::MRMXrCC:
    return RHS == X86Local::MRMXmCC;
  case X86Local::MRMDestReg:
    return RHS == X86Local::MRMDestMem;
  case X86Local::MRMSrcReg:
    return RHS == X86Local::MRMSrcMem;
  case X86Local::MRMSrcReg4VOp3:
    return RHS == X86Local::MRMSrcMem4VOp3;
  case X86Local::MRMSrcRegOp4:
    return RHS == X86Local::MRMSrcMemOp4;
  case X86Local::MRMSrcRegCC:
    return RHS == X86Local::MRMSrcMemCC;
  }
}

// Return true if the operand's register class record is a _NOREX variant.
static bool isNOREXRegClass(const Record *Op) {
  return Op->getName().contains("_NOREX");
}

// Function object - Operator() returns true if the given Reg instruction
// matches the Mem instruction of this object.
331 class IsMatch { 332 const CodeGenInstruction *MemInst; 333 const X86Disassembler::RecognizableInstrBase MemRI; 334 bool IsBroadcast; 335 const unsigned Variant; 336 337 public: 338 IsMatch(const CodeGenInstruction *Inst, bool IsBroadcast, unsigned V) 339 : MemInst(Inst), MemRI(*MemInst), IsBroadcast(IsBroadcast), Variant(V) {} 340 341 bool operator()(const CodeGenInstruction *RegInst) { 342 X86Disassembler::RecognizableInstrBase RegRI(*RegInst); 343 const Record *RegRec = RegInst->TheDef; 344 const Record *MemRec = MemInst->TheDef; 345 346 // EVEX_B means different things for memory and register forms. 347 // register form: rounding control or SAE 348 // memory form: broadcast 349 if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B)) 350 return false; 351 // EVEX_B indicates NDD for MAP4 instructions 352 if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B) && 353 RegRI.OpMap != X86Local::T_MAP4) 354 return false; 355 356 if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form)) 357 return false; 358 359 // X86 encoding is crazy, e.g 360 // 361 // f3 0f c7 30 vmxon (%rax) 362 // f3 0f c7 f0 senduipi %rax 363 // 364 // This two instruction have similiar encoding fields but are unrelated 365 if (X86Disassembler::getMnemonic(MemInst, Variant) != 366 X86Disassembler::getMnemonic(RegInst, Variant)) 367 return false; 368 369 // Return false if any of the following fields of does not match. 
370 if (std::make_tuple(RegRI.Encoding, RegRI.Opcode, RegRI.OpPrefix, 371 RegRI.OpMap, RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W, 372 RegRI.HasVEX_4V, RegRI.HasVEX_L, RegRI.IgnoresVEX_L, 373 RegRI.IgnoresW, RegRI.HasEVEX_K, RegRI.HasEVEX_KZ, 374 RegRI.HasEVEX_L2, RegRI.HasEVEX_NF, 375 RegRec->getValueAsBit("hasEVEX_RC"), 376 RegRec->getValueAsBit("hasLockPrefix"), 377 RegRec->getValueAsBit("hasNoTrackPrefix")) != 378 std::make_tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix, 379 MemRI.OpMap, MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W, 380 MemRI.HasVEX_4V, MemRI.HasVEX_L, MemRI.IgnoresVEX_L, 381 MemRI.IgnoresW, MemRI.HasEVEX_K, MemRI.HasEVEX_KZ, 382 MemRI.HasEVEX_L2, MemRI.HasEVEX_NF, 383 MemRec->getValueAsBit("hasEVEX_RC"), 384 MemRec->getValueAsBit("hasLockPrefix"), 385 MemRec->getValueAsBit("hasNoTrackPrefix"))) 386 return false; 387 388 // Make sure the sizes of the operands of both instructions suit each other. 389 // This is needed for instructions with intrinsic version (_Int). 390 // Where the only difference is the size of the operands. 391 // For example: VUCOMISDZrm and VUCOMISDrm_Int 392 // Also for instructions that their EVEX version was upgraded to work with 393 // k-registers. For example VPCMPEQBrm (xmm output register) and 394 // VPCMPEQBZ128rm (k register output register). 395 unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs(); 396 unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs(); 397 unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs(); 398 unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs(); 399 400 // Instructions with one output in their memory form use the memory folded 401 // operand as source and destination (Read-Modify-Write). 402 unsigned RegStartIdx = 403 (MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 
1 : 0; 404 405 bool FoundFoldedOp = false; 406 for (unsigned I = 0, E = MemInst->Operands.size(); I != E; I++) { 407 Record *MemOpRec = MemInst->Operands[I].Rec; 408 Record *RegOpRec = RegInst->Operands[I + RegStartIdx].Rec; 409 410 if (MemOpRec == RegOpRec) 411 continue; 412 413 if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec) && 414 ((getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec)) || 415 (isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec)))) 416 return false; 417 418 if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec) && 419 (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec))) 420 return false; 421 422 if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec) && 423 (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type"))) 424 return false; 425 426 // Only one operand can be folded. 427 if (FoundFoldedOp) 428 return false; 429 430 assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec)); 431 FoundFoldedOp = true; 432 } 433 434 return FoundFoldedOp; 435 } 436 }; 437 438 } // end anonymous namespace 439 440 void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table, 441 const CodeGenInstruction *RegInst, 442 const CodeGenInstruction *MemInst, 443 uint16_t S, unsigned FoldedIdx, 444 bool IsManual) { 445 446 assert((IsManual || Table.find(RegInst) == Table.end()) && 447 "Override entry unexpectedly"); 448 X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst); 449 Record *RegRec = RegInst->TheDef; 450 Record *MemRec = MemInst->TheDef; 451 452 Result.NoReverse = S & TB_NO_REVERSE; 453 Result.NoForward = S & TB_NO_FORWARD; 454 Result.FoldLoad = S & TB_FOLDED_LOAD; 455 Result.FoldStore = S & TB_FOLDED_STORE; 456 Result.Alignment = Align(1ULL << ((S & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT)); 457 if (IsManual) { 458 Table[RegInst] = Result; 459 return; 460 } 461 462 // Only table0 entries should explicitly specify a load or store flag. 
463 if (&Table == &Table0) { 464 unsigned MemInOpsNum = MemRec->getValueAsDag("InOperandList")->getNumArgs(); 465 unsigned RegInOpsNum = RegRec->getValueAsDag("InOperandList")->getNumArgs(); 466 // If the instruction writes to the folded operand, it will appear as an 467 // output in the register form instruction and as an input in the memory 468 // form instruction. 469 // If the instruction reads from the folded operand, it well appear as in 470 // input in both forms. 471 if (MemInOpsNum == RegInOpsNum) 472 Result.FoldLoad = true; 473 else 474 Result.FoldStore = true; 475 } 476 477 Record *RegOpRec = RegInst->Operands[FoldedIdx].Rec; 478 Record *MemOpRec = MemInst->Operands[FoldedIdx].Rec; 479 480 // Unfolding code generates a load/store instruction according to the size of 481 // the register in the register form instruction. 482 // If the register's size is greater than the memory's operand size, do not 483 // allow unfolding. 484 485 // the unfolded load size will be based on the register size. If that’s bigger 486 // than the memory operand size, the unfolded load will load more memory and 487 // potentially cause a memory fault. 488 if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec)) 489 Result.NoReverse = true; 490 491 // Check no-kz version's isMoveReg 492 StringRef RegInstName = RegRec->getName(); 493 unsigned DropLen = 494 RegInstName.ends_with("rkz") ? 2 : (RegInstName.ends_with("rk") ? 1 : 0); 495 Record *BaseDef = 496 DropLen ? Records.getDef(RegInstName.drop_back(DropLen)) : nullptr; 497 bool IsMoveReg = 498 BaseDef ? Target.getInstruction(BaseDef).isMoveReg : RegInst->isMoveReg; 499 // A masked load can not be unfolded to a full load, otherwise it would access 500 // unexpected memory. A simple store can not be unfolded. 
501 if (IsMoveReg && (BaseDef || Result.FoldStore)) 502 Result.NoReverse = true; 503 504 uint8_t Enc = byteFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits")); 505 if (isExplicitAlign(RegInst)) { 506 // The instruction require explicitly aligned memory. 507 BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize"); 508 Result.Alignment = Align(byteFromBitsInit(VectSize)); 509 } else if (!Enc && !isExplicitUnalign(RegInst) && 510 getMemOperandSize(MemOpRec) > 64) { 511 // Instructions with XOP/VEX/EVEX encoding do not require alignment while 512 // SSE packed vector instructions require a 16 byte alignment. 513 Result.Alignment = Align(16); 514 } 515 // Expand is only ever created as a masked instruction. It is not safe to 516 // unfold a masked expand because we don't know if it came from an expand load 517 // intrinsic or folding a plain load. If it is from a expand load intrinsic, 518 // Unfolding to plain load would read more elements and could trigger a fault. 519 if (RegRec->getName().contains("EXPAND")) 520 Result.NoReverse = true; 521 522 Table[RegInst] = Result; 523 } 524 525 void X86FoldTablesEmitter::addBroadcastEntry( 526 FoldTable &Table, const CodeGenInstruction *RegInst, 527 const CodeGenInstruction *MemInst) { 528 529 assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly"); 530 X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst); 531 532 Record *RegRec = RegInst->TheDef; 533 StringRef RegInstName = RegRec->getName(); 534 StringRef MemInstName = MemInst->TheDef->getName(); 535 Record *Domain = RegRec->getValueAsDef("ExeDomain"); 536 bool IsSSEPackedInt = Domain->getName() == "SSEPackedInt"; 537 if ((RegInstName.contains("DZ") || RegInstName.contains("DWZ") || 538 RegInstName.contains("Dr") || RegInstName.contains("I32")) && 539 IsSSEPackedInt) { 540 assert((MemInstName.contains("DZ") || RegInstName.contains("DWZ") || 541 MemInstName.contains("Dr") || MemInstName.contains("I32")) && 542 "Unmatched names for 
broadcast"); 543 Result.BroadcastKind = X86FoldTableEntry::BCAST_D; 544 } else if ((RegInstName.contains("QZ") || RegInstName.contains("QBZ") || 545 RegInstName.contains("Qr") || RegInstName.contains("I64")) && 546 IsSSEPackedInt) { 547 assert((MemInstName.contains("QZ") || MemInstName.contains("QBZ") || 548 MemInstName.contains("Qr") || MemInstName.contains("I64")) && 549 "Unmatched names for broadcast"); 550 Result.BroadcastKind = X86FoldTableEntry::BCAST_Q; 551 } else if ((RegInstName.contains("PS") || RegInstName.contains("F32") || 552 RegInstName.contains("CPH")) && 553 !RegInstName.contains("PH2PS")) { 554 assert((MemInstName.contains("PS") || MemInstName.contains("F32") || 555 MemInstName.contains("CPH")) && 556 "Unmatched names for broadcast"); 557 Result.BroadcastKind = X86FoldTableEntry::BCAST_SS; 558 } else if ((RegInstName.contains("PD") || RegInstName.contains("F64")) && 559 !RegInstName.contains("PH2PD")) { 560 assert((MemInstName.contains("PD") || MemInstName.contains("F64")) && 561 "Unmatched names for broadcast"); 562 Result.BroadcastKind = X86FoldTableEntry::BCAST_SD; 563 } else if (RegInstName.contains("PH")) { 564 assert(MemInstName.contains("PH") && "Unmatched names for broadcast"); 565 Result.BroadcastKind = X86FoldTableEntry::BCAST_SH; 566 } else { 567 errs() << RegInstName << ", " << MemInstName << "\n"; 568 llvm_unreachable("Name is not canoicalized for broadcast or " 569 "ExeDomain is incorrect"); 570 } 571 572 Table[RegInst] = Result; 573 } 574 575 void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst, 576 const CodeGenInstruction *MemInst, 577 uint16_t S, bool IsManual, 578 bool IsBroadcast) { 579 580 Record *RegRec = RegInst->TheDef; 581 Record *MemRec = MemInst->TheDef; 582 unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs(); 583 unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs(); 584 unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs(); 585 
unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs(); 586 587 // Instructions which Read-Modify-Write should be added to Table2Addr. 588 if (!MemOutSize && RegOutSize == 1 && MemInSize == RegInSize) { 589 assert(!IsBroadcast && "Read-Modify-Write can not be broadcast"); 590 // X86 would not unfold Read-Modify-Write instructions so add TB_NO_REVERSE. 591 addEntryWithFlags(Table2Addr, RegInst, MemInst, S | TB_NO_REVERSE, 0, 592 IsManual); 593 return; 594 } 595 596 if (MemInSize == RegInSize && MemOutSize == RegOutSize) { 597 // Load-Folding cases. 598 // If the i'th register form operand is a register and the i'th memory form 599 // operand is a memory operand, add instructions to Table#i. 600 for (unsigned I = RegOutSize, E = RegInst->Operands.size(); I < E; I++) { 601 Record *RegOpRec = RegInst->Operands[I].Rec; 602 Record *MemOpRec = MemInst->Operands[I].Rec; 603 // PointerLikeRegClass: For instructions like TAILJMPr, TAILJMPr64, 604 // TAILJMPr64_REX 605 if ((isRegisterOperand(RegOpRec) || 606 RegOpRec->isSubClassOf("PointerLikeRegClass")) && 607 isMemoryOperand(MemOpRec)) { 608 switch (I) { 609 case 0: 610 assert(!IsBroadcast && "BroadcastTable0 needs to be added"); 611 addEntryWithFlags(Table0, RegInst, MemInst, S, 0, IsManual); 612 return; 613 case 1: 614 IsBroadcast 615 ? addBroadcastEntry(BroadcastTable1, RegInst, MemInst) 616 : addEntryWithFlags(Table1, RegInst, MemInst, S, 1, IsManual); 617 return; 618 case 2: 619 IsBroadcast 620 ? addBroadcastEntry(BroadcastTable2, RegInst, MemInst) 621 : addEntryWithFlags(Table2, RegInst, MemInst, S, 2, IsManual); 622 return; 623 case 3: 624 IsBroadcast 625 ? addBroadcastEntry(BroadcastTable3, RegInst, MemInst) 626 : addEntryWithFlags(Table3, RegInst, MemInst, S, 3, IsManual); 627 return; 628 case 4: 629 IsBroadcast 630 ? 
addBroadcastEntry(BroadcastTable4, RegInst, MemInst) 631 : addEntryWithFlags(Table4, RegInst, MemInst, S, 4, IsManual); 632 return; 633 } 634 } 635 } 636 } else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) { 637 // Store-Folding cases. 638 // If the memory form instruction performs a store, the *output* 639 // register of the register form instructions disappear and instead a 640 // memory *input* operand appears in the memory form instruction. 641 // For example: 642 // MOVAPSrr => (outs VR128:$dst), (ins VR128:$src) 643 // MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src) 644 Record *RegOpRec = RegInst->Operands[RegOutSize - 1].Rec; 645 Record *MemOpRec = MemInst->Operands[RegOutSize - 1].Rec; 646 if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) && 647 getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec)) { 648 assert(!IsBroadcast && "Store can not be broadcast"); 649 addEntryWithFlags(Table0, RegInst, MemInst, S, 0, IsManual); 650 } 651 } 652 } 653 654 void X86FoldTablesEmitter::run(raw_ostream &O) { 655 formatted_raw_ostream OS(O); 656 657 // Holds all memory instructions 658 std::vector<const CodeGenInstruction *> MemInsts; 659 // Holds all register instructions - divided according to opcode. 660 std::map<uint8_t, std::vector<const CodeGenInstruction *>> RegInsts; 661 662 ArrayRef<const CodeGenInstruction *> NumberedInstructions = 663 Target.getInstructionsByEnumValue(); 664 665 for (const CodeGenInstruction *Inst : NumberedInstructions) { 666 const Record *Rec = Inst->TheDef; 667 if (!Rec->isSubClassOf("X86Inst") || Rec->getValueAsBit("isAsmParserOnly")) 668 continue; 669 670 if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end()) 671 continue; 672 673 // Promoted legacy instruction is in EVEX space, and has REX2-encoding 674 // alternative. It's added due to HW design and never emitted by compiler. 
675 if (byteFromBitsInit(Rec->getValueAsBitsInit("OpMapBits")) == 676 X86Local::T_MAP4 && 677 byteFromBitsInit(Rec->getValueAsBitsInit("explicitOpPrefixBits")) == 678 X86Local::ExplicitEVEX) 679 continue; 680 681 // - Instructions including RST register class operands are not relevant 682 // for memory folding (for further details check the explanation in 683 // lib/Target/X86/X86InstrFPStack.td file). 684 // - Some instructions (listed in the manual map above) use the register 685 // class ptr_rc_tailcall, which can be of a size 32 or 64, to ensure 686 // safe mapping of these instruction we manually map them and exclude 687 // them from the automation. 688 if (hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst)) 689 continue; 690 691 // Add all the memory form instructions to MemInsts, and all the register 692 // form instructions to RegInsts[Opc], where Opc is the opcode of each 693 // instructions. this helps reducing the runtime of the backend. 694 const BitsInit *FormBits = Rec->getValueAsBitsInit("FormBits"); 695 uint8_t Form = byteFromBitsInit(FormBits); 696 if (mayFoldToForm(Form)) 697 MemInsts.push_back(Inst); 698 else if (mayFoldFromForm(Form)) { 699 uint8_t Opc = byteFromBitsInit(Rec->getValueAsBitsInit("Opcode")); 700 RegInsts[Opc].push_back(Inst); 701 } 702 } 703 704 // Create a copy b/c the register instruction will removed when a new entry is 705 // added into memory fold tables. 706 auto RegInstsForBroadcast = RegInsts; 707 708 Record *AsmWriter = Target.getAsmWriter(); 709 unsigned Variant = AsmWriter->getValueAsInt("Variant"); 710 auto FixUp = [&](const CodeGenInstruction *RegInst) { 711 StringRef RegInstName = RegInst->TheDef->getName(); 712 if (RegInstName.ends_with("_REV") || RegInstName.ends_with("_alt")) 713 if (auto *RegAltRec = Records.getDef(RegInstName.drop_back(4))) 714 RegInst = &Target.getInstruction(RegAltRec); 715 return RegInst; 716 }; 717 // For each memory form instruction, try to find its register form 718 // instruction. 
719 for (const CodeGenInstruction *MemInst : MemInsts) { 720 uint8_t Opc = 721 byteFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode")); 722 723 auto RegInstsIt = RegInsts.find(Opc); 724 if (RegInstsIt == RegInsts.end()) 725 continue; 726 727 // Two forms (memory & register) of the same instruction must have the same 728 // opcode. 729 std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second; 730 731 // Memory fold tables 732 auto Match = 733 find_if(OpcRegInsts, IsMatch(MemInst, /*IsBroadcast=*/false, Variant)); 734 if (Match != OpcRegInsts.end()) { 735 updateTables(FixUp(*Match), MemInst); 736 OpcRegInsts.erase(Match); 737 } 738 739 // Broadcast tables 740 StringRef MemInstName = MemInst->TheDef->getName(); 741 if (!MemInstName.contains("mb") && !MemInstName.contains("mib")) 742 continue; 743 RegInstsIt = RegInstsForBroadcast.find(Opc); 744 assert(RegInstsIt != RegInstsForBroadcast.end() && 745 "Unexpected control flow"); 746 std::vector<const CodeGenInstruction *> &OpcRegInstsForBroadcast = 747 RegInstsIt->second; 748 Match = find_if(OpcRegInstsForBroadcast, 749 IsMatch(MemInst, /*IsBroadcast=*/true, Variant)); 750 if (Match != OpcRegInstsForBroadcast.end()) { 751 updateTables(FixUp(*Match), MemInst, 0, /*IsManual=*/false, 752 /*IsBroadcast=*/true); 753 OpcRegInstsForBroadcast.erase(Match); 754 } 755 } 756 757 // Add the manually mapped instructions listed above. 
758 for (const ManualMapEntry &Entry : ManualMapSet) { 759 Record *RegInstIter = Records.getDef(Entry.RegInstStr); 760 Record *MemInstIter = Records.getDef(Entry.MemInstStr); 761 762 updateTables(&(Target.getInstruction(RegInstIter)), 763 &(Target.getInstruction(MemInstIter)), Entry.Strategy, true); 764 } 765 766 #ifndef NDEBUG 767 auto CheckMemFoldTable = [](const FoldTable &Table) -> void { 768 for (const auto &Record : Table) { 769 auto &FoldEntry = Record.second; 770 FoldEntry.checkCorrectness(); 771 } 772 }; 773 CheckMemFoldTable(Table2Addr); 774 CheckMemFoldTable(Table0); 775 CheckMemFoldTable(Table1); 776 CheckMemFoldTable(Table2); 777 CheckMemFoldTable(Table3); 778 CheckMemFoldTable(Table4); 779 CheckMemFoldTable(BroadcastTable1); 780 CheckMemFoldTable(BroadcastTable2); 781 CheckMemFoldTable(BroadcastTable3); 782 CheckMemFoldTable(BroadcastTable4); 783 #endif 784 #define PRINT_TABLE(TABLE) printTable(TABLE, #TABLE, OS); 785 // Print all tables. 786 PRINT_TABLE(Table2Addr) 787 PRINT_TABLE(Table0) 788 PRINT_TABLE(Table1) 789 PRINT_TABLE(Table2) 790 PRINT_TABLE(Table3) 791 PRINT_TABLE(Table4) 792 PRINT_TABLE(BroadcastTable1) 793 PRINT_TABLE(BroadcastTable2) 794 PRINT_TABLE(BroadcastTable3) 795 PRINT_TABLE(BroadcastTable4) 796 } 797 798 static TableGen::Emitter::OptClass<X86FoldTablesEmitter> 799 X("gen-x86-fold-tables", "Generate X86 fold tables"); 800