xref: /freebsd/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp (revision 53120fbb68952b7d620c2c0e1cf05c5017fc1b27)
1 //===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This tablegen backend is responsible for emitting the memory fold tables of
10 // the X86 backend instructions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeGenInstruction.h"
15 #include "CodeGenTarget.h"
16 #include "X86RecognizableInstr.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/Support/FormattedStream.h"
19 #include "llvm/Support/X86FoldTablesUtils.h"
20 #include "llvm/TableGen/Record.h"
21 #include "llvm/TableGen/TableGenBackend.h"
22 #include <set>
23 
24 using namespace llvm;
25 using namespace X86Disassembler;
26 
27 namespace {
// One hand-written register/memory pairing. Kept as a plain aggregate so that
// it can be brace-initialized from the ENTRY() X-macro.
struct ManualMapEntry {
  // Record name of the register-form instruction.
  const char *RegInstStr;
  // Record name of the memory-form instruction.
  const char *MemInstStr;
  // TB_* flag bits that are applied to the generated fold-table entry.
  uint16_t Strategy;
};
34 
// Mnemonic fragments of instructions whose memory operand must be explicitly
// aligned. Instruction names are matched against these by substring.
const char *ExplicitAlign[] = {
    // Aligned and non-temporal moves.
    "MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS", "MOVNTPD", "MOVNTDQ", "MOVNTDQA",
    // SHA extensions.
    "SHA1MSG1", "SHA1MSG2", "SHA1NEXTE", "SHA1RNDS4", "SHA256MSG1",
    "SHA256MSG2", "SHA256RNDS2"};
40 
// Mnemonic fragments of instructions that do NOT need an explicitly aligned
// memory operand. Instruction names are matched against these by substring.
const char *ExplicitUnalign[] = {
    // Unaligned moves.
    "MOVDQU", "MOVUPS", "MOVUPD",
    // Packed string compares.
    "PCMPESTRM", "PCMPESTRI", "PCMPISTRM", "PCMPISTRI"};
45 
46 const ManualMapEntry ManualMapSet[] = {
47 #define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS},
48 #include "X86ManualFoldTables.def"
49 };
50 
51 const std::set<StringRef> NoFoldSet = {
52 #define NOFOLD(INSN) #INSN,
53 #include "X86ManualFoldTables.def"
54 };
55 
56 static bool isExplicitAlign(const CodeGenInstruction *Inst) {
57   return any_of(ExplicitAlign, [Inst](const char *InstStr) {
58     return Inst->TheDef->getName().contains(InstStr);
59   });
60 }
61 
62 static bool isExplicitUnalign(const CodeGenInstruction *Inst) {
63   return any_of(ExplicitUnalign, [Inst](const char *InstStr) {
64     return Inst->TheDef->getName().contains(InstStr);
65   });
66 }
67 
68 class X86FoldTablesEmitter {
69   RecordKeeper &Records;
70   CodeGenTarget Target;
71 
72   // Represents an entry in the folding table
73   class X86FoldTableEntry {
74     const CodeGenInstruction *RegInst;
75     const CodeGenInstruction *MemInst;
76 
77   public:
78     bool NoReverse = false;
79     bool NoForward = false;
80     bool FoldLoad = false;
81     bool FoldStore = false;
82     enum BcastType {
83       BCAST_NONE,
84       BCAST_W,
85       BCAST_D,
86       BCAST_Q,
87       BCAST_SS,
88       BCAST_SD,
89       BCAST_SH,
90     };
91     BcastType BroadcastKind = BCAST_NONE;
92 
93     Align Alignment;
94 
95     X86FoldTableEntry() = default;
96     X86FoldTableEntry(const CodeGenInstruction *RegInst,
97                       const CodeGenInstruction *MemInst)
98         : RegInst(RegInst), MemInst(MemInst) {}
99 
100     void print(formatted_raw_ostream &OS) const {
101       OS.indent(2);
102       OS << "{X86::" << RegInst->TheDef->getName() << ", ";
103       OS << "X86::" << MemInst->TheDef->getName() << ", ";
104 
105       std::string Attrs;
106       if (FoldLoad)
107         Attrs += "TB_FOLDED_LOAD|";
108       if (FoldStore)
109         Attrs += "TB_FOLDED_STORE|";
110       if (NoReverse)
111         Attrs += "TB_NO_REVERSE|";
112       if (NoForward)
113         Attrs += "TB_NO_FORWARD|";
114       if (Alignment != Align(1))
115         Attrs += "TB_ALIGN_" + std::to_string(Alignment.value()) + "|";
116       switch (BroadcastKind) {
117       case BCAST_NONE:
118         break;
119       case BCAST_W:
120         Attrs += "TB_BCAST_W|";
121         break;
122       case BCAST_D:
123         Attrs += "TB_BCAST_D|";
124         break;
125       case BCAST_Q:
126         Attrs += "TB_BCAST_Q|";
127         break;
128       case BCAST_SS:
129         Attrs += "TB_BCAST_SS|";
130         break;
131       case BCAST_SD:
132         Attrs += "TB_BCAST_SD|";
133         break;
134       case BCAST_SH:
135         Attrs += "TB_BCAST_SH|";
136         break;
137       }
138 
139       StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|");
140       if (SimplifiedAttrs.empty())
141         SimplifiedAttrs = "0";
142 
143       OS << SimplifiedAttrs << "},\n";
144     }
145 
146 #ifndef NDEBUG
147     // Check that Uses and Defs are same after memory fold.
148     void checkCorrectness() const {
149       auto &RegInstRec = *RegInst->TheDef;
150       auto &MemInstRec = *MemInst->TheDef;
151       auto ListOfUsesReg = RegInstRec.getValueAsListOfDefs("Uses");
152       auto ListOfUsesMem = MemInstRec.getValueAsListOfDefs("Uses");
153       auto ListOfDefsReg = RegInstRec.getValueAsListOfDefs("Defs");
154       auto ListOfDefsMem = MemInstRec.getValueAsListOfDefs("Defs");
155       if (ListOfUsesReg != ListOfUsesMem || ListOfDefsReg != ListOfDefsMem)
156         report_fatal_error("Uses/Defs couldn't be changed after folding " +
157                            RegInstRec.getName() + " to " +
158                            MemInstRec.getName());
159     }
160 #endif
161   };
162 
163   // NOTE: We check the fold tables are sorted in X86InstrFoldTables.cpp by the
164   // enum of the instruction, which is computed in
165   // CodeGenTarget::ComputeInstrsByEnum. So we should use the same comparator
166   // here.
167   // FIXME: Could we share the code with CodeGenTarget::ComputeInstrsByEnum?
168   struct CompareInstrsByEnum {
169     bool operator()(const CodeGenInstruction *LHS,
170                     const CodeGenInstruction *RHS) const {
171       assert(LHS && RHS && "LHS and RHS shouldn't be nullptr");
172       const auto &D1 = *LHS->TheDef;
173       const auto &D2 = *RHS->TheDef;
174       return std::make_tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) <
175              std::make_tuple(!D2.getValueAsBit("isPseudo"), D2.getName());
176     }
177   };
178 
179   typedef std::map<const CodeGenInstruction *, X86FoldTableEntry,
180                    CompareInstrsByEnum>
181       FoldTable;
182   // Table2Addr - Holds instructions which their memory form performs
183   //              load+store.
184   //
185   // Table#i - Holds instructions which the their memory form
186   //           performs a load OR a store, and their #i'th operand is folded.
187   //
188   // BroadcastTable#i - Holds instructions which the their memory form performs
189   //                    a broadcast load and their #i'th operand is folded.
190   FoldTable Table2Addr;
191   FoldTable Table0;
192   FoldTable Table1;
193   FoldTable Table2;
194   FoldTable Table3;
195   FoldTable Table4;
196   FoldTable BroadcastTable1;
197   FoldTable BroadcastTable2;
198   FoldTable BroadcastTable3;
199   FoldTable BroadcastTable4;
200 
201 public:
202   X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
203 
204   // run - Generate the 6 X86 memory fold tables.
205   void run(raw_ostream &OS);
206 
207 private:
208   // Decides to which table to add the entry with the given instructions.
209   // S sets the strategy of adding the TB_NO_REVERSE flag.
210   void updateTables(const CodeGenInstruction *RegInst,
211                     const CodeGenInstruction *MemInst, uint16_t S = 0,
212                     bool IsManual = false, bool IsBroadcast = false);
213 
214   // Generates X86FoldTableEntry with the given instructions and fill it with
215   // the appropriate flags, then adds it to a memory fold table.
216   void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInst,
217                          const CodeGenInstruction *MemInst, uint16_t S,
218                          unsigned FoldedIdx, bool IsManual);
219   // Generates X86FoldTableEntry with the given instructions and adds it to a
220   // broadcast table.
221   void addBroadcastEntry(FoldTable &Table, const CodeGenInstruction *RegInst,
222                          const CodeGenInstruction *MemInst);
223 
224   // Print the given table as a static const C++ array of type
225   // X86FoldTableEntry.
226   void printTable(const FoldTable &Table, StringRef TableName,
227                   formatted_raw_ostream &OS) {
228     OS << "static const X86FoldTableEntry " << TableName << "[] = {\n";
229 
230     for (auto &E : Table)
231       E.second.print(OS);
232 
233     OS << "};\n\n";
234   }
235 };
236 
237 // Return true if one of the instruction's operands is a RST register class
238 static bool hasRSTRegClass(const CodeGenInstruction *Inst) {
239   return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
240     return OpIn.Rec->getName() == "RST" || OpIn.Rec->getName() == "RSTi";
241   });
242 }
243 
244 // Return true if one of the instruction's operands is a ptr_rc_tailcall
245 static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) {
246   return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
247     return OpIn.Rec->getName() == "ptr_rc_tailcall";
248   });
249 }
250 
251 static uint8_t byteFromBitsInit(const BitsInit *B) {
252   unsigned N = B->getNumBits();
253   assert(N <= 8 && "Field is too large for uint8_t!");
254 
255   uint8_t Value = 0;
256   for (unsigned I = 0; I != N; ++I) {
257     BitInit *Bit = cast<BitInit>(B->getBit(I));
258     Value |= Bit->getValue() << I;
259   }
260   return Value;
261 }
262 
263 static bool mayFoldFromForm(uint8_t Form) {
264   switch (Form) {
265   default:
266     return Form >= X86Local::MRM0r && Form <= X86Local::MRM7r;
267   case X86Local::MRMXr:
268   case X86Local::MRMXrCC:
269   case X86Local::MRMDestReg:
270   case X86Local::MRMSrcReg:
271   case X86Local::MRMSrcReg4VOp3:
272   case X86Local::MRMSrcRegOp4:
273   case X86Local::MRMSrcRegCC:
274     return true;
275   }
276 }
277 
278 static bool mayFoldToForm(uint8_t Form) {
279   switch (Form) {
280   default:
281     return Form >= X86Local::MRM0m && Form <= X86Local::MRM7m;
282   case X86Local::MRMXm:
283   case X86Local::MRMXmCC:
284   case X86Local::MRMDestMem:
285   case X86Local::MRMSrcMem:
286   case X86Local::MRMSrcMem4VOp3:
287   case X86Local::MRMSrcMemOp4:
288   case X86Local::MRMSrcMemCC:
289     return true;
290   }
291 }
292 
293 static bool mayFoldFromLeftToRight(uint8_t LHS, uint8_t RHS) {
294   switch (LHS) {
295   default:
296     llvm_unreachable("Unexpected Form!");
297   case X86Local::MRM0r:
298     return RHS == X86Local::MRM0m;
299   case X86Local::MRM1r:
300     return RHS == X86Local::MRM1m;
301   case X86Local::MRM2r:
302     return RHS == X86Local::MRM2m;
303   case X86Local::MRM3r:
304     return RHS == X86Local::MRM3m;
305   case X86Local::MRM4r:
306     return RHS == X86Local::MRM4m;
307   case X86Local::MRM5r:
308     return RHS == X86Local::MRM5m;
309   case X86Local::MRM6r:
310     return RHS == X86Local::MRM6m;
311   case X86Local::MRM7r:
312     return RHS == X86Local::MRM7m;
313   case X86Local::MRMXr:
314     return RHS == X86Local::MRMXm;
315   case X86Local::MRMXrCC:
316     return RHS == X86Local::MRMXmCC;
317   case X86Local::MRMDestReg:
318     return RHS == X86Local::MRMDestMem;
319   case X86Local::MRMSrcReg:
320     return RHS == X86Local::MRMSrcMem;
321   case X86Local::MRMSrcReg4VOp3:
322     return RHS == X86Local::MRMSrcMem4VOp3;
323   case X86Local::MRMSrcRegOp4:
324     return RHS == X86Local::MRMSrcMemOp4;
325   case X86Local::MRMSrcRegCC:
326     return RHS == X86Local::MRMSrcMemCC;
327   }
328 }
329 
330 static bool isNOREXRegClass(const Record *Op) {
331   return Op->getName().contains("_NOREX");
332 }
333 
334 // Function object - Operator() returns true if the given Reg instruction
335 // matches the Mem instruction of this object.
336 class IsMatch {
337   const CodeGenInstruction *MemInst;
338   const X86Disassembler::RecognizableInstrBase MemRI;
339   bool IsBroadcast;
340   const unsigned Variant;
341 
342 public:
343   IsMatch(const CodeGenInstruction *Inst, bool IsBroadcast, unsigned V)
344       : MemInst(Inst), MemRI(*MemInst), IsBroadcast(IsBroadcast), Variant(V) {}
345 
346   bool operator()(const CodeGenInstruction *RegInst) {
347     X86Disassembler::RecognizableInstrBase RegRI(*RegInst);
348     const Record *RegRec = RegInst->TheDef;
349     const Record *MemRec = MemInst->TheDef;
350 
351     // EVEX_B means different things for memory and register forms.
352     // register form: rounding control or SAE
353     // memory form: broadcast
354     if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B))
355       return false;
356     // EVEX_B indicates NDD for MAP4 instructions
357     if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B) &&
358         RegRI.OpMap != X86Local::T_MAP4)
359       return false;
360 
361     if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form))
362       return false;
363 
364     // X86 encoding is crazy, e.g
365     //
366     // f3 0f c7 30       vmxon   (%rax)
367     // f3 0f c7 f0       senduipi        %rax
368     //
369     // This two instruction have similiar encoding fields but are unrelated
370     if (X86Disassembler::getMnemonic(MemInst, Variant) !=
371         X86Disassembler::getMnemonic(RegInst, Variant))
372       return false;
373 
374     // Return false if any of the following fields of does not match.
375     if (std::make_tuple(RegRI.Encoding, RegRI.Opcode, RegRI.OpPrefix,
376                         RegRI.OpMap, RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W,
377                         RegRI.HasVEX_4V, RegRI.HasVEX_L, RegRI.IgnoresVEX_L,
378                         RegRI.IgnoresW, RegRI.HasEVEX_K, RegRI.HasEVEX_KZ,
379                         RegRI.HasEVEX_L2, RegRI.HasEVEX_NF,
380                         RegRec->getValueAsBit("hasEVEX_RC"),
381                         RegRec->getValueAsBit("hasLockPrefix"),
382                         RegRec->getValueAsBit("hasNoTrackPrefix")) !=
383         std::make_tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix,
384                         MemRI.OpMap, MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W,
385                         MemRI.HasVEX_4V, MemRI.HasVEX_L, MemRI.IgnoresVEX_L,
386                         MemRI.IgnoresW, MemRI.HasEVEX_K, MemRI.HasEVEX_KZ,
387                         MemRI.HasEVEX_L2, MemRI.HasEVEX_NF,
388                         MemRec->getValueAsBit("hasEVEX_RC"),
389                         MemRec->getValueAsBit("hasLockPrefix"),
390                         MemRec->getValueAsBit("hasNoTrackPrefix")))
391       return false;
392 
393     // Make sure the sizes of the operands of both instructions suit each other.
394     // This is needed for instructions with intrinsic version (_Int).
395     // Where the only difference is the size of the operands.
396     // For example: VUCOMISDZrm and VUCOMISDrm_Int
397     // Also for instructions that their EVEX version was upgraded to work with
398     // k-registers. For example VPCMPEQBrm (xmm output register) and
399     // VPCMPEQBZ128rm (k register output register).
400     unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();
401     unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();
402     unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
403     unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();
404 
405     // Instructions with one output in their memory form use the memory folded
406     // operand as source and destination (Read-Modify-Write).
407     unsigned RegStartIdx =
408         (MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 1 : 0;
409 
410     bool FoundFoldedOp = false;
411     for (unsigned I = 0, E = MemInst->Operands.size(); I != E; I++) {
412       Record *MemOpRec = MemInst->Operands[I].Rec;
413       Record *RegOpRec = RegInst->Operands[I + RegStartIdx].Rec;
414 
415       if (MemOpRec == RegOpRec)
416         continue;
417 
418       if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec) &&
419           ((getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec)) ||
420            (isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec))))
421         return false;
422 
423       if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec) &&
424           (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec)))
425         return false;
426 
427       if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec) &&
428           (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type")))
429         return false;
430 
431       // Only one operand can be folded.
432       if (FoundFoldedOp)
433         return false;
434 
435       assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec));
436       FoundFoldedOp = true;
437     }
438 
439     return FoundFoldedOp;
440   }
441 };
442 
443 } // end anonymous namespace
444 
445 void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
446                                              const CodeGenInstruction *RegInst,
447                                              const CodeGenInstruction *MemInst,
448                                              uint16_t S, unsigned FoldedIdx,
449                                              bool IsManual) {
450 
451   assert((IsManual || Table.find(RegInst) == Table.end()) &&
452          "Override entry unexpectedly");
453   X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);
454   Record *RegRec = RegInst->TheDef;
455   Record *MemRec = MemInst->TheDef;
456 
457   Result.NoReverse = S & TB_NO_REVERSE;
458   Result.NoForward = S & TB_NO_FORWARD;
459   Result.FoldLoad = S & TB_FOLDED_LOAD;
460   Result.FoldStore = S & TB_FOLDED_STORE;
461   Result.Alignment = Align(1ULL << ((S & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT));
462   if (IsManual) {
463     Table[RegInst] = Result;
464     return;
465   }
466 
467   // Only table0 entries should explicitly specify a load or store flag.
468   if (&Table == &Table0) {
469     unsigned MemInOpsNum = MemRec->getValueAsDag("InOperandList")->getNumArgs();
470     unsigned RegInOpsNum = RegRec->getValueAsDag("InOperandList")->getNumArgs();
471     // If the instruction writes to the folded operand, it will appear as an
472     // output in the register form instruction and as an input in the memory
473     // form instruction.
474     // If the instruction reads from the folded operand, it well appear as in
475     // input in both forms.
476     if (MemInOpsNum == RegInOpsNum)
477       Result.FoldLoad = true;
478     else
479       Result.FoldStore = true;
480   }
481 
482   Record *RegOpRec = RegInst->Operands[FoldedIdx].Rec;
483   Record *MemOpRec = MemInst->Operands[FoldedIdx].Rec;
484 
485   // Unfolding code generates a load/store instruction according to the size of
486   // the register in the register form instruction.
487   // If the register's size is greater than the memory's operand size, do not
488   // allow unfolding.
489 
490   // the unfolded load size will be based on the register size. If that’s bigger
491   // than the memory operand size, the unfolded load will load more memory and
492   // potentially cause a memory fault.
493   if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
494     Result.NoReverse = true;
495 
496   // Check no-kz version's isMoveReg
497   StringRef RegInstName = RegRec->getName();
498   unsigned DropLen =
499       RegInstName.ends_with("rkz") ? 2 : (RegInstName.ends_with("rk") ? 1 : 0);
500   Record *BaseDef =
501       DropLen ? Records.getDef(RegInstName.drop_back(DropLen)) : nullptr;
502   bool IsMoveReg =
503       BaseDef ? Target.getInstruction(BaseDef).isMoveReg : RegInst->isMoveReg;
504   // A masked load can not be unfolded to a full load, otherwise it would access
505   // unexpected memory. A simple store can not be unfolded.
506   if (IsMoveReg && (BaseDef || Result.FoldStore))
507     Result.NoReverse = true;
508 
509   uint8_t Enc = byteFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits"));
510   if (isExplicitAlign(RegInst)) {
511     // The instruction require explicitly aligned memory.
512     BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize");
513     Result.Alignment = Align(byteFromBitsInit(VectSize));
514   } else if (!Enc && !isExplicitUnalign(RegInst) &&
515              getMemOperandSize(MemOpRec) > 64) {
516     // Instructions with XOP/VEX/EVEX encoding do not require alignment while
517     // SSE packed vector instructions require a 16 byte alignment.
518     Result.Alignment = Align(16);
519   }
520   // Expand is only ever created as a masked instruction. It is not safe to
521   // unfold a masked expand because we don't know if it came from an expand load
522   // intrinsic or folding a plain load. If it is from a expand load intrinsic,
523   // Unfolding to plain load would read more elements and could trigger a fault.
524   if (RegRec->getName().contains("EXPAND"))
525     Result.NoReverse = true;
526 
527   Table[RegInst] = Result;
528 }
529 
530 void X86FoldTablesEmitter::addBroadcastEntry(
531     FoldTable &Table, const CodeGenInstruction *RegInst,
532     const CodeGenInstruction *MemInst) {
533 
534   assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly");
535   X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);
536 
537   DagInit *In = MemInst->TheDef->getValueAsDag("InOperandList");
538   for (unsigned I = 0, E = In->getNumArgs(); I != E; ++I) {
539     Result.BroadcastKind =
540         StringSwitch<X86FoldTableEntry::BcastType>(In->getArg(I)->getAsString())
541             .Case("i16mem", X86FoldTableEntry::BCAST_W)
542             .Case("i32mem", X86FoldTableEntry::BCAST_D)
543             .Case("i64mem", X86FoldTableEntry::BCAST_Q)
544             .Case("f16mem", X86FoldTableEntry::BCAST_SH)
545             .Case("f32mem", X86FoldTableEntry::BCAST_SS)
546             .Case("f64mem", X86FoldTableEntry::BCAST_SD)
547             .Default(X86FoldTableEntry::BCAST_NONE);
548     if (Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE)
549       break;
550   }
551   assert(Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE &&
552          "Unknown memory operand for broadcast");
553 
554   Table[RegInst] = Result;
555 }
556 
557 void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst,
558                                         const CodeGenInstruction *MemInst,
559                                         uint16_t S, bool IsManual,
560                                         bool IsBroadcast) {
561 
562   Record *RegRec = RegInst->TheDef;
563   Record *MemRec = MemInst->TheDef;
564   unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();
565   unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();
566   unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
567   unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();
568 
569   // Instructions which Read-Modify-Write should be added to Table2Addr.
570   if (!MemOutSize && RegOutSize == 1 && MemInSize == RegInSize) {
571     assert(!IsBroadcast && "Read-Modify-Write can not be broadcast");
572     // X86 would not unfold Read-Modify-Write instructions so add TB_NO_REVERSE.
573     addEntryWithFlags(Table2Addr, RegInst, MemInst, S | TB_NO_REVERSE, 0,
574                       IsManual);
575     return;
576   }
577 
578   if (MemInSize == RegInSize && MemOutSize == RegOutSize) {
579     // Load-Folding cases.
580     // If the i'th register form operand is a register and the i'th memory form
581     // operand is a memory operand, add instructions to Table#i.
582     for (unsigned I = RegOutSize, E = RegInst->Operands.size(); I < E; I++) {
583       Record *RegOpRec = RegInst->Operands[I].Rec;
584       Record *MemOpRec = MemInst->Operands[I].Rec;
585       // PointerLikeRegClass: For instructions like TAILJMPr, TAILJMPr64,
586       // TAILJMPr64_REX
587       if ((isRegisterOperand(RegOpRec) ||
588            RegOpRec->isSubClassOf("PointerLikeRegClass")) &&
589           isMemoryOperand(MemOpRec)) {
590         switch (I) {
591         case 0:
592           assert(!IsBroadcast && "BroadcastTable0 needs to be added");
593           addEntryWithFlags(Table0, RegInst, MemInst, S, 0, IsManual);
594           return;
595         case 1:
596           IsBroadcast
597               ? addBroadcastEntry(BroadcastTable1, RegInst, MemInst)
598               : addEntryWithFlags(Table1, RegInst, MemInst, S, 1, IsManual);
599           return;
600         case 2:
601           IsBroadcast
602               ? addBroadcastEntry(BroadcastTable2, RegInst, MemInst)
603               : addEntryWithFlags(Table2, RegInst, MemInst, S, 2, IsManual);
604           return;
605         case 3:
606           IsBroadcast
607               ? addBroadcastEntry(BroadcastTable3, RegInst, MemInst)
608               : addEntryWithFlags(Table3, RegInst, MemInst, S, 3, IsManual);
609           return;
610         case 4:
611           IsBroadcast
612               ? addBroadcastEntry(BroadcastTable4, RegInst, MemInst)
613               : addEntryWithFlags(Table4, RegInst, MemInst, S, 4, IsManual);
614           return;
615         }
616       }
617     }
618   } else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) {
619     // Store-Folding cases.
620     // If the memory form instruction performs a store, the *output*
621     // register of the register form instructions disappear and instead a
622     // memory *input* operand appears in the memory form instruction.
623     // For example:
624     //   MOVAPSrr => (outs VR128:$dst), (ins VR128:$src)
625     //   MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src)
626     Record *RegOpRec = RegInst->Operands[RegOutSize - 1].Rec;
627     Record *MemOpRec = MemInst->Operands[RegOutSize - 1].Rec;
628     if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) &&
629         getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec)) {
630       assert(!IsBroadcast && "Store can not be broadcast");
631       addEntryWithFlags(Table0, RegInst, MemInst, S, 0, IsManual);
632     }
633   }
634 }
635 
636 void X86FoldTablesEmitter::run(raw_ostream &O) {
637   formatted_raw_ostream OS(O);
638 
639   // Holds all memory instructions
640   std::vector<const CodeGenInstruction *> MemInsts;
641   // Holds all register instructions - divided according to opcode.
642   std::map<uint8_t, std::vector<const CodeGenInstruction *>> RegInsts;
643 
644   ArrayRef<const CodeGenInstruction *> NumberedInstructions =
645       Target.getInstructionsByEnumValue();
646 
647   for (const CodeGenInstruction *Inst : NumberedInstructions) {
648     const Record *Rec = Inst->TheDef;
649     if (!Rec->isSubClassOf("X86Inst") || Rec->getValueAsBit("isAsmParserOnly"))
650       continue;
651 
652     if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end())
653       continue;
654 
655     // Promoted legacy instruction is in EVEX space, and has REX2-encoding
656     // alternative. It's added due to HW design and never emitted by compiler.
657     if (byteFromBitsInit(Rec->getValueAsBitsInit("OpMapBits")) ==
658             X86Local::T_MAP4 &&
659         byteFromBitsInit(Rec->getValueAsBitsInit("explicitOpPrefixBits")) ==
660             X86Local::ExplicitEVEX)
661       continue;
662 
663     // - Instructions including RST register class operands are not relevant
664     //   for memory folding (for further details check the explanation in
665     //   lib/Target/X86/X86InstrFPStack.td file).
666     // - Some instructions (listed in the manual map above) use the register
667     //   class ptr_rc_tailcall, which can be of a size 32 or 64, to ensure
668     //   safe mapping of these instruction we manually map them and exclude
669     //   them from the automation.
670     if (hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst))
671       continue;
672 
673     // Add all the memory form instructions to MemInsts, and all the register
674     // form instructions to RegInsts[Opc], where Opc is the opcode of each
675     // instructions. this helps reducing the runtime of the backend.
676     const BitsInit *FormBits = Rec->getValueAsBitsInit("FormBits");
677     uint8_t Form = byteFromBitsInit(FormBits);
678     if (mayFoldToForm(Form))
679       MemInsts.push_back(Inst);
680     else if (mayFoldFromForm(Form)) {
681       uint8_t Opc = byteFromBitsInit(Rec->getValueAsBitsInit("Opcode"));
682       RegInsts[Opc].push_back(Inst);
683     }
684   }
685 
686   // Create a copy b/c the register instruction will removed when a new entry is
687   // added into memory fold tables.
688   auto RegInstsForBroadcast = RegInsts;
689 
690   Record *AsmWriter = Target.getAsmWriter();
691   unsigned Variant = AsmWriter->getValueAsInt("Variant");
692   auto FixUp = [&](const CodeGenInstruction *RegInst) {
693     StringRef RegInstName = RegInst->TheDef->getName();
694     if (RegInstName.ends_with("_REV") || RegInstName.ends_with("_alt"))
695       if (auto *RegAltRec = Records.getDef(RegInstName.drop_back(4)))
696         RegInst = &Target.getInstruction(RegAltRec);
697     return RegInst;
698   };
699   // For each memory form instruction, try to find its register form
700   // instruction.
701   for (const CodeGenInstruction *MemInst : MemInsts) {
702     uint8_t Opc =
703         byteFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode"));
704 
705     auto RegInstsIt = RegInsts.find(Opc);
706     if (RegInstsIt == RegInsts.end())
707       continue;
708 
709     // Two forms (memory & register) of the same instruction must have the same
710     // opcode.
711     std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second;
712 
713     // Memory fold tables
714     auto Match =
715         find_if(OpcRegInsts, IsMatch(MemInst, /*IsBroadcast=*/false, Variant));
716     if (Match != OpcRegInsts.end()) {
717       updateTables(FixUp(*Match), MemInst);
718       OpcRegInsts.erase(Match);
719     }
720 
721     // Broadcast tables
722     StringRef MemInstName = MemInst->TheDef->getName();
723     if (!MemInstName.contains("mb") && !MemInstName.contains("mib"))
724       continue;
725     RegInstsIt = RegInstsForBroadcast.find(Opc);
726     assert(RegInstsIt != RegInstsForBroadcast.end() &&
727            "Unexpected control flow");
728     std::vector<const CodeGenInstruction *> &OpcRegInstsForBroadcast =
729         RegInstsIt->second;
730     Match = find_if(OpcRegInstsForBroadcast,
731                     IsMatch(MemInst, /*IsBroadcast=*/true, Variant));
732     if (Match != OpcRegInstsForBroadcast.end()) {
733       updateTables(FixUp(*Match), MemInst, 0, /*IsManual=*/false,
734                    /*IsBroadcast=*/true);
735       OpcRegInstsForBroadcast.erase(Match);
736     }
737   }
738 
739   // Add the manually mapped instructions listed above.
740   for (const ManualMapEntry &Entry : ManualMapSet) {
741     Record *RegInstIter = Records.getDef(Entry.RegInstStr);
742     Record *MemInstIter = Records.getDef(Entry.MemInstStr);
743 
744     updateTables(&(Target.getInstruction(RegInstIter)),
745                  &(Target.getInstruction(MemInstIter)), Entry.Strategy, true);
746   }
747 
748 #ifndef NDEBUG
749   auto CheckMemFoldTable = [](const FoldTable &Table) -> void {
750     for (const auto &Record : Table) {
751       auto &FoldEntry = Record.second;
752       FoldEntry.checkCorrectness();
753     }
754   };
755   CheckMemFoldTable(Table2Addr);
756   CheckMemFoldTable(Table0);
757   CheckMemFoldTable(Table1);
758   CheckMemFoldTable(Table2);
759   CheckMemFoldTable(Table3);
760   CheckMemFoldTable(Table4);
761   CheckMemFoldTable(BroadcastTable1);
762   CheckMemFoldTable(BroadcastTable2);
763   CheckMemFoldTable(BroadcastTable3);
764   CheckMemFoldTable(BroadcastTable4);
765 #endif
766 #define PRINT_TABLE(TABLE) printTable(TABLE, #TABLE, OS);
767   // Print all tables.
768   PRINT_TABLE(Table2Addr)
769   PRINT_TABLE(Table0)
770   PRINT_TABLE(Table1)
771   PRINT_TABLE(Table2)
772   PRINT_TABLE(Table3)
773   PRINT_TABLE(Table4)
774   PRINT_TABLE(BroadcastTable1)
775   PRINT_TABLE(BroadcastTable2)
776   PRINT_TABLE(BroadcastTable3)
777   PRINT_TABLE(BroadcastTable4)
778 }
779 
780 static TableGen::Emitter::OptClass<X86FoldTablesEmitter>
781     X("gen-x86-fold-tables", "Generate X86 fold tables");
782