1 //===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This tablegen backend is responsible for emitting the memory fold tables of
10 // the X86 backend instructions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "Common/CodeGenInstruction.h"
15 #include "Common/CodeGenTarget.h"
16 #include "X86RecognizableInstr.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/Support/X86FoldTablesUtils.h"
19 #include "llvm/TableGen/Record.h"
20 #include "llvm/TableGen/TableGenBackend.h"
21 #include <set>
22 
23 using namespace llvm;
24 using namespace X86Disassembler;
25 
26 namespace {
// One hand-written register-form/memory-form pairing. The fields are filled,
// in this order, by the ENTRY(REG, MEM, FLAGS) expansion that builds
// ManualMapSet.
struct ManualMapEntry {
  // Record name of the register-form instruction.
  const char *RegInstStr;
  // Record name of the memory-form instruction.
  const char *MemInstStr;
  // Flag bits forwarded to updateTables() for this pair.
  uint16_t Strategy;
};
33 
// Name fragments of instructions requiring explicitly aligned memory.
// isExplicitAlign() matches these as substrings of the instruction record
// name. Both the strings and the array slots are const: this is a fixed
// lookup table that must never be modified at runtime.
const char *const ExplicitAlign[] = {"MOVDQA",  "MOVAPS",  "MOVAPD",
                                     "MOVNTPS", "MOVNTPD", "MOVNTDQ",
                                     "MOVNTDQA"};

// Name fragments of instructions NOT requiring explicit memory alignment.
// isExplicitUnalign() matches these as substrings of the instruction record
// name. Fully const for the same reason as ExplicitAlign.
const char *const ExplicitUnalign[] = {"MOVDQU",    "MOVUPS",    "MOVUPD",
                                       "PCMPESTRM", "PCMPESTRI", "PCMPISTRM",
                                       "PCMPISTRI"};
42 
// Manually maintained register/memory instruction pairs. Every
// ENTRY(REG, MEM, FLAGS) line of X86ManualFoldTables.def is stringized into
// one ManualMapEntry {"REG", "MEM", FLAGS}.
const ManualMapEntry ManualMapSet[] = {
#define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS},
#include "X86ManualFoldTables.def"
};

// Record names collected from the NOFOLD(INSN) lines of
// X86ManualFoldTables.def; run() skips any instruction whose name is in this
// set.
const std::set<StringRef> NoFoldSet = {
#define NOFOLD(INSN) #INSN,
#include "X86ManualFoldTables.def"
};
52 
isExplicitAlign(const CodeGenInstruction * Inst)53 static bool isExplicitAlign(const CodeGenInstruction *Inst) {
54   return any_of(ExplicitAlign, [Inst](const char *InstStr) {
55     return Inst->TheDef->getName().contains(InstStr);
56   });
57 }
58 
isExplicitUnalign(const CodeGenInstruction * Inst)59 static bool isExplicitUnalign(const CodeGenInstruction *Inst) {
60   return any_of(ExplicitUnalign, [Inst](const char *InstStr) {
61     return Inst->TheDef->getName().contains(InstStr);
62   });
63 }
64 
65 class X86FoldTablesEmitter {
66   RecordKeeper &Records;
67   CodeGenTarget Target;
68 
69   // Represents an entry in the folding table
70   class X86FoldTableEntry {
71     const CodeGenInstruction *RegInst;
72     const CodeGenInstruction *MemInst;
73 
74   public:
75     bool NoReverse = false;
76     bool NoForward = false;
77     bool FoldLoad = false;
78     bool FoldStore = false;
79     enum BcastType {
80       BCAST_NONE,
81       BCAST_W,
82       BCAST_D,
83       BCAST_Q,
84       BCAST_SS,
85       BCAST_SD,
86       BCAST_SH,
87     };
88     BcastType BroadcastKind = BCAST_NONE;
89 
90     Align Alignment;
91 
92     X86FoldTableEntry() = default;
X86FoldTableEntry(const CodeGenInstruction * RegInst,const CodeGenInstruction * MemInst)93     X86FoldTableEntry(const CodeGenInstruction *RegInst,
94                       const CodeGenInstruction *MemInst)
95         : RegInst(RegInst), MemInst(MemInst) {}
96 
print(raw_ostream & OS) const97     void print(raw_ostream &OS) const {
98       OS.indent(2);
99       OS << "{X86::" << RegInst->TheDef->getName() << ", ";
100       OS << "X86::" << MemInst->TheDef->getName() << ", ";
101 
102       std::string Attrs;
103       if (FoldLoad)
104         Attrs += "TB_FOLDED_LOAD|";
105       if (FoldStore)
106         Attrs += "TB_FOLDED_STORE|";
107       if (NoReverse)
108         Attrs += "TB_NO_REVERSE|";
109       if (NoForward)
110         Attrs += "TB_NO_FORWARD|";
111       if (Alignment != Align(1))
112         Attrs += "TB_ALIGN_" + std::to_string(Alignment.value()) + "|";
113       switch (BroadcastKind) {
114       case BCAST_NONE:
115         break;
116       case BCAST_W:
117         Attrs += "TB_BCAST_W|";
118         break;
119       case BCAST_D:
120         Attrs += "TB_BCAST_D|";
121         break;
122       case BCAST_Q:
123         Attrs += "TB_BCAST_Q|";
124         break;
125       case BCAST_SS:
126         Attrs += "TB_BCAST_SS|";
127         break;
128       case BCAST_SD:
129         Attrs += "TB_BCAST_SD|";
130         break;
131       case BCAST_SH:
132         Attrs += "TB_BCAST_SH|";
133         break;
134       }
135 
136       StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|");
137       if (SimplifiedAttrs.empty())
138         SimplifiedAttrs = "0";
139 
140       OS << SimplifiedAttrs << "},\n";
141     }
142 
143 #ifndef NDEBUG
144     // Check that Uses and Defs are same after memory fold.
checkCorrectness() const145     void checkCorrectness() const {
146       auto &RegInstRec = *RegInst->TheDef;
147       auto &MemInstRec = *MemInst->TheDef;
148       auto ListOfUsesReg = RegInstRec.getValueAsListOfDefs("Uses");
149       auto ListOfUsesMem = MemInstRec.getValueAsListOfDefs("Uses");
150       auto ListOfDefsReg = RegInstRec.getValueAsListOfDefs("Defs");
151       auto ListOfDefsMem = MemInstRec.getValueAsListOfDefs("Defs");
152       if (ListOfUsesReg != ListOfUsesMem || ListOfDefsReg != ListOfDefsMem)
153         report_fatal_error("Uses/Defs couldn't be changed after folding " +
154                            RegInstRec.getName() + " to " +
155                            MemInstRec.getName());
156     }
157 #endif
158   };
159 
160   // NOTE: We check the fold tables are sorted in X86InstrFoldTables.cpp by the
161   // enum of the instruction, which is computed in
162   // CodeGenTarget::ComputeInstrsByEnum. So we should use the same comparator
163   // here.
164   // FIXME: Could we share the code with CodeGenTarget::ComputeInstrsByEnum?
165   struct CompareInstrsByEnum {
operator ()__anon982d773b0111::X86FoldTablesEmitter::CompareInstrsByEnum166     bool operator()(const CodeGenInstruction *LHS,
167                     const CodeGenInstruction *RHS) const {
168       assert(LHS && RHS && "LHS and RHS shouldn't be nullptr");
169       const auto &D1 = *LHS->TheDef;
170       const auto &D2 = *RHS->TheDef;
171       return std::tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) <
172              std::tuple(!D2.getValueAsBit("isPseudo"), D2.getName());
173     }
174   };
175 
176   typedef std::map<const CodeGenInstruction *, X86FoldTableEntry,
177                    CompareInstrsByEnum>
178       FoldTable;
179   // Table2Addr - Holds instructions which their memory form performs
180   //              load+store.
181   //
182   // Table#i - Holds instructions which the their memory form
183   //           performs a load OR a store, and their #i'th operand is folded.
184   //
185   // BroadcastTable#i - Holds instructions which the their memory form performs
186   //                    a broadcast load and their #i'th operand is folded.
187   FoldTable Table2Addr;
188   FoldTable Table0;
189   FoldTable Table1;
190   FoldTable Table2;
191   FoldTable Table3;
192   FoldTable Table4;
193   FoldTable BroadcastTable1;
194   FoldTable BroadcastTable2;
195   FoldTable BroadcastTable3;
196   FoldTable BroadcastTable4;
197 
198 public:
X86FoldTablesEmitter(RecordKeeper & R)199   X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
200 
201   // run - Generate the 6 X86 memory fold tables.
202   void run(raw_ostream &OS);
203 
204 private:
205   // Decides to which table to add the entry with the given instructions.
206   // S sets the strategy of adding the TB_NO_REVERSE flag.
207   void updateTables(const CodeGenInstruction *RegInst,
208                     const CodeGenInstruction *MemInst, uint16_t S = 0,
209                     bool IsManual = false, bool IsBroadcast = false);
210 
211   // Generates X86FoldTableEntry with the given instructions and fill it with
212   // the appropriate flags, then adds it to a memory fold table.
213   void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInst,
214                          const CodeGenInstruction *MemInst, uint16_t S,
215                          unsigned FoldedIdx, bool IsManual);
216   // Generates X86FoldTableEntry with the given instructions and adds it to a
217   // broadcast table.
218   void addBroadcastEntry(FoldTable &Table, const CodeGenInstruction *RegInst,
219                          const CodeGenInstruction *MemInst);
220 
221   // Print the given table as a static const C++ array of type
222   // X86FoldTableEntry.
printTable(const FoldTable & Table,StringRef TableName,raw_ostream & OS)223   void printTable(const FoldTable &Table, StringRef TableName,
224                   raw_ostream &OS) {
225     OS << "static const X86FoldTableEntry " << TableName << "[] = {\n";
226 
227     for (auto &E : Table)
228       E.second.print(OS);
229 
230     OS << "};\n\n";
231   }
232 };
233 
234 // Return true if one of the instruction's operands is a RST register class
hasRSTRegClass(const CodeGenInstruction * Inst)235 static bool hasRSTRegClass(const CodeGenInstruction *Inst) {
236   return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
237     return OpIn.Rec->getName() == "RST" || OpIn.Rec->getName() == "RSTi";
238   });
239 }
240 
241 // Return true if one of the instruction's operands is a ptr_rc_tailcall
hasPtrTailcallRegClass(const CodeGenInstruction * Inst)242 static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) {
243   return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {
244     return OpIn.Rec->getName() == "ptr_rc_tailcall";
245   });
246 }
247 
byteFromBitsInit(const BitsInit * B)248 static uint8_t byteFromBitsInit(const BitsInit *B) {
249   unsigned N = B->getNumBits();
250   assert(N <= 8 && "Field is too large for uint8_t!");
251 
252   uint8_t Value = 0;
253   for (unsigned I = 0; I != N; ++I) {
254     BitInit *Bit = cast<BitInit>(B->getBit(I));
255     Value |= Bit->getValue() << I;
256   }
257   return Value;
258 }
259 
mayFoldFromForm(uint8_t Form)260 static bool mayFoldFromForm(uint8_t Form) {
261   switch (Form) {
262   default:
263     return Form >= X86Local::MRM0r && Form <= X86Local::MRM7r;
264   case X86Local::MRMXr:
265   case X86Local::MRMXrCC:
266   case X86Local::MRMDestReg:
267   case X86Local::MRMSrcReg:
268   case X86Local::MRMSrcReg4VOp3:
269   case X86Local::MRMSrcRegOp4:
270   case X86Local::MRMSrcRegCC:
271     return true;
272   }
273 }
274 
mayFoldToForm(uint8_t Form)275 static bool mayFoldToForm(uint8_t Form) {
276   switch (Form) {
277   default:
278     return Form >= X86Local::MRM0m && Form <= X86Local::MRM7m;
279   case X86Local::MRMXm:
280   case X86Local::MRMXmCC:
281   case X86Local::MRMDestMem:
282   case X86Local::MRMSrcMem:
283   case X86Local::MRMSrcMem4VOp3:
284   case X86Local::MRMSrcMemOp4:
285   case X86Local::MRMSrcMemCC:
286     return true;
287   }
288 }
289 
mayFoldFromLeftToRight(uint8_t LHS,uint8_t RHS)290 static bool mayFoldFromLeftToRight(uint8_t LHS, uint8_t RHS) {
291   switch (LHS) {
292   default:
293     llvm_unreachable("Unexpected Form!");
294   case X86Local::MRM0r:
295     return RHS == X86Local::MRM0m;
296   case X86Local::MRM1r:
297     return RHS == X86Local::MRM1m;
298   case X86Local::MRM2r:
299     return RHS == X86Local::MRM2m;
300   case X86Local::MRM3r:
301     return RHS == X86Local::MRM3m;
302   case X86Local::MRM4r:
303     return RHS == X86Local::MRM4m;
304   case X86Local::MRM5r:
305     return RHS == X86Local::MRM5m;
306   case X86Local::MRM6r:
307     return RHS == X86Local::MRM6m;
308   case X86Local::MRM7r:
309     return RHS == X86Local::MRM7m;
310   case X86Local::MRMXr:
311     return RHS == X86Local::MRMXm;
312   case X86Local::MRMXrCC:
313     return RHS == X86Local::MRMXmCC;
314   case X86Local::MRMDestReg:
315     return RHS == X86Local::MRMDestMem;
316   case X86Local::MRMSrcReg:
317     return RHS == X86Local::MRMSrcMem;
318   case X86Local::MRMSrcReg4VOp3:
319     return RHS == X86Local::MRMSrcMem4VOp3;
320   case X86Local::MRMSrcRegOp4:
321     return RHS == X86Local::MRMSrcMemOp4;
322   case X86Local::MRMSrcRegCC:
323     return RHS == X86Local::MRMSrcMemCC;
324   }
325 }
326 
isNOREXRegClass(const Record * Op)327 static bool isNOREXRegClass(const Record *Op) {
328   return Op->getName().contains("_NOREX");
329 }
330 
331 // Function object - Operator() returns true if the given Reg instruction
332 // matches the Mem instruction of this object.
333 class IsMatch {
334   const CodeGenInstruction *MemInst;
335   const X86Disassembler::RecognizableInstrBase MemRI;
336   bool IsBroadcast;
337   const unsigned Variant;
338 
339 public:
IsMatch(const CodeGenInstruction * Inst,bool IsBroadcast,unsigned V)340   IsMatch(const CodeGenInstruction *Inst, bool IsBroadcast, unsigned V)
341       : MemInst(Inst), MemRI(*MemInst), IsBroadcast(IsBroadcast), Variant(V) {}
342 
operator ()(const CodeGenInstruction * RegInst)343   bool operator()(const CodeGenInstruction *RegInst) {
344     X86Disassembler::RecognizableInstrBase RegRI(*RegInst);
345     const Record *RegRec = RegInst->TheDef;
346     const Record *MemRec = MemInst->TheDef;
347 
348     // EVEX_B means different things for memory and register forms.
349     // register form: rounding control or SAE
350     // memory form: broadcast
351     if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B))
352       return false;
353     // EVEX_B indicates NDD for MAP4 instructions
354     if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B) &&
355         RegRI.OpMap != X86Local::T_MAP4)
356       return false;
357 
358     if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form))
359       return false;
360 
361     // X86 encoding is crazy, e.g
362     //
363     // f3 0f c7 30       vmxon   (%rax)
364     // f3 0f c7 f0       senduipi        %rax
365     //
366     // This two instruction have similiar encoding fields but are unrelated
367     if (X86Disassembler::getMnemonic(MemInst, Variant) !=
368         X86Disassembler::getMnemonic(RegInst, Variant))
369       return false;
370 
371     // Return false if any of the following fields of does not match.
372     if (std::tuple(RegRI.Encoding, RegRI.Opcode, RegRI.OpPrefix, RegRI.OpMap,
373                    RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W, RegRI.HasVEX_4V,
374                    RegRI.HasVEX_L, RegRI.IgnoresVEX_L, RegRI.IgnoresW,
375                    RegRI.HasEVEX_K, RegRI.HasEVEX_KZ, RegRI.HasEVEX_L2,
376                    RegRI.HasEVEX_NF, RegRec->getValueAsBit("hasEVEX_RC"),
377                    RegRec->getValueAsBit("hasLockPrefix"),
378                    RegRec->getValueAsBit("hasNoTrackPrefix")) !=
379         std::tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix, MemRI.OpMap,
380                    MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W, MemRI.HasVEX_4V,
381                    MemRI.HasVEX_L, MemRI.IgnoresVEX_L, MemRI.IgnoresW,
382                    MemRI.HasEVEX_K, MemRI.HasEVEX_KZ, MemRI.HasEVEX_L2,
383                    MemRI.HasEVEX_NF, MemRec->getValueAsBit("hasEVEX_RC"),
384                    MemRec->getValueAsBit("hasLockPrefix"),
385                    MemRec->getValueAsBit("hasNoTrackPrefix")))
386       return false;
387 
388     // Make sure the sizes of the operands of both instructions suit each other.
389     // This is needed for instructions with intrinsic version (_Int).
390     // Where the only difference is the size of the operands.
391     // For example: VUCOMISDZrm and VUCOMISDrm_Int
392     // Also for instructions that their EVEX version was upgraded to work with
393     // k-registers. For example VPCMPEQBrm (xmm output register) and
394     // VPCMPEQBZ128rm (k register output register).
395     unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();
396     unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();
397     unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
398     unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();
399 
400     // Instructions with one output in their memory form use the memory folded
401     // operand as source and destination (Read-Modify-Write).
402     unsigned RegStartIdx =
403         (MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 1 : 0;
404 
405     bool FoundFoldedOp = false;
406     for (unsigned I = 0, E = MemInst->Operands.size(); I != E; I++) {
407       Record *MemOpRec = MemInst->Operands[I].Rec;
408       Record *RegOpRec = RegInst->Operands[I + RegStartIdx].Rec;
409 
410       if (MemOpRec == RegOpRec)
411         continue;
412 
413       if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec) &&
414           ((getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec)) ||
415            (isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec))))
416         return false;
417 
418       if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec) &&
419           (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec)))
420         return false;
421 
422       if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec) &&
423           (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type")))
424         return false;
425 
426       // Only one operand can be folded.
427       if (FoundFoldedOp)
428         return false;
429 
430       assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec));
431       FoundFoldedOp = true;
432     }
433 
434     return FoundFoldedOp;
435   }
436 };
437 
438 } // end anonymous namespace
439 
addEntryWithFlags(FoldTable & Table,const CodeGenInstruction * RegInst,const CodeGenInstruction * MemInst,uint16_t S,unsigned FoldedIdx,bool IsManual)440 void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
441                                              const CodeGenInstruction *RegInst,
442                                              const CodeGenInstruction *MemInst,
443                                              uint16_t S, unsigned FoldedIdx,
444                                              bool IsManual) {
445 
446   assert((IsManual || Table.find(RegInst) == Table.end()) &&
447          "Override entry unexpectedly");
448   X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);
449   Record *RegRec = RegInst->TheDef;
450   Result.NoReverse = S & TB_NO_REVERSE;
451   Result.NoForward = S & TB_NO_FORWARD;
452   Result.FoldLoad = S & TB_FOLDED_LOAD;
453   Result.FoldStore = S & TB_FOLDED_STORE;
454   Result.Alignment = Align(1ULL << ((S & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT));
455   if (IsManual) {
456     Table[RegInst] = Result;
457     return;
458   }
459 
460   Record *RegOpRec = RegInst->Operands[FoldedIdx].Rec;
461   Record *MemOpRec = MemInst->Operands[FoldedIdx].Rec;
462 
463   // Unfolding code generates a load/store instruction according to the size of
464   // the register in the register form instruction.
465   // If the register's size is greater than the memory's operand size, do not
466   // allow unfolding.
467 
468   // the unfolded load size will be based on the register size. If that’s bigger
469   // than the memory operand size, the unfolded load will load more memory and
470   // potentially cause a memory fault.
471   if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
472     Result.NoReverse = true;
473 
474   // Check no-kz version's isMoveReg
475   StringRef RegInstName = RegRec->getName();
476   unsigned DropLen =
477       RegInstName.ends_with("rkz") ? 2 : (RegInstName.ends_with("rk") ? 1 : 0);
478   Record *BaseDef =
479       DropLen ? Records.getDef(RegInstName.drop_back(DropLen)) : nullptr;
480   bool IsMoveReg =
481       BaseDef ? Target.getInstruction(BaseDef).isMoveReg : RegInst->isMoveReg;
482   // A masked load can not be unfolded to a full load, otherwise it would access
483   // unexpected memory. A simple store can not be unfolded.
484   if (IsMoveReg && (BaseDef || Result.FoldStore))
485     Result.NoReverse = true;
486 
487   uint8_t Enc = byteFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits"));
488   if (isExplicitAlign(RegInst)) {
489     // The instruction require explicitly aligned memory.
490     BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize");
491     Result.Alignment = Align(byteFromBitsInit(VectSize));
492   } else if (!Enc && !isExplicitUnalign(RegInst) &&
493              getMemOperandSize(MemOpRec) > 64) {
494     // Instructions with XOP/VEX/EVEX encoding do not require alignment while
495     // SSE packed vector instructions require a 16 byte alignment.
496     Result.Alignment = Align(16);
497   }
498   // Expand is only ever created as a masked instruction. It is not safe to
499   // unfold a masked expand because we don't know if it came from an expand load
500   // intrinsic or folding a plain load. If it is from a expand load intrinsic,
501   // Unfolding to plain load would read more elements and could trigger a fault.
502   if (RegRec->getName().contains("EXPAND"))
503     Result.NoReverse = true;
504 
505   Table[RegInst] = Result;
506 }
507 
addBroadcastEntry(FoldTable & Table,const CodeGenInstruction * RegInst,const CodeGenInstruction * MemInst)508 void X86FoldTablesEmitter::addBroadcastEntry(
509     FoldTable &Table, const CodeGenInstruction *RegInst,
510     const CodeGenInstruction *MemInst) {
511 
512   assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly");
513   X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);
514 
515   DagInit *In = MemInst->TheDef->getValueAsDag("InOperandList");
516   for (unsigned I = 0, E = In->getNumArgs(); I != E; ++I) {
517     Result.BroadcastKind =
518         StringSwitch<X86FoldTableEntry::BcastType>(In->getArg(I)->getAsString())
519             .Case("i16mem", X86FoldTableEntry::BCAST_W)
520             .Case("i32mem", X86FoldTableEntry::BCAST_D)
521             .Case("i64mem", X86FoldTableEntry::BCAST_Q)
522             .Case("f16mem", X86FoldTableEntry::BCAST_SH)
523             .Case("f32mem", X86FoldTableEntry::BCAST_SS)
524             .Case("f64mem", X86FoldTableEntry::BCAST_SD)
525             .Default(X86FoldTableEntry::BCAST_NONE);
526     if (Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE)
527       break;
528   }
529   assert(Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE &&
530          "Unknown memory operand for broadcast");
531 
532   Table[RegInst] = Result;
533 }
534 
updateTables(const CodeGenInstruction * RegInst,const CodeGenInstruction * MemInst,uint16_t S,bool IsManual,bool IsBroadcast)535 void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst,
536                                         const CodeGenInstruction *MemInst,
537                                         uint16_t S, bool IsManual,
538                                         bool IsBroadcast) {
539 
540   Record *RegRec = RegInst->TheDef;
541   Record *MemRec = MemInst->TheDef;
542   unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();
543   unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();
544   unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
545   unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();
546 
547   // Instructions which Read-Modify-Write should be added to Table2Addr.
548   if (!MemOutSize && RegOutSize == 1 && MemInSize == RegInSize) {
549     assert(!IsBroadcast && "Read-Modify-Write can not be broadcast");
550     // X86 would not unfold Read-Modify-Write instructions so add TB_NO_REVERSE.
551     addEntryWithFlags(Table2Addr, RegInst, MemInst, S | TB_NO_REVERSE, 0,
552                       IsManual);
553     return;
554   }
555 
556   // Only table0 entries should explicitly specify a load or store flag.
557   // If the instruction writes to the folded operand, it will appear as
558   // an output in the register form instruction and as an input in the
559   // memory form instruction. If the instruction reads from the folded
560   // operand, it will appear as in input in both forms.
561   if (MemInSize == RegInSize && MemOutSize == RegOutSize) {
562     // Load-Folding cases.
563     // If the i'th register form operand is a register and the i'th memory form
564     // operand is a memory operand, add instructions to Table#i.
565     for (unsigned I = RegOutSize, E = RegInst->Operands.size(); I < E; I++) {
566       Record *RegOpRec = RegInst->Operands[I].Rec;
567       Record *MemOpRec = MemInst->Operands[I].Rec;
568       // PointerLikeRegClass: For instructions like TAILJMPr, TAILJMPr64,
569       // TAILJMPr64_REX
570       if ((isRegisterOperand(RegOpRec) ||
571            RegOpRec->isSubClassOf("PointerLikeRegClass")) &&
572           isMemoryOperand(MemOpRec)) {
573         switch (I) {
574         case 0:
575           assert(!IsBroadcast && "BroadcastTable0 needs to be added");
576           addEntryWithFlags(Table0, RegInst, MemInst, S | TB_FOLDED_LOAD, 0,
577                             IsManual);
578           return;
579         case 1:
580           IsBroadcast
581               ? addBroadcastEntry(BroadcastTable1, RegInst, MemInst)
582               : addEntryWithFlags(Table1, RegInst, MemInst, S, 1, IsManual);
583           return;
584         case 2:
585           IsBroadcast
586               ? addBroadcastEntry(BroadcastTable2, RegInst, MemInst)
587               : addEntryWithFlags(Table2, RegInst, MemInst, S, 2, IsManual);
588           return;
589         case 3:
590           IsBroadcast
591               ? addBroadcastEntry(BroadcastTable3, RegInst, MemInst)
592               : addEntryWithFlags(Table3, RegInst, MemInst, S, 3, IsManual);
593           return;
594         case 4:
595           IsBroadcast
596               ? addBroadcastEntry(BroadcastTable4, RegInst, MemInst)
597               : addEntryWithFlags(Table4, RegInst, MemInst, S, 4, IsManual);
598           return;
599         }
600       }
601     }
602   } else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) {
603     // Store-Folding cases.
604     // If the memory form instruction performs a store, the *output*
605     // register of the register form instructions disappear and instead a
606     // memory *input* operand appears in the memory form instruction.
607     // For example:
608     //   MOVAPSrr => (outs VR128:$dst), (ins VR128:$src)
609     //   MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src)
610     Record *RegOpRec = RegInst->Operands[RegOutSize - 1].Rec;
611     Record *MemOpRec = MemInst->Operands[RegOutSize - 1].Rec;
612     if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) &&
613         getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec)) {
614       assert(!IsBroadcast && "Store can not be broadcast");
615       addEntryWithFlags(Table0, RegInst, MemInst, S | TB_FOLDED_STORE, 0,
616                         IsManual);
617     }
618   }
619 }
620 
run(raw_ostream & OS)621 void X86FoldTablesEmitter::run(raw_ostream &OS) {
622   // Holds all memory instructions
623   std::vector<const CodeGenInstruction *> MemInsts;
624   // Holds all register instructions - divided according to opcode.
625   std::map<uint8_t, std::vector<const CodeGenInstruction *>> RegInsts;
626 
627   ArrayRef<const CodeGenInstruction *> NumberedInstructions =
628       Target.getInstructionsByEnumValue();
629 
630   for (const CodeGenInstruction *Inst : NumberedInstructions) {
631     const Record *Rec = Inst->TheDef;
632     if (!Rec->isSubClassOf("X86Inst") || Rec->getValueAsBit("isAsmParserOnly"))
633       continue;
634 
635     if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end())
636       continue;
637 
638     // Promoted legacy instruction is in EVEX space, and has REX2-encoding
639     // alternative. It's added due to HW design and never emitted by compiler.
640     if (byteFromBitsInit(Rec->getValueAsBitsInit("OpMapBits")) ==
641             X86Local::T_MAP4 &&
642         byteFromBitsInit(Rec->getValueAsBitsInit("explicitOpPrefixBits")) ==
643             X86Local::ExplicitEVEX)
644       continue;
645 
646     // - Instructions including RST register class operands are not relevant
647     //   for memory folding (for further details check the explanation in
648     //   lib/Target/X86/X86InstrFPStack.td file).
649     // - Some instructions (listed in the manual map above) use the register
650     //   class ptr_rc_tailcall, which can be of a size 32 or 64, to ensure
651     //   safe mapping of these instruction we manually map them and exclude
652     //   them from the automation.
653     if (hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst))
654       continue;
655 
656     // Add all the memory form instructions to MemInsts, and all the register
657     // form instructions to RegInsts[Opc], where Opc is the opcode of each
658     // instructions. this helps reducing the runtime of the backend.
659     const BitsInit *FormBits = Rec->getValueAsBitsInit("FormBits");
660     uint8_t Form = byteFromBitsInit(FormBits);
661     if (mayFoldToForm(Form))
662       MemInsts.push_back(Inst);
663     else if (mayFoldFromForm(Form)) {
664       uint8_t Opc = byteFromBitsInit(Rec->getValueAsBitsInit("Opcode"));
665       RegInsts[Opc].push_back(Inst);
666     }
667   }
668 
669   // Create a copy b/c the register instruction will removed when a new entry is
670   // added into memory fold tables.
671   auto RegInstsForBroadcast = RegInsts;
672 
673   Record *AsmWriter = Target.getAsmWriter();
674   unsigned Variant = AsmWriter->getValueAsInt("Variant");
675   auto FixUp = [&](const CodeGenInstruction *RegInst) {
676     StringRef RegInstName = RegInst->TheDef->getName();
677     if (RegInstName.ends_with("_REV") || RegInstName.ends_with("_alt"))
678       if (auto *RegAltRec = Records.getDef(RegInstName.drop_back(4)))
679         RegInst = &Target.getInstruction(RegAltRec);
680     return RegInst;
681   };
682   // For each memory form instruction, try to find its register form
683   // instruction.
684   for (const CodeGenInstruction *MemInst : MemInsts) {
685     uint8_t Opc =
686         byteFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode"));
687 
688     auto RegInstsIt = RegInsts.find(Opc);
689     if (RegInstsIt == RegInsts.end())
690       continue;
691 
692     // Two forms (memory & register) of the same instruction must have the same
693     // opcode.
694     std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second;
695 
696     // Memory fold tables
697     auto Match =
698         find_if(OpcRegInsts, IsMatch(MemInst, /*IsBroadcast=*/false, Variant));
699     if (Match != OpcRegInsts.end()) {
700       updateTables(FixUp(*Match), MemInst);
701       OpcRegInsts.erase(Match);
702     }
703 
704     // Broadcast tables
705     StringRef MemInstName = MemInst->TheDef->getName();
706     if (!MemInstName.contains("mb") && !MemInstName.contains("mib"))
707       continue;
708     RegInstsIt = RegInstsForBroadcast.find(Opc);
709     assert(RegInstsIt != RegInstsForBroadcast.end() &&
710            "Unexpected control flow");
711     std::vector<const CodeGenInstruction *> &OpcRegInstsForBroadcast =
712         RegInstsIt->second;
713     Match = find_if(OpcRegInstsForBroadcast,
714                     IsMatch(MemInst, /*IsBroadcast=*/true, Variant));
715     if (Match != OpcRegInstsForBroadcast.end()) {
716       updateTables(FixUp(*Match), MemInst, 0, /*IsManual=*/false,
717                    /*IsBroadcast=*/true);
718       OpcRegInstsForBroadcast.erase(Match);
719     }
720   }
721 
722   // Add the manually mapped instructions listed above.
723   for (const ManualMapEntry &Entry : ManualMapSet) {
724     Record *RegInstIter = Records.getDef(Entry.RegInstStr);
725     Record *MemInstIter = Records.getDef(Entry.MemInstStr);
726 
727     updateTables(&(Target.getInstruction(RegInstIter)),
728                  &(Target.getInstruction(MemInstIter)), Entry.Strategy, true);
729   }
730 
731 #ifndef NDEBUG
732   auto CheckMemFoldTable = [](const FoldTable &Table) -> void {
733     for (const auto &Record : Table) {
734       auto &FoldEntry = Record.second;
735       FoldEntry.checkCorrectness();
736     }
737   };
738   CheckMemFoldTable(Table2Addr);
739   CheckMemFoldTable(Table0);
740   CheckMemFoldTable(Table1);
741   CheckMemFoldTable(Table2);
742   CheckMemFoldTable(Table3);
743   CheckMemFoldTable(Table4);
744   CheckMemFoldTable(BroadcastTable1);
745   CheckMemFoldTable(BroadcastTable2);
746   CheckMemFoldTable(BroadcastTable3);
747   CheckMemFoldTable(BroadcastTable4);
748 #endif
749 #define PRINT_TABLE(TABLE) printTable(TABLE, #TABLE, OS);
750   // Print all tables.
751   PRINT_TABLE(Table2Addr)
752   PRINT_TABLE(Table0)
753   PRINT_TABLE(Table1)
754   PRINT_TABLE(Table2)
755   PRINT_TABLE(Table3)
756   PRINT_TABLE(Table4)
757   PRINT_TABLE(BroadcastTable1)
758   PRINT_TABLE(BroadcastTable2)
759   PRINT_TABLE(BroadcastTable3)
760   PRINT_TABLE(BroadcastTable4)
761 }
762 
// Static registration object that hooks X86FoldTablesEmitter into TableGen's
// emitter options under the name "gen-x86-fold-tables" with the description
// below. NOTE(review): presumably selected on the command line as
// -gen-x86-fold-tables; confirm against TableGenBackend.h.
static TableGen::Emitter::OptClass<X86FoldTablesEmitter>
    X("gen-x86-fold-tables", "Generate X86 fold tables");
765