xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86FastPreTileConfig.cpp (revision fe75646a0234a261c0013bf1840fdac4acaf0cec)
1 //===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file Pass to preconfig the shape of physical tile registers
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks each instruction of the basic block in reverse order. All the tile
/// registers that live out of the basic block are spilled and reloaded
/// before their users. It also checks the dependency of the shape to ensure
/// the shape is defined before ldtilecfg.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "X86.h"
19 #include "X86InstrBuilder.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86RegisterInfo.h"
22 #include "X86Subtarget.h"
23 #include "llvm/ADT/DepthFirstIterator.h"
24 #include "llvm/ADT/PostOrderIterator.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/Passes.h"
31 #include "llvm/CodeGen/TargetInstrInfo.h"
32 #include "llvm/CodeGen/TargetRegisterInfo.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Support/Debug.h"
35 
36 using namespace llvm;
37 
#define DEBUG_TYPE "fastpretileconfig"

// Counters for the spill/reload instructions this pass inserts.
STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");
42 
43 namespace {
44 
/// Fast pass that pre-configures AMX tile registers: it inserts ldtilecfg
/// ahead of each group of tile defs and spills/reloads tile values that
/// cross a configuration point.
class X86FastPreTileConfig : public MachineFunctionPass {
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  // The basic block currently being processed by configBasicBlock().
  MachineBasicBlock *MBB = nullptr;
  // Frame index of the tile-config stack object; -1 until first needed.
  int CfgSS = -1;
  // Row/column registers and spill-slot address created for a tile PHI that
  // has been (or is being) converted by convertPHI().
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for tile virtual register for which it was determined
  /// that it is alive across blocks.
  BitVector MayLiveAcrossBlocks;

  // Return (allocating on first use) the spill slot for \p VirtReg.
  int getStackSpaceFor(Register VirtReg);
  // Zero-initialize the tile-config stack object in the entry block.
  void InitializeTileConfigStackSpace();
  // Whether \p VirtReg may be used beyond the config region started by
  // \p CfgMI (or outside the current MBB).
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  // Insert a tile store of \p VirtReg to its spill slot before \p Before.
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  // Insert a tile load of \p VirtReg from its spill slot before \p UseMI.
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  // Rewrite tile PHIs so that none depends on another PHI of the same block.
  void canonicalizePHIs(MachineBasicBlock &MBB);
  // Replace one tile PHI with shape/address PHIs plus a tileload.
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  // Convert all tile PHIs of \p MBB.
  void convertPHIs(MachineBasicBlock &MBB);
  // Insert ldtilecfg (and spills/reloads) for one basic block.
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configure.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};
93 
94 } // end anonymous namespace
95 
char X86FastPreTileConfig::ID = 0;

// Register the pass with the LLVM pass registry.
INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)
102 
103 static bool dominates(MachineBasicBlock &MBB,
104                       MachineBasicBlock::const_iterator A,
105                       MachineBasicBlock::const_iterator B) {
106   auto MBBEnd = MBB.end();
107   if (B == MBBEnd)
108     return true;
109 
110   MachineBasicBlock::const_iterator I = MBB.begin();
111   for (; &*I != A && &*I != B; ++I)
112     ;
113 
114   return &*I == A;
115 }
116 
117 /// This allocates space for the specified virtual register to be held on the
118 /// stack.
119 int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
120   // Find the location Reg would belong...
121   int SS = StackSlotForVirtReg[VirtReg];
122   // Already has space allocated?
123   if (SS != -1)
124     return SS;
125 
126   // Allocate a new stack object for this spill location...
127   const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
128   unsigned Size = TRI->getSpillSize(RC);
129   Align Alignment = TRI->getSpillAlign(RC);
130   int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);
131 
132   // Assign the slot.
133   StackSlotForVirtReg[VirtReg] = FrameIdx;
134   return FrameIdx;
135 }
136 
/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg live out of the current MBB, it must live out of the current
/// config
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
  // Cached result from an earlier query.
  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    // A use outside the current block means the value crosses a block
    // boundary, hence also a config boundary.
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload
    // tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
        return true;
      }
    }
  }

  return false;
}
163 
// Zero-initialize the 64-byte tile-config stack object at the top of the
// entry block using the widest available vector store, then set the palette
// byte (offset 0) to 1.
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    // One 64-byte zmm store covers the whole config area.
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    // Two 32-byte ymm stores at offsets 0 and 32.
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    // Four 16-byte xmm stores at offsets 0, 16, 32 and 48.
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}
199 
/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
                                 Register VirtReg, bool Kill) {
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
  int FI = getStackSpaceFor(VirtReg);
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // Don't need shape information for tile store, because it is adjacent to
  // the tile def instruction.
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,
                           Register());
  ++NumStores;

  // TODO: update DBG_VALUEs
}
217 
/// Insert reload instruction for \p OrigReg before \p UseMI, reloading it
/// from its spill slot with the shape given by \p RowMO / \p ColMO.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
                                  Register OrigReg, MachineOperand *RowMO,
                                  MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold copy to tileload
  // BB1:
  // spill src to s
  //
  // BB2:
  // t = copy src
  // -->
  // t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // FIXME: MBB is not the parent of UseMI.
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
  // Operand 5 of PTILELOADDV is the stride; point it at the constant-64
  // stride register built above.
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  // The shape registers are reused by the reload, so they are no longer
  // killed at their previous uses.
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}
271 
272 static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
273   // The instruction must have 3 operands: tile def, row, col.
274   if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
275     return false;
276   MachineOperand &MO = MI.getOperand(0);
277 
278   if (MO.isReg()) {
279     Register Reg = MO.getReg();
280     // FIXME it may be used after Greedy RA and the physical
281     // register is not rewritten yet.
282     if (Reg.isVirtual() &&
283         MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
284       return true;
285     if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
286       return true;
287   }
288 
289   return false;
290 }
291 
292 static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
293   MachineInstr *MI = MRI->getVRegDef(TileReg);
294   if (isTileDef(MRI, *MI)) {
295     MachineOperand *RowMO = &MI->getOperand(1);
296     MachineOperand *ColMO = &MI->getOperand(2);
297     return ShapeT(RowMO, ColMO, MRI);
298   } else if (MI->isCopy()) {
299     TileReg = MI->getOperand(1).getReg();
300     return getShape(MRI, TileReg);
301   }
302 
303   // The def should not be PHI node, because we walk the MBB in reverse post
304   // order.
305   assert(MI->isPHI() && "Unexpected PHI when get shape.");
306   llvm_unreachable("Unexpected MI when get shape.");
307 }
308 
// BB0:
// spill t0 to s0
// BB1:
// spill t1 to s1
//
// BB2:
// t = phi [t0, bb0] [t1, bb1]
// -->
// row = phi [r0, bb0] [r1, bb1]
// col = phi [c0, bb0] [c1, bb1]
//   s = phi [s0, bb0] [s1, bb1]
//   t = tileload row, col, s
// The new instruction is inserted at the end of the phi node. The order
// of the original phi node is not ensured.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create instruction to get stack slot address of each incoming block.
  // 2. Create PHI node for the stack address.
  // 3. Create PHI node for shape. If one of the incoming shape is immediate
  //    use the immediate and delete the PHI node.
  // 4. Create tileload instruction from the stack address.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of phi node and its row/column information.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the 2 incoming value of tile register and MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as liveout, so that it will be spilled when visit
    // the incoming MBB. Otherwise since phi will be deleted, it
    // would miss spill when visit incoming MBB.
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
        //        def t1
        //       /       \
        //  def t2       t3 = phi(t1, t4) <--
        //       \       /                  |
        //      t4 = phi(t2, t3)-------------
        //
        // For each (row, column and stack address) append phi incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
        Register InColReg = VisitedPHIs[TileDefMI].Col;
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert PHI to tileload
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to tileload instruction. Get the stack
        // address from tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operand of row/column phi instruction.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register live out of its def BB, it would be spilled.
      // Create MI to get the spill stack slot address for the tile register
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

  // Materialize the tileload right after the PHI section; the shape/address
  // PHIs created above dominate this point.
  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
  // Operand 5 of PTILELOADDV is the stride.
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}
425 
426 static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
427   MachineOperand &MO = MI.getOperand(0);
428   if (MO.isReg() && MO.getReg().isVirtual() &&
429       MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
430     return true;
431   return false;
432 }
433 
434 void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
435   SmallVector<MachineInstr *, 8> PHIs;
436 
437   for (MachineInstr &MI : MBB) {
438     if (!MI.isPHI())
439       break;
440     if (!isTileRegDef(MRI, MI))
441       continue;
442     PHIs.push_back(&MI);
443   }
444   // Canonicalize the phi node first. One tile phi may depeneds previous
445   // phi node. For below case, we need convert %t4.
446   //
447   // BB0:
448   // %t3 = phi (t1 BB1, t2 BB0)
449   // %t4 = phi (t5 BB1, t3 BB0)
450   // -->
451   // %t3 = phi (t1 BB1, t2 BB0)
452   // %t4 = phi (t5 BB1, t2 BB0)
453   //
454   while (!PHIs.empty()) {
455     MachineInstr *PHI = PHIs.pop_back_val();
456 
457     // Find the operand that is incoming from the same MBB and the def
458     // is also phi node.
459     MachineOperand *InMO = nullptr;
460     MachineInstr *DefMI = nullptr;
461     for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
462       Register InTileReg = PHI->getOperand(I).getReg();
463       MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
464       DefMI = MRI->getVRegDef(InTileReg);
465       if (InMBB != &MBB || !DefMI->isPHI())
466         continue;
467 
468       InMO = &PHI->getOperand(I);
469       break;
470     }
471     // If can't find such operand, do nothing.
472     if (!InMO)
473       continue;
474 
475     // Current phi node depends on previous phi node. Break the
476     // dependency.
477     Register DefTileReg;
478     for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
479       MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
480       if (InMBB != &MBB)
481         continue;
482       DefTileReg = DefMI->getOperand(I).getReg();
483       InMO->setReg(DefTileReg);
484       break;
485     }
486   }
487 }
488 
489 void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
490   SmallVector<MachineInstr *, 8> PHIs;
491   for (MachineInstr &MI : MBB) {
492     if (!MI.isPHI())
493       break;
494     if (!isTileRegDef(MRI, MI))
495       continue;
496     PHIs.push_back(&MI);
497   }
498   while (!PHIs.empty()) {
499     MachineInstr *MI = PHIs.pop_back_val();
500     VisitedPHIs.clear();
501     convertPHI(&MBB, *MI);
502   }
503 }
504 
// PreTileConfig should configure the tile registers based on basic
// block.
bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  bool Change = false;
  MachineInstr *LastShapeMI = nullptr;
  MachineInstr *LastTileCfg = nullptr;
  bool HasUnconfigTile = false;

  // Insert a ldtilecfg (loading from the shared config stack object) right
  // before \p Before and reset the shape-tracking state.
  auto Config = [&](MachineInstr &Before) {
    if (CfgSS == -1)
      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
                                     ST->getTileConfigAlignment(), false);
    LastTileCfg = addFrameReference(
        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
    LastShapeMI = nullptr;
    Change = true;
  };
  // True if \p MI reads or writes any virtual tile register.
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg.isVirtual() &&
          MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
        return true;
    }
    return false;
  };
  // Walk the block bottom-up so a ldtilecfg is inserted once the full group
  // of tile defs it must cover (and their shape defs) is known.
  for (MachineInstr &MI : reverse(MBB)) {
    // We have transformed phi node before configuring BB.
    if (MI.isPHI())
      break;
    // Don't collect the shape of used tile, the tile should be defined
    // before the tile use. Spill and reload would happen if there is only
    // tile use after ldtilecfg, so the shape can be collected from reload.
    // Take below code for example. %t would be reloaded before tilestore
    // call
    // ....
    // tilestore %r, %c, %t
    // -->
    // call
    // ldtilecfg
    // %t = tileload %r, %c
    // tilestore %r, %c, %t
    if (HasTileOperand(MRI, MI))
      HasUnconfigTile = true;
    // According to AMX ABI, all the tile registers including config register
    // are volatile. Caller need to save/restore config register.
    if (MI.isCall() && HasUnconfigTile) {
      MachineBasicBlock::iterator I;
      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
        I = ++LastShapeMI->getIterator();
      else
        I = ++MI.getIterator();
      Config(*I);
      HasUnconfigTile = false;
      continue;
    }
    if (!isTileDef(MRI, MI))
      continue;
    //
    //---------------------------------------------------------------------
    // Don't handle COPY instruction. If the src and dst of the COPY can be
    // in the same config in below case, we just check the shape of t0.
    // def row0
    // def col0
    // ldtilecfg
    // t0 = tilezero(row0, col0)
    // t1 = copy t0
    // ...
    // If the src and dst of the COPY can NOT be in the same config in below
    // case. Reload would be generated before the copy instruction.
    // def row0
    // def col0
    // t0 = tilezero(row0, col0)
    // spill t0
    // ...
    // def row1
    // def col1
    // ldtilecfg
    // t1 = tilezero(row1, col1)
    // reload t0
    // t1 = copy t0
    //---------------------------------------------------------------------
    //
    // If MI dominate the last shape def instruction, we need insert
    // ldtilecfg after LastShapeMI now. The config doesn't include
    // current MI.
    //   def row0
    //   def col0
    //   tilezero(row0, col0)  <- MI
    //   def row1
    //   def col1
    //   ldtilecfg             <- insert
    //   tilezero(row1, col1)
    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
      Config(*(++LastShapeMI->getIterator()));
    MachineOperand *RowMO = &MI.getOperand(1);
    MachineOperand *ColMO = &MI.getOperand(2);
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
    // If the shape is defined in current MBB, check the domination.
    // FIXME how about loop?
    if (RowMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = RowMI;
      else if (dominates(MBB, LastShapeMI, RowMI))
        LastShapeMI = RowMI;
    }
    if (ColMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = ColMI;
      else if (dominates(MBB, LastShapeMI, ColMI))
        LastShapeMI = ColMI;
    }
    // If there is user live out of the tilecfg, spill it and reload in
    // before the user.
    Register TileReg = MI.getOperand(0).getReg();
    if (mayLiveOut(TileReg, LastTileCfg))
      spill(++MI.getIterator(), TileReg, false);
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
      if (UseMI.getParent() == &MBB) {
        // check user should not across ldtilecfg
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
          continue;
        // reload before UseMI
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      } else {
        // Don't reload for phi instruction, we handle phi reload separately.
        // TODO: merge the reload for the same user MBB.
        if (!UseMI.isPHI())
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      }
    }
  }

  // Configure tile registers at the head of the MBB
  if (HasUnconfigTile) {
    MachineInstr *Before;
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
      Before = &*MBB.getFirstNonPHI();
    else
      Before = &*(++LastShapeMI->getIterator());

    Config(*Before);
  }

  return Change;
}
655 
656 bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
657   MF = &MFunc;
658   MRI = &MFunc.getRegInfo();
659   ST = &MFunc.getSubtarget<X86Subtarget>();
660   TII = ST->getInstrInfo();
661   X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
662   MFI = &MFunc.getFrameInfo();
663   TRI = ST->getRegisterInfo();
664   CfgSS = -1;
665 
666   unsigned NumVirtRegs = MRI->getNumVirtRegs();
667   // Abandon early if there is no tile register to config.
668   bool HasVirtTileReg = false;
669   for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
670     Register VirtReg = Register::index2VirtReg(I);
671     if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
672       HasVirtTileReg = true;
673       break;
674     }
675   }
676   if (!HasVirtTileReg)
677     return false;
678 
679   StackSlotForVirtReg.resize(NumVirtRegs);
680   MayLiveAcrossBlocks.clear();
681   // We will create register during config. *3 is to make sure
682   // the virtual register number doesn't exceed the size of
683   // the bit vector.
684   MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
685   bool Change = false;
686   assert(MRI->isSSA());
687 
688   // Canonicalize the phi node first.
689   for (MachineBasicBlock &MBB : MFunc)
690     canonicalizePHIs(MBB);
691 
692   // Loop over all of the basic blocks in reverse post order and insert
693   // ldtilecfg for tile registers. The reserse post order is to facilitate
694   // PHI node convert.
695   ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
696   for (MachineBasicBlock *MBB : RPOT) {
697     convertPHIs(*MBB);
698     Change |= configBasicBlock(*MBB);
699   }
700 
701   if (Change)
702     InitializeTileConfigStackSpace();
703 
704   StackSlotForVirtReg.clear();
705   return Change;
706 }
707 
/// Factory for the fast pre-tile-config pass; ownership of the returned
/// pass transfers to the caller (the pass manager).
FunctionPass *llvm::createX86FastPreTileConfigPass() {
  return new X86FastPreTileConfig();
}
711