xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86TileConfig.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
1*e8d8bef9SDimitry Andric //===-- X86TileConfig.cpp - Tile Register Configure----------------------===//
2*e8d8bef9SDimitry Andric //
3*e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*e8d8bef9SDimitry Andric //
7*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8*e8d8bef9SDimitry Andric //
/// \file Pass to configure the shape of AMX physical registers.
/// AMX registers need to be configured before use. The X86PreTileConfig pass
/// inserts the pldtilecfg instruction; however, at that time we don't
/// know the shape of each physical tile register, because register
/// allocation is not done yet. This pass runs after the register allocation
/// pass. It collects the shape information of each physical tile register
/// and stores the shape in the stack slot that is allocated for loading the
/// config into the tile config register.
17*e8d8bef9SDimitry Andric //
18*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
19*e8d8bef9SDimitry Andric 
20*e8d8bef9SDimitry Andric #include "X86.h"
21*e8d8bef9SDimitry Andric #include "X86InstrBuilder.h"
22*e8d8bef9SDimitry Andric #include "X86MachineFunctionInfo.h"
23*e8d8bef9SDimitry Andric #include "X86RegisterInfo.h"
24*e8d8bef9SDimitry Andric #include "X86Subtarget.h"
25*e8d8bef9SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
26*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
27*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
28*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
29*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
30*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
31*e8d8bef9SDimitry Andric #include "llvm/CodeGen/Passes.h"
32*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
33*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
34*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TileShapeInfo.h"
35*e8d8bef9SDimitry Andric #include "llvm/CodeGen/VirtRegMap.h"
36*e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
37*e8d8bef9SDimitry Andric 
38*e8d8bef9SDimitry Andric using namespace llvm;
39*e8d8bef9SDimitry Andric 
40*e8d8bef9SDimitry Andric #define DEBUG_TYPE "tile-config"
41*e8d8bef9SDimitry Andric 
42*e8d8bef9SDimitry Andric namespace {
43*e8d8bef9SDimitry Andric 
44*e8d8bef9SDimitry Andric class X86TileConfig : public MachineFunctionPass {
45*e8d8bef9SDimitry Andric   // context
46*e8d8bef9SDimitry Andric   MachineFunction *MF = nullptr;
47*e8d8bef9SDimitry Andric   const X86Subtarget *ST = nullptr;
48*e8d8bef9SDimitry Andric   const TargetRegisterInfo *TRI;
49*e8d8bef9SDimitry Andric   const TargetInstrInfo *TII;
50*e8d8bef9SDimitry Andric   MachineDominatorTree *DomTree = nullptr;
51*e8d8bef9SDimitry Andric   MachineRegisterInfo *MRI = nullptr;
52*e8d8bef9SDimitry Andric   VirtRegMap *VRM = nullptr;
53*e8d8bef9SDimitry Andric   LiveIntervals *LIS = nullptr;
54*e8d8bef9SDimitry Andric 
55*e8d8bef9SDimitry Andric   MachineInstr *getTileConfigPoint();
56*e8d8bef9SDimitry Andric   void tileConfig();
57*e8d8bef9SDimitry Andric 
58*e8d8bef9SDimitry Andric public:
59*e8d8bef9SDimitry Andric   X86TileConfig() : MachineFunctionPass(ID) {}
60*e8d8bef9SDimitry Andric 
61*e8d8bef9SDimitry Andric   /// Return the pass name.
62*e8d8bef9SDimitry Andric   StringRef getPassName() const override { return "Tile Register Configure"; }
63*e8d8bef9SDimitry Andric 
64*e8d8bef9SDimitry Andric   /// X86TileConfig analysis usage.
65*e8d8bef9SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
66*e8d8bef9SDimitry Andric 
67*e8d8bef9SDimitry Andric   /// Perform register allocation.
68*e8d8bef9SDimitry Andric   bool runOnMachineFunction(MachineFunction &mf) override;
69*e8d8bef9SDimitry Andric 
70*e8d8bef9SDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
71*e8d8bef9SDimitry Andric     return MachineFunctionProperties().set(
72*e8d8bef9SDimitry Andric         MachineFunctionProperties::Property::NoPHIs);
73*e8d8bef9SDimitry Andric   }
74*e8d8bef9SDimitry Andric 
75*e8d8bef9SDimitry Andric   static char ID;
76*e8d8bef9SDimitry Andric };
77*e8d8bef9SDimitry Andric 
78*e8d8bef9SDimitry Andric } // end anonymous namespace
79*e8d8bef9SDimitry Andric 
80*e8d8bef9SDimitry Andric char X86TileConfig::ID = 0;
81*e8d8bef9SDimitry Andric 
82*e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(X86TileConfig, "tileconfig", "Tile Register Configure",
83*e8d8bef9SDimitry Andric                       false, false)
84*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
85*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
86*e8d8bef9SDimitry Andric INITIALIZE_PASS_END(X86TileConfig, "tileconfig", "Tile Register Configure",
87*e8d8bef9SDimitry Andric                     false, false)
88*e8d8bef9SDimitry Andric 
89*e8d8bef9SDimitry Andric void X86TileConfig::getAnalysisUsage(AnalysisUsage &AU) const {
90*e8d8bef9SDimitry Andric   AU.addRequired<MachineDominatorTree>();
91*e8d8bef9SDimitry Andric   AU.addRequired<LiveIntervals>();
92*e8d8bef9SDimitry Andric   AU.addPreserved<SlotIndexes>();
93*e8d8bef9SDimitry Andric   AU.addRequired<VirtRegMap>();
94*e8d8bef9SDimitry Andric   AU.setPreservesAll();
95*e8d8bef9SDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
96*e8d8bef9SDimitry Andric }
97*e8d8bef9SDimitry Andric 
98*e8d8bef9SDimitry Andric static unsigned getTilePhysRegIndex(Register PhysReg) {
99*e8d8bef9SDimitry Andric   assert((PhysReg >= X86::TMM0 && X86::TMM0 <= X86::TMM7) &&
100*e8d8bef9SDimitry Andric          "Tile register number is invalid");
101*e8d8bef9SDimitry Andric   return (PhysReg - X86::TMM0);
102*e8d8bef9SDimitry Andric }
103*e8d8bef9SDimitry Andric 
104*e8d8bef9SDimitry Andric static MachineInstr *
105*e8d8bef9SDimitry Andric storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
106*e8d8bef9SDimitry Andric                     Register SrcReg, unsigned BitSize, int FrameIdx, int Offset,
107*e8d8bef9SDimitry Andric                     const TargetInstrInfo *TII, const TargetRegisterClass *RC,
108*e8d8bef9SDimitry Andric                     const TargetRegisterInfo *TRI) {
109*e8d8bef9SDimitry Andric 
110*e8d8bef9SDimitry Andric   unsigned SubIdx = (BitSize == 8) ? X86::sub_8bit : X86::sub_16bit;
111*e8d8bef9SDimitry Andric   unsigned Opc = (BitSize == 8) ? X86::MOV8mr : X86::MOV16mr;
112*e8d8bef9SDimitry Andric   if (BitSize == TRI->getRegSizeInBits(*RC))
113*e8d8bef9SDimitry Andric     SubIdx = 0;
114*e8d8bef9SDimitry Andric   MachineInstr *NewMI =
115*e8d8bef9SDimitry Andric       addFrameReference(BuildMI(MBB, MI, DebugLoc(), TII->get(Opc)), FrameIdx,
116*e8d8bef9SDimitry Andric                         Offset)
117*e8d8bef9SDimitry Andric           .addReg(SrcReg, 0, SubIdx);
118*e8d8bef9SDimitry Andric   return NewMI;
119*e8d8bef9SDimitry Andric }
120*e8d8bef9SDimitry Andric 
121*e8d8bef9SDimitry Andric static MachineInstr *storeImmToStackSlot(MachineBasicBlock &MBB,
122*e8d8bef9SDimitry Andric                                          MachineBasicBlock::iterator MI,
123*e8d8bef9SDimitry Andric                                          int64_t Imm, unsigned BitSize,
124*e8d8bef9SDimitry Andric                                          int FrameIdx, int Offset,
125*e8d8bef9SDimitry Andric                                          const TargetInstrInfo *TII) {
126*e8d8bef9SDimitry Andric   unsigned Opc = (BitSize == 8) ? X86::MOV8mi : X86::MOV16mi;
127*e8d8bef9SDimitry Andric   return addFrameReference(BuildMI(MBB, MI, DebugLoc(), TII->get(Opc)),
128*e8d8bef9SDimitry Andric                            FrameIdx, Offset)
129*e8d8bef9SDimitry Andric       .addImm(Imm);
130*e8d8bef9SDimitry Andric }
131*e8d8bef9SDimitry Andric 
132*e8d8bef9SDimitry Andric MachineInstr *X86TileConfig::getTileConfigPoint() {
133*e8d8bef9SDimitry Andric   for (MachineBasicBlock &MBB : *MF) {
134*e8d8bef9SDimitry Andric 
135*e8d8bef9SDimitry Andric     // Traverse the basic block.
136*e8d8bef9SDimitry Andric     for (MachineInstr &MI : MBB)
137*e8d8bef9SDimitry Andric       // Refer X86PreTileConfig.cpp.
138*e8d8bef9SDimitry Andric       // We only support one tile config for now.
139*e8d8bef9SDimitry Andric       if (MI.getOpcode() == X86::PLDTILECFG)
140*e8d8bef9SDimitry Andric         return &MI;
141*e8d8bef9SDimitry Andric   }
142*e8d8bef9SDimitry Andric 
143*e8d8bef9SDimitry Andric   return nullptr;
144*e8d8bef9SDimitry Andric }
145*e8d8bef9SDimitry Andric 
146*e8d8bef9SDimitry Andric void X86TileConfig::tileConfig() {
147*e8d8bef9SDimitry Andric   MachineInstr *MI = getTileConfigPoint();
148*e8d8bef9SDimitry Andric   if (!MI)
149*e8d8bef9SDimitry Andric     return;
150*e8d8bef9SDimitry Andric   MachineBasicBlock *MBB = MI->getParent();
151*e8d8bef9SDimitry Andric   int SS = MI->getOperand(1).getIndex();
152*e8d8bef9SDimitry Andric   BitVector PhysRegs(TRI->getNumRegs());
153*e8d8bef9SDimitry Andric 
154*e8d8bef9SDimitry Andric   // Fill in the palette first.
155*e8d8bef9SDimitry Andric   auto *NewMI = storeImmToStackSlot(*MBB, *MI, 1, 8, SS, 0, TII);
156*e8d8bef9SDimitry Andric   LIS->InsertMachineInstrInMaps(*NewMI);
157*e8d8bef9SDimitry Andric   // Fill in the shape of each tile physical register.
158*e8d8bef9SDimitry Andric   for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
159*e8d8bef9SDimitry Andric     Register VirtReg = Register::index2VirtReg(i);
160*e8d8bef9SDimitry Andric     if (MRI->reg_nodbg_empty(VirtReg))
161*e8d8bef9SDimitry Andric       continue;
162*e8d8bef9SDimitry Andric     const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
163*e8d8bef9SDimitry Andric     if (RC.getID() != X86::TILERegClassID)
164*e8d8bef9SDimitry Andric       continue;
165*e8d8bef9SDimitry Andric     Register PhysReg = VRM->getPhys(VirtReg);
166*e8d8bef9SDimitry Andric     if (PhysRegs.test(PhysReg))
167*e8d8bef9SDimitry Andric       continue;
168*e8d8bef9SDimitry Andric     PhysRegs.set(PhysReg);
169*e8d8bef9SDimitry Andric     ShapeT Shape = VRM->getShape(VirtReg);
170*e8d8bef9SDimitry Andric     Register RowReg = Shape.getRow()->getReg();
171*e8d8bef9SDimitry Andric     Register ColReg = Shape.getCol()->getReg();
172*e8d8bef9SDimitry Andric 
173*e8d8bef9SDimitry Andric     // Here is the data format for the tile config.
174*e8d8bef9SDimitry Andric     // 0      palette
175*e8d8bef9SDimitry Andric     // 1      start_row
176*e8d8bef9SDimitry Andric     // 2-15   reserved, must be zero
177*e8d8bef9SDimitry Andric     // 16-17  tile0.colsb Tile 0 bytes per row.
178*e8d8bef9SDimitry Andric     // 18-19  tile1.colsb Tile 1 bytes per row.
179*e8d8bef9SDimitry Andric     // 20-21  tile2.colsb Tile 2 bytes per row.
180*e8d8bef9SDimitry Andric     // ... (sequence continues)
181*e8d8bef9SDimitry Andric     // 30-31  tile7.colsb Tile 7 bytes per row.
182*e8d8bef9SDimitry Andric     // 32-47  reserved, must be zero
183*e8d8bef9SDimitry Andric     // 48     tile0.rows Tile 0 rows.
184*e8d8bef9SDimitry Andric     // 49     tile1.rows Tile 1 rows.
185*e8d8bef9SDimitry Andric     // 50     tile2.rows Tile 2 rows.
186*e8d8bef9SDimitry Andric     // ... (sequence continues)
187*e8d8bef9SDimitry Andric     // 55     tile7.rows Tile 7 rows.
188*e8d8bef9SDimitry Andric     // 56-63  reserved, must be zero
189*e8d8bef9SDimitry Andric     unsigned Index = getTilePhysRegIndex(PhysReg);
190*e8d8bef9SDimitry Andric     int RowOffset = 48 + Index;
191*e8d8bef9SDimitry Andric     int ColOffset = 16 + Index * 2;
192*e8d8bef9SDimitry Andric 
193*e8d8bef9SDimitry Andric     unsigned BitSize = 8;
194*e8d8bef9SDimitry Andric     for (const auto &Pair : {std::make_pair(RowReg, RowOffset),
195*e8d8bef9SDimitry Andric                              std::make_pair(ColReg, ColOffset)}) {
196*e8d8bef9SDimitry Andric       int64_t Imm;
197*e8d8bef9SDimitry Andric       int ImmCount = 0;
198*e8d8bef9SDimitry Andric       // All def must be the same value, otherwise it is invalid MIs.
199*e8d8bef9SDimitry Andric       // Immediate is prefered.
200*e8d8bef9SDimitry Andric       for (const MachineOperand &MO : MRI->def_operands(Pair.first)) {
201*e8d8bef9SDimitry Andric         const auto *Inst = MO.getParent();
202*e8d8bef9SDimitry Andric         if (Inst->isMoveImmediate()) {
203*e8d8bef9SDimitry Andric           ImmCount++;
204*e8d8bef9SDimitry Andric           Imm = Inst->getOperand(1).getImm();
205*e8d8bef9SDimitry Andric           break;
206*e8d8bef9SDimitry Andric         }
207*e8d8bef9SDimitry Andric       }
208*e8d8bef9SDimitry Andric       auto StoreConfig = [&](int Offset) {
209*e8d8bef9SDimitry Andric         MachineInstr *NewMI = nullptr;
210*e8d8bef9SDimitry Andric         if (ImmCount)
211*e8d8bef9SDimitry Andric           NewMI = storeImmToStackSlot(*MBB, *MI, Imm, BitSize, SS, Offset, TII);
212*e8d8bef9SDimitry Andric         else {
213*e8d8bef9SDimitry Andric           const TargetRegisterClass *RC = MRI->getRegClass(Pair.first);
214*e8d8bef9SDimitry Andric           NewMI = storeRegToStackSlot(*MBB, *MI, Pair.first, BitSize, SS,
215*e8d8bef9SDimitry Andric                                       Offset, TII, RC, TRI);
216*e8d8bef9SDimitry Andric         }
217*e8d8bef9SDimitry Andric         SlotIndex SIdx = LIS->InsertMachineInstrInMaps(*NewMI);
218*e8d8bef9SDimitry Andric         if (!ImmCount) {
219*e8d8bef9SDimitry Andric           // Extend the live interval.
220*e8d8bef9SDimitry Andric           SmallVector<SlotIndex, 8> EndPoints = {SIdx.getRegSlot()};
221*e8d8bef9SDimitry Andric           LiveInterval &Int = LIS->getInterval(Pair.first);
222*e8d8bef9SDimitry Andric           LIS->extendToIndices(Int, EndPoints);
223*e8d8bef9SDimitry Andric         }
224*e8d8bef9SDimitry Andric       };
225*e8d8bef9SDimitry Andric       StoreConfig(Pair.second);
226*e8d8bef9SDimitry Andric       BitSize += 8;
227*e8d8bef9SDimitry Andric     }
228*e8d8bef9SDimitry Andric   }
229*e8d8bef9SDimitry Andric }
230*e8d8bef9SDimitry Andric 
231*e8d8bef9SDimitry Andric bool X86TileConfig::runOnMachineFunction(MachineFunction &mf) {
232*e8d8bef9SDimitry Andric   MF = &mf;
233*e8d8bef9SDimitry Andric   MRI = &mf.getRegInfo();
234*e8d8bef9SDimitry Andric   ST = &mf.getSubtarget<X86Subtarget>();
235*e8d8bef9SDimitry Andric   TRI = ST->getRegisterInfo();
236*e8d8bef9SDimitry Andric   TII = mf.getSubtarget().getInstrInfo();
237*e8d8bef9SDimitry Andric   DomTree = &getAnalysis<MachineDominatorTree>();
238*e8d8bef9SDimitry Andric   VRM = &getAnalysis<VirtRegMap>();
239*e8d8bef9SDimitry Andric   LIS = &getAnalysis<LiveIntervals>();
240*e8d8bef9SDimitry Andric 
241*e8d8bef9SDimitry Andric   if (VRM->isShapeMapEmpty())
242*e8d8bef9SDimitry Andric     return false;
243*e8d8bef9SDimitry Andric 
244*e8d8bef9SDimitry Andric   tileConfig();
245*e8d8bef9SDimitry Andric   return true;
246*e8d8bef9SDimitry Andric }
247*e8d8bef9SDimitry Andric 
248*e8d8bef9SDimitry Andric FunctionPass *llvm::createX86TileConfigPass() { return new X86TileConfig(); }
249