xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86FastTileConfig.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
/// \file Pass to configure the shape of AMX physical registers.
/// AMX registers need to be configured before use. The ldtilecfg instruction
/// is inserted before the FastRegAllocation pass; however, at that time we
/// don't know the shape of each physical tile register, because register
/// allocation is not done yet. This pass runs after the register allocation
/// pass. It collects the shape information of each physical tile register
/// and stores the shape in the stack slot that is allocated for loading the
/// config into the tile config register.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "X86.h"
21 #include "X86InstrBuilder.h"
22 #include "X86MachineFunctionInfo.h"
23 #include "X86RegisterInfo.h"
24 #include "X86Subtarget.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/Passes.h"
30 #include "llvm/CodeGen/TargetInstrInfo.h"
31 #include "llvm/CodeGen/TargetRegisterInfo.h"
32 #include "llvm/InitializePasses.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "fasttileconfig"
37 
38 namespace {
39 
40 class X86FastTileConfig : public MachineFunctionPass {
41   // context
42   MachineFunction *MF = nullptr;
43   const TargetInstrInfo *TII = nullptr;
44   MachineRegisterInfo *MRI = nullptr;
45   const TargetRegisterInfo *TRI = nullptr;
46   X86MachineFunctionInfo *X86FI = nullptr;
47 
48   bool configBasicBlock(MachineBasicBlock &MBB);
49 
50 public:
X86FastTileConfig()51   X86FastTileConfig() : MachineFunctionPass(ID) {}
52 
53   /// Return the pass name.
getPassName() const54   StringRef getPassName() const override {
55     return "Fast Tile Register Configure";
56   }
57 
getAnalysisUsage(AnalysisUsage & AU) const58   void getAnalysisUsage(AnalysisUsage &AU) const override {
59     AU.setPreservesAll();
60     MachineFunctionPass::getAnalysisUsage(AU);
61   }
62 
63   /// Perform register allocation.
64   bool runOnMachineFunction(MachineFunction &MFunc) override;
65 
getRequiredProperties() const66   MachineFunctionProperties getRequiredProperties() const override {
67     return MachineFunctionProperties().set(
68         MachineFunctionProperties::Property::NoPHIs);
69   }
70 
71   static char ID;
72 };
73 
74 } // end anonymous namespace
75 
76 char X86FastTileConfig::ID = 0;
77 
78 INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
79                       "Fast Tile Register Configure", false, false)
80 INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
81                     "Fast Tile Register Configure", false, false)
82 
isTileDef(MachineRegisterInfo * MRI,MachineInstr & MI)83 static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
84   // There is no phi instruction after register allocation.
85   assert(MI.isPHI() == false);
86   // The instruction must have 3 operands: tile def, row, col.
87   // It should be AMX pseudo instruction that have shape operand.
88   if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 ||
89       !MI.isPseudo())
90     return false;
91   MachineOperand &MO = MI.getOperand(0);
92 
93   if (MO.isReg()) {
94     Register Reg = MO.getReg();
95     // FIXME it may be used after Greedy RA and the physical
96     // register is not rewritten yet.
97     if (Reg.isVirtual() &&
98         MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
99       return true;
100     if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
101       return true;
102   }
103 
104   return false;
105 }
106 
107 // PreTileConfig should configure the tile registers based on basic
108 // block.
configBasicBlock(MachineBasicBlock & MBB)109 bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
110   bool Change = false;
111   SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;
112   for (MachineInstr &MI : reverse(MBB)) {
113     if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV)
114       continue;
115     // AMX instructions that define tile register.
116     if (MI.getOpcode() != X86::PLDTILECFGV) {
117       MachineOperand &Row = MI.getOperand(1);
118       MachineOperand &Col = MI.getOperand(2);
119       unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0;
120       ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)});
121     } else { // PLDTILECFGV
122       // Rewrite the shape information to memory. Stack slot should have
123       // been initialized to zero in pre config.
124       int SS = MI.getOperand(0).getIndex(); // tile config stack slot.
125       for (auto &ShapeInfo : ShapeInfos) {
126         DebugLoc DL;
127         unsigned TMMIdx = ShapeInfo.first;
128         Register RowReg = ShapeInfo.second.getRow()->getReg();
129         Register ColReg = ShapeInfo.second.getCol()->getReg();
130         // Here is the data format for the tile config.
131         // 0      palette
132         // 1      start_row
133         // 2-15   reserved, must be zero
134         // 16-17  tile0.colsb Tile 0 bytes per row.
135         // 18-19  tile1.colsb Tile 1 bytes per row.
136         // 20-21  tile2.colsb Tile 2 bytes per row.
137         // ... (sequence continues)
138         // 30-31  tile7.colsb Tile 7 bytes per row.
139         // 32-47  reserved, must be zero
140         // 48     tile0.rows Tile 0 rows.
141         // 49     tile1.rows Tile 1 rows.
142         // 50     tile2.rows Tile 2 rows.
143         // ... (sequence continues)
144         // 55     tile7.rows Tile 7 rows.
145         // 56-63  reserved, must be zero
146         int RowOffset = 48 + TMMIdx;
147         int ColOffset = 16 + TMMIdx * 2;
148 
149         Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit);
150         BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg);
151         MachineInstrBuilder StoreRow =
152             BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr));
153         addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg);
154 
155         MachineInstrBuilder StoreCol =
156             BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr));
157         addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg);
158       }
159       ShapeInfos.clear();
160       Change = true;
161     }
162   }
163 
164   return Change;
165 }
166 
runOnMachineFunction(MachineFunction & MFunc)167 bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
168   X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
169   // Early exit in the common case of non-AMX code.
170   if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
171     return false;
172 
173   MF = &MFunc;
174   MRI = &MFunc.getRegInfo();
175   const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>();
176   TRI = ST->getRegisterInfo();
177   TII = MFunc.getSubtarget().getInstrInfo();
178   bool Change = false;
179 
180   // Loop over all of the basic blocks, eliminating virtual register references
181   for (MachineBasicBlock &MBB : MFunc)
182     Change |= configBasicBlock(MBB);
183 
184   return Change;
185 }
186 
createX86FastTileConfigPass()187 FunctionPass *llvm::createX86FastTileConfigPass() {
188   return new X86FastTileConfig();
189 }
190