1 //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
/// \file Pass to configure the shape of AMX physical registers.
/// AMX registers need to be configured before use. The ldtilecfg instruction
/// is inserted before the FastRegAllocation pass; however, at that time we
/// don't know the shape of each physical tile register, because register
/// allocation is not done yet. This pass runs after the register allocation
/// pass. It collects the shape information of each physical tile register
/// and stores the shape in the stack slot that is allocated for loading the
/// config into the tile config register.
17 //
18 //===----------------------------------------------------------------------===//
19
20 #include "X86.h"
21 #include "X86InstrBuilder.h"
22 #include "X86MachineFunctionInfo.h"
23 #include "X86RegisterInfo.h"
24 #include "X86Subtarget.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/Passes.h"
30 #include "llvm/CodeGen/TargetInstrInfo.h"
31 #include "llvm/CodeGen/TargetRegisterInfo.h"
32 #include "llvm/InitializePasses.h"
33
34 using namespace llvm;
35
36 #define DEBUG_TYPE "fasttileconfig"
37
38 namespace {
39
40 class X86FastTileConfig : public MachineFunctionPass {
41 // context
42 MachineFunction *MF = nullptr;
43 const TargetInstrInfo *TII = nullptr;
44 MachineRegisterInfo *MRI = nullptr;
45 const TargetRegisterInfo *TRI = nullptr;
46 X86MachineFunctionInfo *X86FI = nullptr;
47
48 bool configBasicBlock(MachineBasicBlock &MBB);
49
50 public:
X86FastTileConfig()51 X86FastTileConfig() : MachineFunctionPass(ID) {}
52
53 /// Return the pass name.
getPassName() const54 StringRef getPassName() const override {
55 return "Fast Tile Register Configure";
56 }
57
getAnalysisUsage(AnalysisUsage & AU) const58 void getAnalysisUsage(AnalysisUsage &AU) const override {
59 AU.setPreservesAll();
60 MachineFunctionPass::getAnalysisUsage(AU);
61 }
62
63 /// Perform register allocation.
64 bool runOnMachineFunction(MachineFunction &MFunc) override;
65
getRequiredProperties() const66 MachineFunctionProperties getRequiredProperties() const override {
67 return MachineFunctionProperties().set(
68 MachineFunctionProperties::Property::NoPHIs);
69 }
70
71 static char ID;
72 };
73
74 } // end anonymous namespace
75
76 char X86FastTileConfig::ID = 0;
77
78 INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
79 "Fast Tile Register Configure", false, false)
80 INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
81 "Fast Tile Register Configure", false, false)
82
isTileDef(MachineRegisterInfo * MRI,MachineInstr & MI)83 static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
84 // There is no phi instruction after register allocation.
85 assert(MI.isPHI() == false);
86 // The instruction must have 3 operands: tile def, row, col.
87 // It should be AMX pseudo instruction that have shape operand.
88 if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 ||
89 !MI.isPseudo())
90 return false;
91 MachineOperand &MO = MI.getOperand(0);
92
93 if (MO.isReg()) {
94 Register Reg = MO.getReg();
95 // FIXME it may be used after Greedy RA and the physical
96 // register is not rewritten yet.
97 if (Reg.isVirtual() &&
98 MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
99 return true;
100 if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
101 return true;
102 }
103
104 return false;
105 }
106
107 // PreTileConfig should configure the tile registers based on basic
108 // block.
configBasicBlock(MachineBasicBlock & MBB)109 bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
110 bool Change = false;
111 SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;
112 for (MachineInstr &MI : reverse(MBB)) {
113 if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV)
114 continue;
115 // AMX instructions that define tile register.
116 if (MI.getOpcode() != X86::PLDTILECFGV) {
117 MachineOperand &Row = MI.getOperand(1);
118 MachineOperand &Col = MI.getOperand(2);
119 unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0;
120 ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)});
121 } else { // PLDTILECFGV
122 // Rewrite the shape information to memory. Stack slot should have
123 // been initialized to zero in pre config.
124 int SS = MI.getOperand(0).getIndex(); // tile config stack slot.
125 for (auto &ShapeInfo : ShapeInfos) {
126 DebugLoc DL;
127 unsigned TMMIdx = ShapeInfo.first;
128 Register RowReg = ShapeInfo.second.getRow()->getReg();
129 Register ColReg = ShapeInfo.second.getCol()->getReg();
130 // Here is the data format for the tile config.
131 // 0 palette
132 // 1 start_row
133 // 2-15 reserved, must be zero
134 // 16-17 tile0.colsb Tile 0 bytes per row.
135 // 18-19 tile1.colsb Tile 1 bytes per row.
136 // 20-21 tile2.colsb Tile 2 bytes per row.
137 // ... (sequence continues)
138 // 30-31 tile7.colsb Tile 7 bytes per row.
139 // 32-47 reserved, must be zero
140 // 48 tile0.rows Tile 0 rows.
141 // 49 tile1.rows Tile 1 rows.
142 // 50 tile2.rows Tile 2 rows.
143 // ... (sequence continues)
144 // 55 tile7.rows Tile 7 rows.
145 // 56-63 reserved, must be zero
146 int RowOffset = 48 + TMMIdx;
147 int ColOffset = 16 + TMMIdx * 2;
148
149 Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit);
150 BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg);
151 MachineInstrBuilder StoreRow =
152 BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr));
153 addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg);
154
155 MachineInstrBuilder StoreCol =
156 BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr));
157 addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg);
158 }
159 ShapeInfos.clear();
160 Change = true;
161 }
162 }
163
164 return Change;
165 }
166
runOnMachineFunction(MachineFunction & MFunc)167 bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
168 X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
169 // Early exit in the common case of non-AMX code.
170 if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
171 return false;
172
173 MF = &MFunc;
174 MRI = &MFunc.getRegInfo();
175 const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>();
176 TRI = ST->getRegisterInfo();
177 TII = MFunc.getSubtarget().getInstrInfo();
178 bool Change = false;
179
180 // Loop over all of the basic blocks, eliminating virtual register references
181 for (MachineBasicBlock &MBB : MFunc)
182 Change |= configBasicBlock(MBB);
183
184 return Change;
185 }
186
createX86FastTileConfigPass()187 FunctionPass *llvm::createX86FastTileConfigPass() {
188 return new X86FastTileConfig();
189 }
190