1 //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file Pass to config the shape of AMX physical registers 10 /// AMX register need to be configured before use. Before FastRegAllocation pass 11 /// the ldtilecfg instruction is inserted, however at that time we don't 12 /// know the shape of each physical tile registers, because the register 13 /// allocation is not done yet. This pass runs after register allocation 14 /// pass. It collects the shape information of each physical tile register 15 /// and store the shape in the stack slot that is allocated for load config 16 /// to tile config register. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "X86.h" 21 #include "X86InstrBuilder.h" 22 #include "X86MachineFunctionInfo.h" 23 #include "X86RegisterInfo.h" 24 #include "X86Subtarget.h" 25 #include "llvm/CodeGen/MachineFrameInfo.h" 26 #include "llvm/CodeGen/MachineFunctionPass.h" 27 #include "llvm/CodeGen/MachineInstr.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/CodeGen/TargetInstrInfo.h" 31 #include "llvm/CodeGen/TargetRegisterInfo.h" 32 #include "llvm/InitializePasses.h" 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "fasttileconfig" 37 38 namespace { 39 40 class X86FastTileConfig : public MachineFunctionPass { 41 // context 42 MachineFunction *MF = nullptr; 43 const X86Subtarget *ST = nullptr; 44 const TargetRegisterInfo *TRI = nullptr; 45 const TargetInstrInfo *TII = nullptr; 46 MachineRegisterInfo *MRI = nullptr; 47 X86MachineFunctionInfo *X86FI = nullptr; 48 49 MachineInstr *getTileConfigPoint(); 50 void tileConfig(); 51 52 public: 53 X86FastTileConfig() : MachineFunctionPass(ID) {} 54 55 bool fastTileConfig(); 56 bool isTileLoad(MachineInstr &MI); 57 bool isTileStore(MachineInstr &MI); 58 bool isAMXInstr(MachineInstr &MI); 59 60 MachineInstr *getKeyAMXInstr(MachineInstr *MI); 61 void getTileShapesCfg(MachineInstr *MI, 62 SmallVector<MachineOperand *> &ShapedTiles); 63 void getShapeCfgInstrs(MachineInstr *MI, 64 std::map<unsigned, MachineInstr *> &RowCfgs, 65 std::map<unsigned, MachineInstr *> &ColCfgs); 66 67 /// Return the pass name. 68 StringRef getPassName() const override { 69 return "Fast Tile Register Configure"; 70 } 71 72 void materializeTileCfg(MachineInstr *MI); 73 74 void rewriteTileCfg(SmallVector<MachineOperand *> &ShapedTiles, 75 std::map<unsigned, MachineInstr *> &RowCfgs, 76 std::map<unsigned, MachineInstr *> &ColCfgs); 77 78 /// Perform register allocation. 79 bool runOnMachineFunction(MachineFunction &MFunc) override; 80 81 MachineFunctionProperties getRequiredProperties() const override { 82 return MachineFunctionProperties().set( 83 MachineFunctionProperties::Property::NoPHIs); 84 } 85 86 static char ID; 87 }; 88 89 } // end anonymous namespace 90 91 char X86FastTileConfig::ID = 0; 92 93 INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE, 94 "Fast Tile Register Configure", false, false) 95 INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE, 96 "Fast Tile Register Configure", false, false) 97 98 static bool isTilePhysReg(MachineOperand &Op) { 99 if (!Op.isReg()) 100 return false; 101 102 Register Reg = Op.getReg(); 103 if (Reg >= X86::TMM0 && Reg <= X86::TMM7) 104 return true; 105 return false; 106 } 107 108 static unsigned getTilePhysRegIdx(MachineOperand *Op) { 109 assert(isTilePhysReg(*Op) && "Tile Operand is invalid"); 110 return Op->getReg() - X86::TMM0; 111 } 112 113 static inline void adjustRowCfg(unsigned TIdx, MachineInstr *MI) { 114 unsigned Offset = 48 + TIdx; 115 MI->getOperand(3).ChangeToImmediate(Offset); 116 } 117 118 static inline void adjustColCfg(unsigned TIdx, MachineInstr *MI) { 119 unsigned Offset = 16 + TIdx * 2; 120 MI->getOperand(3).ChangeToImmediate(Offset); 121 } 122 123 bool X86FastTileConfig::isTileLoad(MachineInstr &MI) { 124 return MI.getOpcode() == X86::PTILELOADDV || 125 MI.getOpcode() == X86::PTILELOADDT1V; 126 } 127 bool X86FastTileConfig::isTileStore(MachineInstr &MI) { 128 return MI.getOpcode() == X86::PTILESTOREDV; 129 } 130 bool X86FastTileConfig::isAMXInstr(MachineInstr &MI) { 131 // TODO: May need to handle some special nontile amx instrucion. 132 if (MI.getOpcode() == X86::PLDTILECFGV || MI.isDebugInstr()) 133 return false; 134 135 return llvm::any_of(MI.operands(), isTilePhysReg); 136 } 137 138 MachineInstr *X86FastTileConfig::getKeyAMXInstr(MachineInstr *MI) { 139 auto Cfg = MachineBasicBlock::iterator(MI); 140 MachineBasicBlock *MBB = MI->getParent(); 141 MachineInstr *KeyMI = nullptr; 142 int KeyAMXNum = 0; 143 144 for (auto II = Cfg; II != MBB->end(); II++) { 145 if (isTileLoad(*II)) { 146 KeyMI = &*II; 147 continue; 148 } 149 150 if (isTileStore(*II)) { 151 assert(KeyMI && "Key AMX Should be found before!"); 152 break; 153 } 154 155 if (isAMXInstr(*II)) { 156 assert((KeyAMXNum == 0) && "Too many Key AMX instruction!"); 157 KeyAMXNum++; 158 KeyMI = &*II; 159 } 160 } 161 assert(KeyMI && "There must be an AMX instruction."); 162 return KeyMI; 163 } 164 165 // Orderly get the tiles in key amx instruction, uses before defs. 166 void X86FastTileConfig::getTileShapesCfg( 167 MachineInstr *CfgMI, SmallVector<MachineOperand *> &ShapedTiles) { 168 MachineInstr *KeyMI = getKeyAMXInstr(CfgMI); 169 170 SmallVector<MachineOperand *> DefTiles; 171 for (MachineOperand &MO : KeyMI->operands()) { 172 if (!isTilePhysReg(MO)) 173 continue; 174 if (MO.isDef()) 175 DefTiles.push_back(&MO); 176 else 177 ShapedTiles.push_back(&MO); 178 } 179 ShapedTiles.append(DefTiles); 180 } 181 182 // We pre-config the shapes at position named with "amx.tmm.N.shape.row* and 183 // amx.shape.N.col*" at pass "Pre AMX Tile Config". 184 // The 'N' implies the order of tiles in key amx intrinsic. 185 void X86FastTileConfig::getShapeCfgInstrs( 186 MachineInstr *MI, std::map<unsigned, MachineInstr *> &RowCfgs, 187 std::map<unsigned, MachineInstr *> &ColCfgs) { 188 auto Cfg = MachineBasicBlock::iterator(MI); 189 MachineBasicBlock *MBB = MI->getParent(); 190 191 for (auto II = Cfg; II != MBB->begin(); II--) { 192 if (isAMXInstr(*II) || II->isTerminator() || II->isCall()) 193 break; 194 if (!II->mayStore() || !II->hasOneMemOperand()) 195 continue; 196 const Value *MemPtr = II->memoperands()[0]->getValue(); 197 if (!MemPtr) 198 continue; 199 200 StringRef Name = MemPtr->getName(); 201 if (!Name.startswith("amx.tmm.")) 202 continue; 203 204 // Get the 'N'th tile shape config in key amx instruction. 205 auto N = Name.find(".shape"); 206 StringRef STileIdx = Name.slice(8, N); 207 unsigned Idx; 208 STileIdx.getAsInteger(10, Idx); 209 210 // And related them with their store instructions. 211 if (Name.contains("row")) 212 RowCfgs[Idx] = &*II; 213 else if (Name.contains("col")) 214 ColCfgs[Idx] = &*II; 215 else 216 llvm_unreachable("Invalid tile shape info!"); 217 } 218 assert((RowCfgs.size() == ColCfgs.size()) && 219 "The number of tile row and col must be equal!"); 220 } 221 222 // Here is the data format for the tile config. 223 // 0 palette = 1 now. 224 // 1 start_row = 0 now. 225 // 2-15 reserved, must be zero 226 // 16-17 tile0.colsb Tile 0 bytes per row. 227 // 18-19 tile1.colsb Tile 1 bytes per row. 228 // 20-21 tile2.colsb Tile 2 bytes per row. 229 // ... (sequence continues) 230 // 30-31 tile7.colsb Tile 7 bytes per row. 231 // 32-47 reserved, must be zero 232 // 48 tile0.rows Tile 0 rows. 233 // 49 tile1.rows Tile 1 rows. 234 // 50 tile2.rows Tile 2 rows. 235 // ... (sequence continues) 236 // 55 tile7.rows Tile 7 rows. 237 // 56-63 reserved, must be zero 238 void X86FastTileConfig::rewriteTileCfg( 239 SmallVector<MachineOperand *> &ShapedTiles, 240 std::map<unsigned, MachineInstr *> &RowCfgs, 241 std::map<unsigned, MachineInstr *> &ColCfgs) { 242 assert((RowCfgs.size() == ShapedTiles.size()) && 243 "The number of tile shapes not equal with the number of tiles!"); 244 245 // Orderly get the tiles and adjust the shape config. 246 for (unsigned I = 0, E = ShapedTiles.size(); I < E; I++) { 247 MachineOperand *MO = ShapedTiles[I]; 248 unsigned TmmIdx = getTilePhysRegIdx(MO); 249 if (I == TmmIdx) 250 continue; 251 adjustRowCfg(TmmIdx, RowCfgs[I]); 252 adjustColCfg(TmmIdx, ColCfgs[I]); 253 } 254 } 255 256 // We have already preconfig the shapes before fast register allocation at 257 // X86PreAMXConfig::preWriteTileCfg(). Now, we have done fast register 258 // allocation, the shapes pre-written before may not rightly corresponding 259 // to the correct tmm registers, so we need adjust them. 260 void X86FastTileConfig::materializeTileCfg(MachineInstr *CfgMI) { 261 SmallVector<MachineOperand *> ShapedTiles; 262 std::map<unsigned, MachineInstr *> RowCfgs; 263 std::map<unsigned, MachineInstr *> ColCfgs; 264 265 // Orderly keep the tile uses and def in ShapedTiles; 266 getTileShapesCfg(CfgMI, ShapedTiles); 267 assert(ShapedTiles.size() && "Not find shapes config!"); 268 269 getShapeCfgInstrs(CfgMI, RowCfgs, ColCfgs); 270 271 rewriteTileCfg(ShapedTiles, RowCfgs, ColCfgs); 272 } 273 274 bool X86FastTileConfig::fastTileConfig() { 275 bool Changed = false; 276 277 for (MachineBasicBlock &MBB : *MF) { 278 SmallVector<MachineInstr *, 2> CFGs; 279 for (MachineInstr &MI : MBB) 280 if (MI.getOpcode() == X86::PLDTILECFGV) 281 CFGs.push_back(&MI); 282 for (auto *MI : CFGs) 283 materializeTileCfg(MI); 284 if (!CFGs.empty()) 285 Changed = true; 286 } 287 if (Changed) 288 X86FI->setHasVirtualTileReg(true); 289 return Changed; 290 } 291 292 bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) { 293 MF = &MFunc; 294 MRI = &MFunc.getRegInfo(); 295 ST = &MFunc.getSubtarget<X86Subtarget>(); 296 TRI = ST->getRegisterInfo(); 297 TII = MFunc.getSubtarget().getInstrInfo(); 298 X86FI = MFunc.getInfo<X86MachineFunctionInfo>(); 299 300 return fastTileConfig(); 301 } 302 303 FunctionPass *llvm::createX86FastTileConfigPass() { 304 return new X86FastTileConfig(); 305 } 306