//===-- X86TileConfig.cpp - Tile Register Configure----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to config the shape of AMX physical registers
/// AMX registers need to be configured before use. In X86PreTileConfig pass
/// the pldtilecfg instruction is inserted, however at that time we don't
/// know the shape of each physical tile register, because the register
/// allocation is not done yet. This pass runs after register allocation
/// pass. It collects the shape information of each physical tile register
/// and stores the shape in the stack slot that is allocated for load config
/// to tile config register.
17*e8d8bef9SDimitry Andric // 18*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 19*e8d8bef9SDimitry Andric 20*e8d8bef9SDimitry Andric #include "X86.h" 21*e8d8bef9SDimitry Andric #include "X86InstrBuilder.h" 22*e8d8bef9SDimitry Andric #include "X86MachineFunctionInfo.h" 23*e8d8bef9SDimitry Andric #include "X86RegisterInfo.h" 24*e8d8bef9SDimitry Andric #include "X86Subtarget.h" 25*e8d8bef9SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 26*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 27*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 28*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 29*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 30*e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 31*e8d8bef9SDimitry Andric #include "llvm/CodeGen/Passes.h" 32*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h" 33*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 34*e8d8bef9SDimitry Andric #include "llvm/CodeGen/TileShapeInfo.h" 35*e8d8bef9SDimitry Andric #include "llvm/CodeGen/VirtRegMap.h" 36*e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h" 37*e8d8bef9SDimitry Andric 38*e8d8bef9SDimitry Andric using namespace llvm; 39*e8d8bef9SDimitry Andric 40*e8d8bef9SDimitry Andric #define DEBUG_TYPE "tile-config" 41*e8d8bef9SDimitry Andric 42*e8d8bef9SDimitry Andric namespace { 43*e8d8bef9SDimitry Andric 44*e8d8bef9SDimitry Andric class X86TileConfig : public MachineFunctionPass { 45*e8d8bef9SDimitry Andric // context 46*e8d8bef9SDimitry Andric MachineFunction *MF = nullptr; 47*e8d8bef9SDimitry Andric const X86Subtarget *ST = nullptr; 48*e8d8bef9SDimitry Andric const TargetRegisterInfo *TRI; 49*e8d8bef9SDimitry Andric const TargetInstrInfo *TII; 50*e8d8bef9SDimitry Andric MachineDominatorTree *DomTree = nullptr; 51*e8d8bef9SDimitry Andric MachineRegisterInfo *MRI = nullptr; 
52*e8d8bef9SDimitry Andric VirtRegMap *VRM = nullptr; 53*e8d8bef9SDimitry Andric LiveIntervals *LIS = nullptr; 54*e8d8bef9SDimitry Andric 55*e8d8bef9SDimitry Andric MachineInstr *getTileConfigPoint(); 56*e8d8bef9SDimitry Andric void tileConfig(); 57*e8d8bef9SDimitry Andric 58*e8d8bef9SDimitry Andric public: 59*e8d8bef9SDimitry Andric X86TileConfig() : MachineFunctionPass(ID) {} 60*e8d8bef9SDimitry Andric 61*e8d8bef9SDimitry Andric /// Return the pass name. 62*e8d8bef9SDimitry Andric StringRef getPassName() const override { return "Tile Register Configure"; } 63*e8d8bef9SDimitry Andric 64*e8d8bef9SDimitry Andric /// X86TileConfig analysis usage. 65*e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 66*e8d8bef9SDimitry Andric 67*e8d8bef9SDimitry Andric /// Perform register allocation. 68*e8d8bef9SDimitry Andric bool runOnMachineFunction(MachineFunction &mf) override; 69*e8d8bef9SDimitry Andric 70*e8d8bef9SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 71*e8d8bef9SDimitry Andric return MachineFunctionProperties().set( 72*e8d8bef9SDimitry Andric MachineFunctionProperties::Property::NoPHIs); 73*e8d8bef9SDimitry Andric } 74*e8d8bef9SDimitry Andric 75*e8d8bef9SDimitry Andric static char ID; 76*e8d8bef9SDimitry Andric }; 77*e8d8bef9SDimitry Andric 78*e8d8bef9SDimitry Andric } // end anonymous namespace 79*e8d8bef9SDimitry Andric 80*e8d8bef9SDimitry Andric char X86TileConfig::ID = 0; 81*e8d8bef9SDimitry Andric 82*e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(X86TileConfig, "tileconfig", "Tile Register Configure", 83*e8d8bef9SDimitry Andric false, false) 84*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 85*e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 86*e8d8bef9SDimitry Andric INITIALIZE_PASS_END(X86TileConfig, "tileconfig", "Tile Register Configure", 87*e8d8bef9SDimitry Andric false, false) 88*e8d8bef9SDimitry Andric 89*e8d8bef9SDimitry Andric void 
X86TileConfig::getAnalysisUsage(AnalysisUsage &AU) const { 90*e8d8bef9SDimitry Andric AU.addRequired<MachineDominatorTree>(); 91*e8d8bef9SDimitry Andric AU.addRequired<LiveIntervals>(); 92*e8d8bef9SDimitry Andric AU.addPreserved<SlotIndexes>(); 93*e8d8bef9SDimitry Andric AU.addRequired<VirtRegMap>(); 94*e8d8bef9SDimitry Andric AU.setPreservesAll(); 95*e8d8bef9SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 96*e8d8bef9SDimitry Andric } 97*e8d8bef9SDimitry Andric 98*e8d8bef9SDimitry Andric static unsigned getTilePhysRegIndex(Register PhysReg) { 99*e8d8bef9SDimitry Andric assert((PhysReg >= X86::TMM0 && X86::TMM0 <= X86::TMM7) && 100*e8d8bef9SDimitry Andric "Tile register number is invalid"); 101*e8d8bef9SDimitry Andric return (PhysReg - X86::TMM0); 102*e8d8bef9SDimitry Andric } 103*e8d8bef9SDimitry Andric 104*e8d8bef9SDimitry Andric static MachineInstr * 105*e8d8bef9SDimitry Andric storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 106*e8d8bef9SDimitry Andric Register SrcReg, unsigned BitSize, int FrameIdx, int Offset, 107*e8d8bef9SDimitry Andric const TargetInstrInfo *TII, const TargetRegisterClass *RC, 108*e8d8bef9SDimitry Andric const TargetRegisterInfo *TRI) { 109*e8d8bef9SDimitry Andric 110*e8d8bef9SDimitry Andric unsigned SubIdx = (BitSize == 8) ? X86::sub_8bit : X86::sub_16bit; 111*e8d8bef9SDimitry Andric unsigned Opc = (BitSize == 8) ? 
X86::MOV8mr : X86::MOV16mr; 112*e8d8bef9SDimitry Andric if (BitSize == TRI->getRegSizeInBits(*RC)) 113*e8d8bef9SDimitry Andric SubIdx = 0; 114*e8d8bef9SDimitry Andric MachineInstr *NewMI = 115*e8d8bef9SDimitry Andric addFrameReference(BuildMI(MBB, MI, DebugLoc(), TII->get(Opc)), FrameIdx, 116*e8d8bef9SDimitry Andric Offset) 117*e8d8bef9SDimitry Andric .addReg(SrcReg, 0, SubIdx); 118*e8d8bef9SDimitry Andric return NewMI; 119*e8d8bef9SDimitry Andric } 120*e8d8bef9SDimitry Andric 121*e8d8bef9SDimitry Andric static MachineInstr *storeImmToStackSlot(MachineBasicBlock &MBB, 122*e8d8bef9SDimitry Andric MachineBasicBlock::iterator MI, 123*e8d8bef9SDimitry Andric int64_t Imm, unsigned BitSize, 124*e8d8bef9SDimitry Andric int FrameIdx, int Offset, 125*e8d8bef9SDimitry Andric const TargetInstrInfo *TII) { 126*e8d8bef9SDimitry Andric unsigned Opc = (BitSize == 8) ? X86::MOV8mi : X86::MOV16mi; 127*e8d8bef9SDimitry Andric return addFrameReference(BuildMI(MBB, MI, DebugLoc(), TII->get(Opc)), 128*e8d8bef9SDimitry Andric FrameIdx, Offset) 129*e8d8bef9SDimitry Andric .addImm(Imm); 130*e8d8bef9SDimitry Andric } 131*e8d8bef9SDimitry Andric 132*e8d8bef9SDimitry Andric MachineInstr *X86TileConfig::getTileConfigPoint() { 133*e8d8bef9SDimitry Andric for (MachineBasicBlock &MBB : *MF) { 134*e8d8bef9SDimitry Andric 135*e8d8bef9SDimitry Andric // Traverse the basic block. 136*e8d8bef9SDimitry Andric for (MachineInstr &MI : MBB) 137*e8d8bef9SDimitry Andric // Refer X86PreTileConfig.cpp. 138*e8d8bef9SDimitry Andric // We only support one tile config for now. 
139*e8d8bef9SDimitry Andric if (MI.getOpcode() == X86::PLDTILECFG) 140*e8d8bef9SDimitry Andric return &MI; 141*e8d8bef9SDimitry Andric } 142*e8d8bef9SDimitry Andric 143*e8d8bef9SDimitry Andric return nullptr; 144*e8d8bef9SDimitry Andric } 145*e8d8bef9SDimitry Andric 146*e8d8bef9SDimitry Andric void X86TileConfig::tileConfig() { 147*e8d8bef9SDimitry Andric MachineInstr *MI = getTileConfigPoint(); 148*e8d8bef9SDimitry Andric if (!MI) 149*e8d8bef9SDimitry Andric return; 150*e8d8bef9SDimitry Andric MachineBasicBlock *MBB = MI->getParent(); 151*e8d8bef9SDimitry Andric int SS = MI->getOperand(1).getIndex(); 152*e8d8bef9SDimitry Andric BitVector PhysRegs(TRI->getNumRegs()); 153*e8d8bef9SDimitry Andric 154*e8d8bef9SDimitry Andric // Fill in the palette first. 155*e8d8bef9SDimitry Andric auto *NewMI = storeImmToStackSlot(*MBB, *MI, 1, 8, SS, 0, TII); 156*e8d8bef9SDimitry Andric LIS->InsertMachineInstrInMaps(*NewMI); 157*e8d8bef9SDimitry Andric // Fill in the shape of each tile physical register. 158*e8d8bef9SDimitry Andric for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { 159*e8d8bef9SDimitry Andric Register VirtReg = Register::index2VirtReg(i); 160*e8d8bef9SDimitry Andric if (MRI->reg_nodbg_empty(VirtReg)) 161*e8d8bef9SDimitry Andric continue; 162*e8d8bef9SDimitry Andric const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); 163*e8d8bef9SDimitry Andric if (RC.getID() != X86::TILERegClassID) 164*e8d8bef9SDimitry Andric continue; 165*e8d8bef9SDimitry Andric Register PhysReg = VRM->getPhys(VirtReg); 166*e8d8bef9SDimitry Andric if (PhysRegs.test(PhysReg)) 167*e8d8bef9SDimitry Andric continue; 168*e8d8bef9SDimitry Andric PhysRegs.set(PhysReg); 169*e8d8bef9SDimitry Andric ShapeT Shape = VRM->getShape(VirtReg); 170*e8d8bef9SDimitry Andric Register RowReg = Shape.getRow()->getReg(); 171*e8d8bef9SDimitry Andric Register ColReg = Shape.getCol()->getReg(); 172*e8d8bef9SDimitry Andric 173*e8d8bef9SDimitry Andric // Here is the data format for the tile config. 
174*e8d8bef9SDimitry Andric // 0 palette 175*e8d8bef9SDimitry Andric // 1 start_row 176*e8d8bef9SDimitry Andric // 2-15 reserved, must be zero 177*e8d8bef9SDimitry Andric // 16-17 tile0.colsb Tile 0 bytes per row. 178*e8d8bef9SDimitry Andric // 18-19 tile1.colsb Tile 1 bytes per row. 179*e8d8bef9SDimitry Andric // 20-21 tile2.colsb Tile 2 bytes per row. 180*e8d8bef9SDimitry Andric // ... (sequence continues) 181*e8d8bef9SDimitry Andric // 30-31 tile7.colsb Tile 7 bytes per row. 182*e8d8bef9SDimitry Andric // 32-47 reserved, must be zero 183*e8d8bef9SDimitry Andric // 48 tile0.rows Tile 0 rows. 184*e8d8bef9SDimitry Andric // 49 tile1.rows Tile 1 rows. 185*e8d8bef9SDimitry Andric // 50 tile2.rows Tile 2 rows. 186*e8d8bef9SDimitry Andric // ... (sequence continues) 187*e8d8bef9SDimitry Andric // 55 tile7.rows Tile 7 rows. 188*e8d8bef9SDimitry Andric // 56-63 reserved, must be zero 189*e8d8bef9SDimitry Andric unsigned Index = getTilePhysRegIndex(PhysReg); 190*e8d8bef9SDimitry Andric int RowOffset = 48 + Index; 191*e8d8bef9SDimitry Andric int ColOffset = 16 + Index * 2; 192*e8d8bef9SDimitry Andric 193*e8d8bef9SDimitry Andric unsigned BitSize = 8; 194*e8d8bef9SDimitry Andric for (const auto &Pair : {std::make_pair(RowReg, RowOffset), 195*e8d8bef9SDimitry Andric std::make_pair(ColReg, ColOffset)}) { 196*e8d8bef9SDimitry Andric int64_t Imm; 197*e8d8bef9SDimitry Andric int ImmCount = 0; 198*e8d8bef9SDimitry Andric // All def must be the same value, otherwise it is invalid MIs. 199*e8d8bef9SDimitry Andric // Immediate is prefered. 
200*e8d8bef9SDimitry Andric for (const MachineOperand &MO : MRI->def_operands(Pair.first)) { 201*e8d8bef9SDimitry Andric const auto *Inst = MO.getParent(); 202*e8d8bef9SDimitry Andric if (Inst->isMoveImmediate()) { 203*e8d8bef9SDimitry Andric ImmCount++; 204*e8d8bef9SDimitry Andric Imm = Inst->getOperand(1).getImm(); 205*e8d8bef9SDimitry Andric break; 206*e8d8bef9SDimitry Andric } 207*e8d8bef9SDimitry Andric } 208*e8d8bef9SDimitry Andric auto StoreConfig = [&](int Offset) { 209*e8d8bef9SDimitry Andric MachineInstr *NewMI = nullptr; 210*e8d8bef9SDimitry Andric if (ImmCount) 211*e8d8bef9SDimitry Andric NewMI = storeImmToStackSlot(*MBB, *MI, Imm, BitSize, SS, Offset, TII); 212*e8d8bef9SDimitry Andric else { 213*e8d8bef9SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(Pair.first); 214*e8d8bef9SDimitry Andric NewMI = storeRegToStackSlot(*MBB, *MI, Pair.first, BitSize, SS, 215*e8d8bef9SDimitry Andric Offset, TII, RC, TRI); 216*e8d8bef9SDimitry Andric } 217*e8d8bef9SDimitry Andric SlotIndex SIdx = LIS->InsertMachineInstrInMaps(*NewMI); 218*e8d8bef9SDimitry Andric if (!ImmCount) { 219*e8d8bef9SDimitry Andric // Extend the live interval. 
220*e8d8bef9SDimitry Andric SmallVector<SlotIndex, 8> EndPoints = {SIdx.getRegSlot()}; 221*e8d8bef9SDimitry Andric LiveInterval &Int = LIS->getInterval(Pair.first); 222*e8d8bef9SDimitry Andric LIS->extendToIndices(Int, EndPoints); 223*e8d8bef9SDimitry Andric } 224*e8d8bef9SDimitry Andric }; 225*e8d8bef9SDimitry Andric StoreConfig(Pair.second); 226*e8d8bef9SDimitry Andric BitSize += 8; 227*e8d8bef9SDimitry Andric } 228*e8d8bef9SDimitry Andric } 229*e8d8bef9SDimitry Andric } 230*e8d8bef9SDimitry Andric 231*e8d8bef9SDimitry Andric bool X86TileConfig::runOnMachineFunction(MachineFunction &mf) { 232*e8d8bef9SDimitry Andric MF = &mf; 233*e8d8bef9SDimitry Andric MRI = &mf.getRegInfo(); 234*e8d8bef9SDimitry Andric ST = &mf.getSubtarget<X86Subtarget>(); 235*e8d8bef9SDimitry Andric TRI = ST->getRegisterInfo(); 236*e8d8bef9SDimitry Andric TII = mf.getSubtarget().getInstrInfo(); 237*e8d8bef9SDimitry Andric DomTree = &getAnalysis<MachineDominatorTree>(); 238*e8d8bef9SDimitry Andric VRM = &getAnalysis<VirtRegMap>(); 239*e8d8bef9SDimitry Andric LIS = &getAnalysis<LiveIntervals>(); 240*e8d8bef9SDimitry Andric 241*e8d8bef9SDimitry Andric if (VRM->isShapeMapEmpty()) 242*e8d8bef9SDimitry Andric return false; 243*e8d8bef9SDimitry Andric 244*e8d8bef9SDimitry Andric tileConfig(); 245*e8d8bef9SDimitry Andric return true; 246*e8d8bef9SDimitry Andric } 247*e8d8bef9SDimitry Andric 248*e8d8bef9SDimitry Andric FunctionPass *llvm::createX86TileConfigPass() { return new X86TileConfig(); } 249