xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86TileConfig.cpp (revision fe6060f10f634930ff71b7c50291ddc610da2475)
1e8d8bef9SDimitry Andric //===-- X86TileConfig.cpp - Tile Register Configure----------------------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
/// \file Pass to configure the shape of AMX physical registers.
/// AMX registers need to be configured before use. The X86PreTileConfig pass
/// inserts the pldtilecfg instruction, but at that time we don't know the
/// shape of each physical tile register, because register allocation has not
/// been done yet. This pass runs after the register allocation pass. It
/// collects the shape information of each physical tile register and stores
/// the shape in the stack slot that is allocated for loading the config into
/// the tile config register.
17e8d8bef9SDimitry Andric //
18e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
19e8d8bef9SDimitry Andric 
20e8d8bef9SDimitry Andric #include "X86.h"
21e8d8bef9SDimitry Andric #include "X86InstrBuilder.h"
22e8d8bef9SDimitry Andric #include "X86MachineFunctionInfo.h"
23e8d8bef9SDimitry Andric #include "X86RegisterInfo.h"
24e8d8bef9SDimitry Andric #include "X86Subtarget.h"
25e8d8bef9SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
26e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
27e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
28e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
29e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
30e8d8bef9SDimitry Andric #include "llvm/CodeGen/Passes.h"
31e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
32e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
33e8d8bef9SDimitry Andric #include "llvm/CodeGen/TileShapeInfo.h"
34e8d8bef9SDimitry Andric #include "llvm/CodeGen/VirtRegMap.h"
35e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
36e8d8bef9SDimitry Andric 
37e8d8bef9SDimitry Andric using namespace llvm;
38e8d8bef9SDimitry Andric 
39e8d8bef9SDimitry Andric #define DEBUG_TYPE "tile-config"
40e8d8bef9SDimitry Andric 
41e8d8bef9SDimitry Andric namespace {
42e8d8bef9SDimitry Andric 
43*fe6060f1SDimitry Andric struct X86TileConfig : public MachineFunctionPass {
44e8d8bef9SDimitry Andric 
45e8d8bef9SDimitry Andric   X86TileConfig() : MachineFunctionPass(ID) {}
46e8d8bef9SDimitry Andric 
47e8d8bef9SDimitry Andric   /// Return the pass name.
48e8d8bef9SDimitry Andric   StringRef getPassName() const override { return "Tile Register Configure"; }
49e8d8bef9SDimitry Andric 
50e8d8bef9SDimitry Andric   /// X86TileConfig analysis usage.
51*fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
52*fe6060f1SDimitry Andric     AU.setPreservesAll();
53*fe6060f1SDimitry Andric     AU.addRequired<VirtRegMap>();
54*fe6060f1SDimitry Andric     AU.addRequired<LiveIntervals>();
55*fe6060f1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
56*fe6060f1SDimitry Andric   }
57e8d8bef9SDimitry Andric 
58e8d8bef9SDimitry Andric   /// Perform register allocation.
59e8d8bef9SDimitry Andric   bool runOnMachineFunction(MachineFunction &mf) override;
60e8d8bef9SDimitry Andric 
61e8d8bef9SDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
62e8d8bef9SDimitry Andric     return MachineFunctionProperties().set(
63e8d8bef9SDimitry Andric         MachineFunctionProperties::Property::NoPHIs);
64e8d8bef9SDimitry Andric   }
65e8d8bef9SDimitry Andric 
66e8d8bef9SDimitry Andric   static char ID;
67e8d8bef9SDimitry Andric };
68e8d8bef9SDimitry Andric 
69e8d8bef9SDimitry Andric } // end anonymous namespace
70e8d8bef9SDimitry Andric 
71e8d8bef9SDimitry Andric char X86TileConfig::ID = 0;
72e8d8bef9SDimitry Andric 
73e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(X86TileConfig, "tileconfig", "Tile Register Configure",
74e8d8bef9SDimitry Andric                       false, false)
75e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
76e8d8bef9SDimitry Andric INITIALIZE_PASS_END(X86TileConfig, "tileconfig", "Tile Register Configure",
77e8d8bef9SDimitry Andric                     false, false)
78e8d8bef9SDimitry Andric 
79*fe6060f1SDimitry Andric bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
80*fe6060f1SDimitry Andric   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
81*fe6060f1SDimitry Andric   const TargetRegisterInfo *TRI = ST.getRegisterInfo();
82*fe6060f1SDimitry Andric   const TargetInstrInfo *TII = ST.getInstrInfo();
83*fe6060f1SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
84*fe6060f1SDimitry Andric   LiveIntervals &LIS = getAnalysis<LiveIntervals>();
85*fe6060f1SDimitry Andric   VirtRegMap &VRM = getAnalysis<VirtRegMap>();
86*fe6060f1SDimitry Andric 
87*fe6060f1SDimitry Andric   if (VRM.isShapeMapEmpty())
88*fe6060f1SDimitry Andric     return false;
89*fe6060f1SDimitry Andric 
90*fe6060f1SDimitry Andric   int SS = INT_MAX;
91*fe6060f1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
92*fe6060f1SDimitry Andric     for (MachineInstr &MI : MBB) {
93*fe6060f1SDimitry Andric       if (MI.getOpcode() == X86::LDTILECFG) {
94*fe6060f1SDimitry Andric         SS = MI.getOperand(0).getIndex();
95*fe6060f1SDimitry Andric         break;
96*fe6060f1SDimitry Andric       }
97*fe6060f1SDimitry Andric     }
98*fe6060f1SDimitry Andric     if (SS != INT_MAX)
99*fe6060f1SDimitry Andric       break;
100e8d8bef9SDimitry Andric   }
101e8d8bef9SDimitry Andric 
102*fe6060f1SDimitry Andric   // Try to find a point to insert MIs for constant shapes.
103*fe6060f1SDimitry Andric   // Here we are leveraging the palette id inserted in PreRA pass.
104*fe6060f1SDimitry Andric   unsigned ConstPos = 0;
105*fe6060f1SDimitry Andric   MachineInstr *ConstMI = nullptr;
106*fe6060f1SDimitry Andric   for (MachineInstr &MI : MF.front()) {
107*fe6060f1SDimitry Andric     if (MI.getOpcode() == X86::MOV8mi && SS == MI.getOperand(0).getIndex()) {
108*fe6060f1SDimitry Andric       ConstMI = &MI;
109*fe6060f1SDimitry Andric       break;
110*fe6060f1SDimitry Andric     }
111*fe6060f1SDimitry Andric     ++ConstPos;
112*fe6060f1SDimitry Andric   }
113*fe6060f1SDimitry Andric   assert(ConstMI && "Cannot find an insertion point");
114*fe6060f1SDimitry Andric 
115*fe6060f1SDimitry Andric   unsigned AMXRegNum = TRI->getRegClass(X86::TILERegClassID)->getNumRegs();
116*fe6060f1SDimitry Andric   SmallVector<Register, 8> Phys2Virt(AMXRegNum, 0);
117*fe6060f1SDimitry Andric   for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
118*fe6060f1SDimitry Andric     Register VirtReg = Register::index2VirtReg(I);
119*fe6060f1SDimitry Andric     if (MRI.reg_nodbg_empty(VirtReg))
120*fe6060f1SDimitry Andric       continue;
121*fe6060f1SDimitry Andric     if (MRI.getRegClass(VirtReg)->getID() != X86::TILERegClassID)
122*fe6060f1SDimitry Andric       continue;
123*fe6060f1SDimitry Andric     unsigned Index = VRM.getPhys(VirtReg) - X86::TMM0;
124*fe6060f1SDimitry Andric     if (!Phys2Virt[Index])
125*fe6060f1SDimitry Andric       Phys2Virt[Index] = VirtReg;
126e8d8bef9SDimitry Andric   }
127e8d8bef9SDimitry Andric 
128e8d8bef9SDimitry Andric   // Fill in the shape of each tile physical register.
129*fe6060f1SDimitry Andric   for (unsigned I = 0; I < AMXRegNum; ++I) {
130*fe6060f1SDimitry Andric     if (!Phys2Virt[I])
131e8d8bef9SDimitry Andric       continue;
132*fe6060f1SDimitry Andric     DebugLoc DL;
133*fe6060f1SDimitry Andric     bool IsRow = true;
134*fe6060f1SDimitry Andric     MachineInstr *NewMI = nullptr;
135*fe6060f1SDimitry Andric     ShapeT Shape = VRM.getShape(Phys2Virt[I]);
136*fe6060f1SDimitry Andric     for (auto &R : {Shape.getRow()->getReg(), Shape.getCol()->getReg()}) {
137e8d8bef9SDimitry Andric       // Here is the data format for the tile config.
138e8d8bef9SDimitry Andric       // 0      palette
139e8d8bef9SDimitry Andric       // 1      start_row
140e8d8bef9SDimitry Andric       // 2-15   reserved, must be zero
141e8d8bef9SDimitry Andric       // 16-17  tile0.colsb Tile 0 bytes per row.
142e8d8bef9SDimitry Andric       // 18-19  tile1.colsb Tile 1 bytes per row.
143e8d8bef9SDimitry Andric       // 20-21  tile2.colsb Tile 2 bytes per row.
144e8d8bef9SDimitry Andric       // ... (sequence continues)
145e8d8bef9SDimitry Andric       // 30-31  tile7.colsb Tile 7 bytes per row.
146e8d8bef9SDimitry Andric       // 32-47  reserved, must be zero
147e8d8bef9SDimitry Andric       // 48     tile0.rows Tile 0 rows.
148e8d8bef9SDimitry Andric       // 49     tile1.rows Tile 1 rows.
149e8d8bef9SDimitry Andric       // 50     tile2.rows Tile 2 rows.
150e8d8bef9SDimitry Andric       // ... (sequence continues)
151e8d8bef9SDimitry Andric       // 55     tile7.rows Tile 7 rows.
152e8d8bef9SDimitry Andric       // 56-63  reserved, must be zero
153*fe6060f1SDimitry Andric       int64_t Imm = INT64_MAX;
154*fe6060f1SDimitry Andric       int Offset = IsRow ? 48 + I : 16 + I * 2;
155*fe6060f1SDimitry Andric       for (auto &DefMI : MRI.def_instructions(R)) {
156*fe6060f1SDimitry Andric         MachineBasicBlock &MBB = *DefMI.getParent();
157*fe6060f1SDimitry Andric         if (DefMI.isMoveImmediate()) {
158*fe6060f1SDimitry Andric           if (Imm != INT64_MAX) {
159*fe6060f1SDimitry Andric             // FIXME: We should handle this case in future.
160*fe6060f1SDimitry Andric             assert(Imm == DefMI.getOperand(1).getImm() &&
161*fe6060f1SDimitry Andric                    "Cannot initialize with different shapes");
162*fe6060f1SDimitry Andric             continue;
163*fe6060f1SDimitry Andric           }
164*fe6060f1SDimitry Andric           Imm = DefMI.getOperand(1).getImm();
165*fe6060f1SDimitry Andric           NewMI = addFrameReference(
166*fe6060f1SDimitry Andric                       BuildMI(MF.front(), ++ConstMI->getIterator(), DL,
167*fe6060f1SDimitry Andric                               TII->get(IsRow ? X86::MOV8mi : X86::MOV16mi)),
168*fe6060f1SDimitry Andric                       SS, Offset)
169*fe6060f1SDimitry Andric                       .addImm(Imm);
170*fe6060f1SDimitry Andric           ConstMI = NewMI;
171*fe6060f1SDimitry Andric           LIS.InsertMachineInstrInMaps(*NewMI);
172*fe6060f1SDimitry Andric         } else {
173*fe6060f1SDimitry Andric           unsigned SubIdx = IsRow ? X86::sub_8bit : X86::sub_16bit;
174*fe6060f1SDimitry Andric           unsigned RegSize = TRI->getRegSizeInBits(*MRI.getRegClass(R));
175*fe6060f1SDimitry Andric           if ((IsRow && RegSize == 8) || (!IsRow && RegSize == 16))
176*fe6060f1SDimitry Andric             SubIdx = 0;
177*fe6060f1SDimitry Andric           auto Iter = DefMI.getIterator();
178*fe6060f1SDimitry Andric           if (&MBB == &MF.front() &&
179*fe6060f1SDimitry Andric               (unsigned)std::distance(MBB.instr_begin(), Iter) < ConstPos)
180*fe6060f1SDimitry Andric             Iter = ConstMI->getIterator();
181*fe6060f1SDimitry Andric           NewMI = addFrameReference(
182*fe6060f1SDimitry Andric                       BuildMI(MBB, ++Iter, DL,
183*fe6060f1SDimitry Andric                               TII->get(IsRow ? X86::MOV8mr : X86::MOV16mr)),
184*fe6060f1SDimitry Andric                       SS, Offset)
185*fe6060f1SDimitry Andric                       .addReg(R, 0, SubIdx);
186*fe6060f1SDimitry Andric           SlotIndex SIdx = LIS.InsertMachineInstrInMaps(*NewMI);
187*fe6060f1SDimitry Andric           LIS.extendToIndices(LIS.getInterval(R), {SIdx.getRegSlot()});
188e8d8bef9SDimitry Andric         }
189e8d8bef9SDimitry Andric       }
190*fe6060f1SDimitry Andric       IsRow = false;
191e8d8bef9SDimitry Andric     }
192e8d8bef9SDimitry Andric   }
193e8d8bef9SDimitry Andric   return true;
194e8d8bef9SDimitry Andric }
195e8d8bef9SDimitry Andric 
196e8d8bef9SDimitry Andric FunctionPass *llvm::createX86TileConfigPass() { return new X86TileConfig(); }
197