1e8d8bef9SDimitry Andric //===-- X86TileConfig.cpp - Tile Register Configure----------------------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
/// \file Pass to configure the shape of AMX physical registers.
/// AMX registers need to be configured before use. The X86PreTileConfig pass
/// inserts the pldtilecfg instruction, but at that time we don't know the
/// shape of each physical tile register, because register allocation is not
/// done yet. This pass runs after the register allocation pass. It collects
/// the shape information of each physical tile register and stores the shape
/// in the stack slot that is allocated for loading the config into the tile
/// config register.
17e8d8bef9SDimitry Andric //
18e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
19e8d8bef9SDimitry Andric
20e8d8bef9SDimitry Andric #include "X86.h"
21e8d8bef9SDimitry Andric #include "X86InstrBuilder.h"
22e8d8bef9SDimitry Andric #include "X86MachineFunctionInfo.h"
23e8d8bef9SDimitry Andric #include "X86RegisterInfo.h"
24e8d8bef9SDimitry Andric #include "X86Subtarget.h"
25e8d8bef9SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
26e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
27e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
28e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
29e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
30e8d8bef9SDimitry Andric #include "llvm/CodeGen/Passes.h"
31e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
32e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
33e8d8bef9SDimitry Andric #include "llvm/CodeGen/TileShapeInfo.h"
34e8d8bef9SDimitry Andric #include "llvm/CodeGen/VirtRegMap.h"
35e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
36e8d8bef9SDimitry Andric
37e8d8bef9SDimitry Andric using namespace llvm;
38e8d8bef9SDimitry Andric
3981ad6265SDimitry Andric #define DEBUG_TYPE "tileconfig"
40e8d8bef9SDimitry Andric
41e8d8bef9SDimitry Andric namespace {
42e8d8bef9SDimitry Andric
43fe6060f1SDimitry Andric struct X86TileConfig : public MachineFunctionPass {
44e8d8bef9SDimitry Andric
X86TileConfig__anon082a94c50111::X86TileConfig45e8d8bef9SDimitry Andric X86TileConfig() : MachineFunctionPass(ID) {}
46e8d8bef9SDimitry Andric
47e8d8bef9SDimitry Andric /// Return the pass name.
getPassName__anon082a94c50111::X86TileConfig48e8d8bef9SDimitry Andric StringRef getPassName() const override { return "Tile Register Configure"; }
49e8d8bef9SDimitry Andric
50e8d8bef9SDimitry Andric /// X86TileConfig analysis usage.
getAnalysisUsage__anon082a94c50111::X86TileConfig51fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
52fe6060f1SDimitry Andric AU.setPreservesAll();
53fe6060f1SDimitry Andric AU.addRequired<VirtRegMap>();
54*0fca6ea1SDimitry Andric AU.addRequired<LiveIntervalsWrapperPass>();
55fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
56fe6060f1SDimitry Andric }
57e8d8bef9SDimitry Andric
58e8d8bef9SDimitry Andric /// Perform register allocation.
59e8d8bef9SDimitry Andric bool runOnMachineFunction(MachineFunction &mf) override;
60e8d8bef9SDimitry Andric
getRequiredProperties__anon082a94c50111::X86TileConfig61e8d8bef9SDimitry Andric MachineFunctionProperties getRequiredProperties() const override {
62e8d8bef9SDimitry Andric return MachineFunctionProperties().set(
63e8d8bef9SDimitry Andric MachineFunctionProperties::Property::NoPHIs);
64e8d8bef9SDimitry Andric }
65e8d8bef9SDimitry Andric
66e8d8bef9SDimitry Andric static char ID;
67e8d8bef9SDimitry Andric };
68e8d8bef9SDimitry Andric
69e8d8bef9SDimitry Andric } // end anonymous namespace
70e8d8bef9SDimitry Andric
71e8d8bef9SDimitry Andric char X86TileConfig::ID = 0;
72e8d8bef9SDimitry Andric
7381ad6265SDimitry Andric INITIALIZE_PASS_BEGIN(X86TileConfig, DEBUG_TYPE, "Tile Register Configure",
74e8d8bef9SDimitry Andric false, false)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)75e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
7681ad6265SDimitry Andric INITIALIZE_PASS_END(X86TileConfig, DEBUG_TYPE, "Tile Register Configure", false,
7781ad6265SDimitry Andric false)
78e8d8bef9SDimitry Andric
79fe6060f1SDimitry Andric bool X86TileConfig::runOnMachineFunction(MachineFunction &MF) {
80*0fca6ea1SDimitry Andric X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
81*0fca6ea1SDimitry Andric // Early exit in the common case of non-AMX code.
82*0fca6ea1SDimitry Andric if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
83*0fca6ea1SDimitry Andric return false;
84*0fca6ea1SDimitry Andric
85fe6060f1SDimitry Andric const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
86fe6060f1SDimitry Andric const TargetRegisterInfo *TRI = ST.getRegisterInfo();
87fe6060f1SDimitry Andric const TargetInstrInfo *TII = ST.getInstrInfo();
88fe6060f1SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
89*0fca6ea1SDimitry Andric LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
90fe6060f1SDimitry Andric VirtRegMap &VRM = getAnalysis<VirtRegMap>();
91fe6060f1SDimitry Andric
92fe6060f1SDimitry Andric if (VRM.isShapeMapEmpty())
93fe6060f1SDimitry Andric return false;
94fe6060f1SDimitry Andric
95fe6060f1SDimitry Andric int SS = INT_MAX;
96fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : MF) {
97fe6060f1SDimitry Andric for (MachineInstr &MI : MBB) {
9881ad6265SDimitry Andric if (MI.getOpcode() == X86::PLDTILECFGV) {
99fe6060f1SDimitry Andric SS = MI.getOperand(0).getIndex();
100fe6060f1SDimitry Andric break;
101fe6060f1SDimitry Andric }
102fe6060f1SDimitry Andric }
103fe6060f1SDimitry Andric if (SS != INT_MAX)
104fe6060f1SDimitry Andric break;
105e8d8bef9SDimitry Andric }
10681ad6265SDimitry Andric // Didn't find PLDTILECFGV, just return false;
10781ad6265SDimitry Andric if (SS == INT_MAX)
10881ad6265SDimitry Andric return false;
109e8d8bef9SDimitry Andric
110fe6060f1SDimitry Andric // Try to find a point to insert MIs for constant shapes.
111fe6060f1SDimitry Andric // Here we are leveraging the palette id inserted in PreRA pass.
112fe6060f1SDimitry Andric unsigned ConstPos = 0;
113fe6060f1SDimitry Andric MachineInstr *ConstMI = nullptr;
114fe6060f1SDimitry Andric for (MachineInstr &MI : MF.front()) {
115fe6060f1SDimitry Andric if (MI.getOpcode() == X86::MOV8mi && SS == MI.getOperand(0).getIndex()) {
116fe6060f1SDimitry Andric ConstMI = &MI;
117fe6060f1SDimitry Andric break;
118fe6060f1SDimitry Andric }
119fe6060f1SDimitry Andric ++ConstPos;
120fe6060f1SDimitry Andric }
121fe6060f1SDimitry Andric assert(ConstMI && "Cannot find an insertion point");
122fe6060f1SDimitry Andric
123fe6060f1SDimitry Andric unsigned AMXRegNum = TRI->getRegClass(X86::TILERegClassID)->getNumRegs();
124fe6060f1SDimitry Andric SmallVector<Register, 8> Phys2Virt(AMXRegNum, 0);
125fe6060f1SDimitry Andric for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
126fe6060f1SDimitry Andric Register VirtReg = Register::index2VirtReg(I);
127fe6060f1SDimitry Andric if (MRI.reg_nodbg_empty(VirtReg))
128fe6060f1SDimitry Andric continue;
129fe6060f1SDimitry Andric if (MRI.getRegClass(VirtReg)->getID() != X86::TILERegClassID)
130fe6060f1SDimitry Andric continue;
13181ad6265SDimitry Andric if (VRM.getPhys(VirtReg) == VirtRegMap::NO_PHYS_REG)
13281ad6265SDimitry Andric continue;
133fe6060f1SDimitry Andric unsigned Index = VRM.getPhys(VirtReg) - X86::TMM0;
134fe6060f1SDimitry Andric if (!Phys2Virt[Index])
135fe6060f1SDimitry Andric Phys2Virt[Index] = VirtReg;
136e8d8bef9SDimitry Andric }
137e8d8bef9SDimitry Andric
138e8d8bef9SDimitry Andric // Fill in the shape of each tile physical register.
139fe6060f1SDimitry Andric for (unsigned I = 0; I < AMXRegNum; ++I) {
140fe6060f1SDimitry Andric if (!Phys2Virt[I])
141e8d8bef9SDimitry Andric continue;
142fe6060f1SDimitry Andric DebugLoc DL;
143fe6060f1SDimitry Andric bool IsRow = true;
144fe6060f1SDimitry Andric MachineInstr *NewMI = nullptr;
145fe6060f1SDimitry Andric ShapeT Shape = VRM.getShape(Phys2Virt[I]);
146fe6060f1SDimitry Andric for (auto &R : {Shape.getRow()->getReg(), Shape.getCol()->getReg()}) {
147e8d8bef9SDimitry Andric // Here is the data format for the tile config.
148e8d8bef9SDimitry Andric // 0 palette
149e8d8bef9SDimitry Andric // 1 start_row
150e8d8bef9SDimitry Andric // 2-15 reserved, must be zero
151e8d8bef9SDimitry Andric // 16-17 tile0.colsb Tile 0 bytes per row.
152e8d8bef9SDimitry Andric // 18-19 tile1.colsb Tile 1 bytes per row.
153e8d8bef9SDimitry Andric // 20-21 tile2.colsb Tile 2 bytes per row.
154e8d8bef9SDimitry Andric // ... (sequence continues)
155e8d8bef9SDimitry Andric // 30-31 tile7.colsb Tile 7 bytes per row.
156e8d8bef9SDimitry Andric // 32-47 reserved, must be zero
157e8d8bef9SDimitry Andric // 48 tile0.rows Tile 0 rows.
158e8d8bef9SDimitry Andric // 49 tile1.rows Tile 1 rows.
159e8d8bef9SDimitry Andric // 50 tile2.rows Tile 2 rows.
160e8d8bef9SDimitry Andric // ... (sequence continues)
161e8d8bef9SDimitry Andric // 55 tile7.rows Tile 7 rows.
162e8d8bef9SDimitry Andric // 56-63 reserved, must be zero
163fe6060f1SDimitry Andric int64_t Imm = INT64_MAX;
164fe6060f1SDimitry Andric int Offset = IsRow ? 48 + I : 16 + I * 2;
165fe6060f1SDimitry Andric for (auto &DefMI : MRI.def_instructions(R)) {
166fe6060f1SDimitry Andric MachineBasicBlock &MBB = *DefMI.getParent();
167fe6060f1SDimitry Andric if (DefMI.isMoveImmediate()) {
168fe6060f1SDimitry Andric if (Imm != INT64_MAX) {
169fe6060f1SDimitry Andric // FIXME: We should handle this case in future.
170fe6060f1SDimitry Andric assert(Imm == DefMI.getOperand(1).getImm() &&
171fe6060f1SDimitry Andric "Cannot initialize with different shapes");
172fe6060f1SDimitry Andric continue;
173fe6060f1SDimitry Andric }
174fe6060f1SDimitry Andric Imm = DefMI.getOperand(1).getImm();
175fe6060f1SDimitry Andric NewMI = addFrameReference(
176fe6060f1SDimitry Andric BuildMI(MF.front(), ++ConstMI->getIterator(), DL,
177fe6060f1SDimitry Andric TII->get(IsRow ? X86::MOV8mi : X86::MOV16mi)),
178fe6060f1SDimitry Andric SS, Offset)
179fe6060f1SDimitry Andric .addImm(Imm);
180fe6060f1SDimitry Andric ConstMI = NewMI;
181fe6060f1SDimitry Andric LIS.InsertMachineInstrInMaps(*NewMI);
182fe6060f1SDimitry Andric } else {
183fe6060f1SDimitry Andric unsigned SubIdx = IsRow ? X86::sub_8bit : X86::sub_16bit;
184fe6060f1SDimitry Andric unsigned RegSize = TRI->getRegSizeInBits(*MRI.getRegClass(R));
185fe6060f1SDimitry Andric if ((IsRow && RegSize == 8) || (!IsRow && RegSize == 16))
186fe6060f1SDimitry Andric SubIdx = 0;
187fe6060f1SDimitry Andric auto Iter = DefMI.getIterator();
188fe6060f1SDimitry Andric if (&MBB == &MF.front() &&
189fe6060f1SDimitry Andric (unsigned)std::distance(MBB.instr_begin(), Iter) < ConstPos)
190fe6060f1SDimitry Andric Iter = ConstMI->getIterator();
191fe6060f1SDimitry Andric NewMI = addFrameReference(
192fe6060f1SDimitry Andric BuildMI(MBB, ++Iter, DL,
193fe6060f1SDimitry Andric TII->get(IsRow ? X86::MOV8mr : X86::MOV16mr)),
194fe6060f1SDimitry Andric SS, Offset)
195fe6060f1SDimitry Andric .addReg(R, 0, SubIdx);
196fe6060f1SDimitry Andric SlotIndex SIdx = LIS.InsertMachineInstrInMaps(*NewMI);
197fe6060f1SDimitry Andric LIS.extendToIndices(LIS.getInterval(R), {SIdx.getRegSlot()});
198e8d8bef9SDimitry Andric }
199e8d8bef9SDimitry Andric }
200fe6060f1SDimitry Andric IsRow = false;
201e8d8bef9SDimitry Andric }
202e8d8bef9SDimitry Andric }
203e8d8bef9SDimitry Andric return true;
204e8d8bef9SDimitry Andric }
205e8d8bef9SDimitry Andric
createX86TileConfigPass()206e8d8bef9SDimitry Andric FunctionPass *llvm::createX86TileConfigPass() { return new X86TileConfig(); }
207