xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86LowerTileCopy.cpp (revision 7a6dacaca14b62ca4b74406814becb87a3fefac0)
1fe6060f1SDimitry Andric //===-- X86LowerTileCopy.cpp - Expand Tile Copy Instructions---------------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
// This file defines the pass which lowers AMX tile copy instructions. Since
// there is no tile copy instruction, we need to store the tile register to
// the stack and load it from the stack into another tile register. We need an
// extra GR to hold the stride, and we need a stack slot to hold the tile data
// register.
// We run this pass after copy propagation, so that we don't miss copy
// optimizations, and before prolog/epilog insertion, so that we can still
// allocate stack slots.
16fe6060f1SDimitry Andric //
17fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
18fe6060f1SDimitry Andric 
19fe6060f1SDimitry Andric #include "X86.h"
20fe6060f1SDimitry Andric #include "X86InstrBuilder.h"
21fe6060f1SDimitry Andric #include "X86InstrInfo.h"
22fe6060f1SDimitry Andric #include "X86Subtarget.h"
23fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
24fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
25fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
26fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
27fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
28fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
29fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
30fe6060f1SDimitry Andric #include "llvm/CodeGen/Passes.h"
31fe6060f1SDimitry Andric #include "llvm/IR/DebugLoc.h"
32fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
33fe6060f1SDimitry Andric #include "llvm/Support/Debug.h"
34fe6060f1SDimitry Andric 
35fe6060f1SDimitry Andric using namespace llvm;
36fe6060f1SDimitry Andric 
37fe6060f1SDimitry Andric #define DEBUG_TYPE "x86-lower-tile-copy"
38fe6060f1SDimitry Andric 
39fe6060f1SDimitry Andric namespace {
40fe6060f1SDimitry Andric 
41fe6060f1SDimitry Andric class X86LowerTileCopy : public MachineFunctionPass {
42fe6060f1SDimitry Andric public:
43fe6060f1SDimitry Andric   static char ID;
44fe6060f1SDimitry Andric 
45fe6060f1SDimitry Andric   X86LowerTileCopy() : MachineFunctionPass(ID) {}
46fe6060f1SDimitry Andric 
47fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
48fe6060f1SDimitry Andric 
49fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
50fe6060f1SDimitry Andric 
51fe6060f1SDimitry Andric   StringRef getPassName() const override { return "X86 Lower Tile Copy"; }
52fe6060f1SDimitry Andric };
53fe6060f1SDimitry Andric 
54fe6060f1SDimitry Andric } // namespace
55fe6060f1SDimitry Andric 
56fe6060f1SDimitry Andric char X86LowerTileCopy::ID = 0;
57fe6060f1SDimitry Andric 
58fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(X86LowerTileCopy, "lowertilecopy", "Tile Copy Lowering",
59fe6060f1SDimitry Andric                       false, false)
60fe6060f1SDimitry Andric INITIALIZE_PASS_END(X86LowerTileCopy, "lowertilecopy", "Tile Copy Lowering",
61fe6060f1SDimitry Andric                     false, false)
62fe6060f1SDimitry Andric 
63fe6060f1SDimitry Andric void X86LowerTileCopy::getAnalysisUsage(AnalysisUsage &AU) const {
64fe6060f1SDimitry Andric   AU.setPreservesAll();
65fe6060f1SDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
66fe6060f1SDimitry Andric }
67fe6060f1SDimitry Andric 
68fe6060f1SDimitry Andric FunctionPass *llvm::createX86LowerTileCopyPass() {
69fe6060f1SDimitry Andric   return new X86LowerTileCopy();
70fe6060f1SDimitry Andric }
71fe6060f1SDimitry Andric 
72fe6060f1SDimitry Andric bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
73fe6060f1SDimitry Andric   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
74fe6060f1SDimitry Andric   const X86InstrInfo *TII = ST.getInstrInfo();
75fe6060f1SDimitry Andric   bool Changed = false;
76fe6060f1SDimitry Andric 
77fe6060f1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
78349cc55cSDimitry Andric     for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
79fe6060f1SDimitry Andric       if (!MI.isCopy())
80fe6060f1SDimitry Andric         continue;
81fe6060f1SDimitry Andric       MachineOperand &DstMO = MI.getOperand(0);
82fe6060f1SDimitry Andric       MachineOperand &SrcMO = MI.getOperand(1);
83fe6060f1SDimitry Andric       Register SrcReg = SrcMO.getReg();
84fe6060f1SDimitry Andric       Register DstReg = DstMO.getReg();
85fe6060f1SDimitry Andric       if (!X86::TILERegClass.contains(DstReg, SrcReg))
86fe6060f1SDimitry Andric         continue;
87fe6060f1SDimitry Andric 
88fe6060f1SDimitry Andric       const TargetRegisterInfo *TRI = ST.getRegisterInfo();
89fe6060f1SDimitry Andric       // Allocate stack slot for tile register
90fe6060f1SDimitry Andric       unsigned Size = TRI->getSpillSize(X86::TILERegClass);
91fe6060f1SDimitry Andric       Align Alignment = TRI->getSpillAlign(X86::TILERegClass);
92fe6060f1SDimitry Andric       int TileSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
93fe6060f1SDimitry Andric       // Allocate stack slot for stride register
94fe6060f1SDimitry Andric       Size = TRI->getSpillSize(X86::GR64RegClass);
95fe6060f1SDimitry Andric       Alignment = TRI->getSpillAlign(X86::GR64RegClass);
96fe6060f1SDimitry Andric       int StrideSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment);
97fe6060f1SDimitry Andric 
98fe6060f1SDimitry Andric       // TODO: Pick a killed regiter to avoid save/reload. There is problem
99fe6060f1SDimitry Andric       // to get live interval in this stage.
100fe6060f1SDimitry Andric       Register GR64Cand = X86::RAX;
101fe6060f1SDimitry Andric 
102fe6060f1SDimitry Andric       const DebugLoc &DL = MI.getDebugLoc();
103fe6060f1SDimitry Andric       // mov %rax (%sp)
104fe6060f1SDimitry Andric       BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), GR64Cand);
105fe6060f1SDimitry Andric       addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64mr)), StrideSS)
106fe6060f1SDimitry Andric           .addReg(GR64Cand);
107fe6060f1SDimitry Andric       // mov 64 %rax
108fe6060f1SDimitry Andric       BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
109fe6060f1SDimitry Andric       // tilestored %tmm, (%sp, %idx)
110*7a6dacacSDimitry Andric #define GET_EGPR_IF_ENABLED(OPC) (ST.hasEGPR() ? OPC##_EVEX : OPC)
111*7a6dacacSDimitry Andric       unsigned Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
112fe6060f1SDimitry Andric       MachineInstr *NewMI =
113fe6060f1SDimitry Andric           addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), TileSS)
114fe6060f1SDimitry Andric               .addReg(SrcReg, getKillRegState(SrcMO.isKill()));
115fe6060f1SDimitry Andric       MachineOperand &MO = NewMI->getOperand(2);
116fe6060f1SDimitry Andric       MO.setReg(GR64Cand);
117fe6060f1SDimitry Andric       MO.setIsKill(true);
118fe6060f1SDimitry Andric       // tileloadd (%sp, %idx), %tmm
119*7a6dacacSDimitry Andric       Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
120*7a6dacacSDimitry Andric #undef GET_EGPR_IF_ENABLED
121fe6060f1SDimitry Andric       NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
122fe6060f1SDimitry Andric                                 TileSS);
123fe6060f1SDimitry Andric       // restore %rax
124fe6060f1SDimitry Andric       // mov (%sp) %rax
125fe6060f1SDimitry Andric       addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm), GR64Cand),
126fe6060f1SDimitry Andric                         StrideSS);
127fe6060f1SDimitry Andric       MI.eraseFromParent();
128fe6060f1SDimitry Andric       Changed = true;
129fe6060f1SDimitry Andric     }
130fe6060f1SDimitry Andric   }
131fe6060f1SDimitry Andric   return Changed;
132fe6060f1SDimitry Andric }
133