1*fe6060f1SDimitry Andric //===-- X86LowerTileCopy.cpp - Expand Tile Copy Instructions---------------===// 2*fe6060f1SDimitry Andric // 3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*fe6060f1SDimitry Andric // 7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8*fe6060f1SDimitry Andric // 9*fe6060f1SDimitry Andric // This file defines the pass which lower AMX tile copy instructions. Since 10*fe6060f1SDimitry Andric // there is no tile copy instruction, we need store tile register to stack 11*fe6060f1SDimitry Andric // and load from stack to another tile register. We need extra GR to hold 12*fe6060f1SDimitry Andric // the stride, and we need stack slot to hold the tile data register. 13*fe6060f1SDimitry Andric // We would run this pass after copy propagation, so that we don't miss copy 14*fe6060f1SDimitry Andric // optimization. And we would run this pass before prolog/epilog insertion, 15*fe6060f1SDimitry Andric // so that we can allocate stack slot. 16*fe6060f1SDimitry Andric // 17*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 18*fe6060f1SDimitry Andric 19*fe6060f1SDimitry Andric #include "X86.h" 20*fe6060f1SDimitry Andric #include "X86InstrBuilder.h" 21*fe6060f1SDimitry Andric #include "X86InstrInfo.h" 22*fe6060f1SDimitry Andric #include "X86Subtarget.h" 23*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 24*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 25*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 26*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 27*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 28*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 29*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 30*fe6060f1SDimitry Andric #include "llvm/CodeGen/Passes.h" 31*fe6060f1SDimitry Andric #include "llvm/IR/DebugLoc.h" 32*fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 33*fe6060f1SDimitry Andric #include "llvm/Support/Debug.h" 34*fe6060f1SDimitry Andric 35*fe6060f1SDimitry Andric using namespace llvm; 36*fe6060f1SDimitry Andric 37*fe6060f1SDimitry Andric #define DEBUG_TYPE "x86-lower-tile-copy" 38*fe6060f1SDimitry Andric 39*fe6060f1SDimitry Andric namespace { 40*fe6060f1SDimitry Andric 41*fe6060f1SDimitry Andric class X86LowerTileCopy : public MachineFunctionPass { 42*fe6060f1SDimitry Andric public: 43*fe6060f1SDimitry Andric static char ID; 44*fe6060f1SDimitry Andric 45*fe6060f1SDimitry Andric X86LowerTileCopy() : MachineFunctionPass(ID) {} 46*fe6060f1SDimitry Andric 47*fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 48*fe6060f1SDimitry Andric 49*fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 50*fe6060f1SDimitry Andric 51*fe6060f1SDimitry Andric StringRef getPassName() const override { return "X86 Lower Tile Copy"; } 52*fe6060f1SDimitry Andric }; 53*fe6060f1SDimitry Andric 54*fe6060f1SDimitry Andric } // namespace 55*fe6060f1SDimitry Andric 56*fe6060f1SDimitry Andric char X86LowerTileCopy::ID = 0; 57*fe6060f1SDimitry Andric 58*fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(X86LowerTileCopy, "lowertilecopy", "Tile Copy Lowering", 59*fe6060f1SDimitry Andric false, false) 60*fe6060f1SDimitry Andric INITIALIZE_PASS_END(X86LowerTileCopy, "lowertilecopy", "Tile Copy Lowering", 61*fe6060f1SDimitry Andric false, false) 62*fe6060f1SDimitry Andric 63*fe6060f1SDimitry Andric void X86LowerTileCopy::getAnalysisUsage(AnalysisUsage &AU) const { 64*fe6060f1SDimitry Andric AU.setPreservesAll(); 65*fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 66*fe6060f1SDimitry Andric } 67*fe6060f1SDimitry Andric 68*fe6060f1SDimitry Andric FunctionPass *llvm::createX86LowerTileCopyPass() { 69*fe6060f1SDimitry Andric return new X86LowerTileCopy(); 70*fe6060f1SDimitry Andric } 71*fe6060f1SDimitry Andric 72*fe6060f1SDimitry Andric bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) { 73*fe6060f1SDimitry Andric const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>(); 74*fe6060f1SDimitry Andric const X86InstrInfo *TII = ST.getInstrInfo(); 75*fe6060f1SDimitry Andric bool Changed = false; 76*fe6060f1SDimitry Andric 77*fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : MF) { 78*fe6060f1SDimitry Andric for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); 79*fe6060f1SDimitry Andric MII != MIE;) { 80*fe6060f1SDimitry Andric MachineInstr &MI = *MII++; 81*fe6060f1SDimitry Andric if (!MI.isCopy()) 82*fe6060f1SDimitry Andric continue; 83*fe6060f1SDimitry Andric MachineOperand &DstMO = MI.getOperand(0); 84*fe6060f1SDimitry Andric MachineOperand &SrcMO = MI.getOperand(1); 85*fe6060f1SDimitry Andric Register SrcReg = SrcMO.getReg(); 86*fe6060f1SDimitry Andric Register DstReg = DstMO.getReg(); 87*fe6060f1SDimitry Andric if (!X86::TILERegClass.contains(DstReg, SrcReg)) 88*fe6060f1SDimitry Andric continue; 89*fe6060f1SDimitry Andric 90*fe6060f1SDimitry Andric const TargetRegisterInfo *TRI = ST.getRegisterInfo(); 91*fe6060f1SDimitry Andric // Allocate stack slot for tile register 92*fe6060f1SDimitry Andric unsigned Size = TRI->getSpillSize(X86::TILERegClass); 93*fe6060f1SDimitry Andric Align Alignment = TRI->getSpillAlign(X86::TILERegClass); 94*fe6060f1SDimitry Andric int TileSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment); 95*fe6060f1SDimitry Andric // Allocate stack slot for stride register 96*fe6060f1SDimitry Andric Size = TRI->getSpillSize(X86::GR64RegClass); 97*fe6060f1SDimitry Andric Alignment = TRI->getSpillAlign(X86::GR64RegClass); 98*fe6060f1SDimitry Andric int StrideSS = MF.getFrameInfo().CreateSpillStackObject(Size, Alignment); 99*fe6060f1SDimitry Andric 100*fe6060f1SDimitry Andric // TODO: Pick a killed regiter to avoid save/reload. There is problem 101*fe6060f1SDimitry Andric // to get live interval in this stage. 102*fe6060f1SDimitry Andric Register GR64Cand = X86::RAX; 103*fe6060f1SDimitry Andric 104*fe6060f1SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 105*fe6060f1SDimitry Andric // mov %rax (%sp) 106*fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), GR64Cand); 107*fe6060f1SDimitry Andric addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64mr)), StrideSS) 108*fe6060f1SDimitry Andric .addReg(GR64Cand); 109*fe6060f1SDimitry Andric // mov 64 %rax 110*fe6060f1SDimitry Andric BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64); 111*fe6060f1SDimitry Andric // tilestored %tmm, (%sp, %idx) 112*fe6060f1SDimitry Andric unsigned Opc = X86::TILESTORED; 113*fe6060f1SDimitry Andric MachineInstr *NewMI = 114*fe6060f1SDimitry Andric addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), TileSS) 115*fe6060f1SDimitry Andric .addReg(SrcReg, getKillRegState(SrcMO.isKill())); 116*fe6060f1SDimitry Andric MachineOperand &MO = NewMI->getOperand(2); 117*fe6060f1SDimitry Andric MO.setReg(GR64Cand); 118*fe6060f1SDimitry Andric MO.setIsKill(true); 119*fe6060f1SDimitry Andric // tileloadd (%sp, %idx), %tmm 120*fe6060f1SDimitry Andric Opc = X86::TILELOADD; 121*fe6060f1SDimitry Andric NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg), 122*fe6060f1SDimitry Andric TileSS); 123*fe6060f1SDimitry Andric // restore %rax 124*fe6060f1SDimitry Andric // mov (%sp) %rax 125*fe6060f1SDimitry Andric addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm), GR64Cand), 126*fe6060f1SDimitry Andric StrideSS); 127*fe6060f1SDimitry Andric MI.eraseFromParent(); 128*fe6060f1SDimitry Andric Changed = true; 129*fe6060f1SDimitry Andric } 130*fe6060f1SDimitry Andric } 131*fe6060f1SDimitry Andric return Changed; 132*fe6060f1SDimitry Andric } 133