//===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// Lowering the WWM_COPY instructions for various register classes. /// AMDGPU target generates WWM_COPY instruction to differentiate WWM /// copy from COPY. This pass generates the necessary exec mask manipulation /// instructions to replicate 'Whole Wave Mode' and lowers WWM_COPY back to /// COPY. // //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/InitializePasses.h" using namespace llvm; #define DEBUG_TYPE "si-lower-wwm-copies" namespace { class SILowerWWMCopies : public MachineFunctionPass { public: static char ID; SILowerWWMCopies() : MachineFunctionPass(ID) { initializeSILowerWWMCopiesPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return "SI Lower WWM Copies"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } private: bool isSCCLiveAtMI(const MachineInstr &MI); void addToWWMSpills(MachineFunction &MF, Register Reg); LiveIntervals *LIS; SlotIndexes *Indexes; VirtRegMap *VRM; const SIRegisterInfo *TRI; const MachineRegisterInfo *MRI; SIMachineFunctionInfo *MFI; }; } // End anonymous namespace. INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false, false) char SILowerWWMCopies::ID = 0; char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID; bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) { // We can't determine the liveness info if LIS isn't available. Early return // in that case and always assume SCC is live. if (!LIS) return true; LiveRange &LR = LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI)); SlotIndex Idx = LIS->getInstructionIndex(MI); return LR.liveAt(Idx); } // If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills // for preserving its entire lanes at function prolog/epilog. void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) { if (Reg.isPhysical()) return; Register PhysReg = VRM->getPhys(Reg); assert(PhysReg != VirtRegMap::NO_PHYS_REG && "should have allocated a physical register"); MFI->allocateWWMSpill(MF, PhysReg); } bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); MFI = MF.getInfo(); LIS = getAnalysisIfAvailable(); Indexes = getAnalysisIfAvailable(); VRM = getAnalysisIfAvailable(); TRI = ST.getRegisterInfo(); MRI = &MF.getRegInfo(); if (!MFI->hasVRegFlags()) return false; bool Changed = false; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { if (MI.getOpcode() != AMDGPU::WWM_COPY) continue; // TODO: Club adjacent WWM ops between same exec save/restore assert(TII->isVGPRCopy(MI)); // For WWM vector copies, manipulate the exec mask around the copy // instruction. const DebugLoc &DL = MI.getDebugLoc(); MachineBasicBlock::iterator InsertPt = MI.getIterator(); Register RegForExecCopy = MFI->getSGPRForEXECCopy(); TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy, isSCCLiveAtMI(MI), Indexes); TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes); addToWWMSpills(MF, MI.getOperand(0).getReg()); LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI); // Lower WWM_COPY back to COPY MI.setDesc(TII->get(AMDGPU::COPY)); Changed |= true; } } return Changed; }