1 //===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Lowering the WWM_COPY instructions for various register classes. 11 /// AMDGPU target generates WWM_COPY instruction to differentiate WWM 12 /// copy from COPY. This pass generates the necessary exec mask manipulation 13 /// instructions to replicate 'Whole Wave Mode' and lowers WWM_COPY back to 14 /// COPY. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "AMDGPU.h" 19 #include "GCNSubtarget.h" 20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 21 #include "SIMachineFunctionInfo.h" 22 #include "llvm/CodeGen/LiveIntervals.h" 23 #include "llvm/CodeGen/MachineFunctionPass.h" 24 #include "llvm/CodeGen/VirtRegMap.h" 25 #include "llvm/InitializePasses.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "si-lower-wwm-copies" 30 31 namespace { 32 33 class SILowerWWMCopies : public MachineFunctionPass { 34 public: 35 static char ID; 36 37 SILowerWWMCopies() : MachineFunctionPass(ID) { 38 initializeSILowerWWMCopiesPass(*PassRegistry::getPassRegistry()); 39 } 40 41 bool runOnMachineFunction(MachineFunction &MF) override; 42 43 StringRef getPassName() const override { return "SI Lower WWM Copies"; } 44 45 void getAnalysisUsage(AnalysisUsage &AU) const override { 46 AU.setPreservesAll(); 47 MachineFunctionPass::getAnalysisUsage(AU); 48 } 49 50 private: 51 bool isSCCLiveAtMI(const MachineInstr &MI); 52 void addToWWMSpills(MachineFunction &MF, Register Reg); 53 54 LiveIntervals *LIS; 55 SlotIndexes *Indexes; 56 VirtRegMap *VRM; 57 const SIRegisterInfo *TRI; 58 const MachineRegisterInfo *MRI; 59 SIMachineFunctionInfo *MFI; 60 }; 61 62 } // End anonymous namespace. 63 64 INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", 65 false, false) 66 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 67 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 68 INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false, 69 false) 70 71 char SILowerWWMCopies::ID = 0; 72 73 char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID; 74 75 bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) { 76 // We can't determine the liveness info if LIS isn't available. Early return 77 // in that case and always assume SCC is live. 78 if (!LIS) 79 return true; 80 81 LiveRange &LR = 82 LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI)); 83 SlotIndex Idx = LIS->getInstructionIndex(MI); 84 return LR.liveAt(Idx); 85 } 86 87 // If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills 88 // for preserving its entire lanes at function prolog/epilog. 89 void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) { 90 if (Reg.isPhysical()) 91 return; 92 93 Register PhysReg = VRM->getPhys(Reg); 94 assert(PhysReg != VirtRegMap::NO_PHYS_REG && 95 "should have allocated a physical register"); 96 97 MFI->allocateWWMSpill(MF, PhysReg); 98 } 99 100 bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) { 101 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 102 const SIInstrInfo *TII = ST.getInstrInfo(); 103 104 MFI = MF.getInfo<SIMachineFunctionInfo>(); 105 LIS = getAnalysisIfAvailable<LiveIntervals>(); 106 Indexes = getAnalysisIfAvailable<SlotIndexes>(); 107 VRM = getAnalysisIfAvailable<VirtRegMap>(); 108 TRI = ST.getRegisterInfo(); 109 MRI = &MF.getRegInfo(); 110 111 if (!MFI->hasVRegFlags()) 112 return false; 113 114 bool Changed = false; 115 for (MachineBasicBlock &MBB : MF) { 116 for (MachineInstr &MI : MBB) { 117 if (MI.getOpcode() != AMDGPU::WWM_COPY) 118 continue; 119 120 // TODO: Club adjacent WWM ops between same exec save/restore 121 assert(TII->isVGPRCopy(MI)); 122 123 // For WWM vector copies, manipulate the exec mask around the copy 124 // instruction. 125 const DebugLoc &DL = MI.getDebugLoc(); 126 MachineBasicBlock::iterator InsertPt = MI.getIterator(); 127 Register RegForExecCopy = MFI->getSGPRForEXECCopy(); 128 TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy, 129 isSCCLiveAtMI(MI), Indexes); 130 TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes); 131 addToWWMSpills(MF, MI.getOperand(0).getReg()); 132 LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI); 133 134 // Lower WWM_COPY back to COPY 135 MI.setDesc(TII->get(AMDGPU::COPY)); 136 Changed |= true; 137 } 138 } 139 140 return Changed; 141 } 142