xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Lowering the WWM_COPY instructions for various register classes.
/// The AMDGPU target generates WWM_COPY instructions to differentiate a WWM
/// copy from a plain COPY. This pass generates the necessary exec mask
/// manipulation instructions to replicate 'Whole Wave Mode' and lowers
/// WWM_COPY back to COPY.
//
//===----------------------------------------------------------------------===//
175f757f3fSDimitry Andric 
185f757f3fSDimitry Andric #include "AMDGPU.h"
195f757f3fSDimitry Andric #include "GCNSubtarget.h"
205f757f3fSDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
215f757f3fSDimitry Andric #include "SIMachineFunctionInfo.h"
225f757f3fSDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
235f757f3fSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
245f757f3fSDimitry Andric #include "llvm/CodeGen/VirtRegMap.h"
255f757f3fSDimitry Andric #include "llvm/InitializePasses.h"
265f757f3fSDimitry Andric 
275f757f3fSDimitry Andric using namespace llvm;
285f757f3fSDimitry Andric 
295f757f3fSDimitry Andric #define DEBUG_TYPE "si-lower-wwm-copies"
305f757f3fSDimitry Andric 
315f757f3fSDimitry Andric namespace {
325f757f3fSDimitry Andric 
335f757f3fSDimitry Andric class SILowerWWMCopies : public MachineFunctionPass {
345f757f3fSDimitry Andric public:
355f757f3fSDimitry Andric   static char ID;
365f757f3fSDimitry Andric 
SILowerWWMCopies()375f757f3fSDimitry Andric   SILowerWWMCopies() : MachineFunctionPass(ID) {
385f757f3fSDimitry Andric     initializeSILowerWWMCopiesPass(*PassRegistry::getPassRegistry());
395f757f3fSDimitry Andric   }
405f757f3fSDimitry Andric 
415f757f3fSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
425f757f3fSDimitry Andric 
getPassName() const435f757f3fSDimitry Andric   StringRef getPassName() const override { return "SI Lower WWM Copies"; }
445f757f3fSDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const455f757f3fSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
465f757f3fSDimitry Andric     AU.setPreservesAll();
475f757f3fSDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
485f757f3fSDimitry Andric   }
495f757f3fSDimitry Andric 
505f757f3fSDimitry Andric private:
515f757f3fSDimitry Andric   bool isSCCLiveAtMI(const MachineInstr &MI);
525f757f3fSDimitry Andric   void addToWWMSpills(MachineFunction &MF, Register Reg);
535f757f3fSDimitry Andric 
545f757f3fSDimitry Andric   LiveIntervals *LIS;
555f757f3fSDimitry Andric   SlotIndexes *Indexes;
565f757f3fSDimitry Andric   VirtRegMap *VRM;
575f757f3fSDimitry Andric   const SIRegisterInfo *TRI;
585f757f3fSDimitry Andric   const MachineRegisterInfo *MRI;
595f757f3fSDimitry Andric   SIMachineFunctionInfo *MFI;
605f757f3fSDimitry Andric };
615f757f3fSDimitry Andric 
625f757f3fSDimitry Andric } // End anonymous namespace.
635f757f3fSDimitry Andric 
645f757f3fSDimitry Andric INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies",
655f757f3fSDimitry Andric                       false, false)
66*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
675f757f3fSDimitry Andric INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
685f757f3fSDimitry Andric INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false,
695f757f3fSDimitry Andric                     false)
705f757f3fSDimitry Andric 
715f757f3fSDimitry Andric char SILowerWWMCopies::ID = 0;
725f757f3fSDimitry Andric 
735f757f3fSDimitry Andric char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID;
745f757f3fSDimitry Andric 
isSCCLiveAtMI(const MachineInstr & MI)755f757f3fSDimitry Andric bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) {
765f757f3fSDimitry Andric   // We can't determine the liveness info if LIS isn't available. Early return
775f757f3fSDimitry Andric   // in that case and always assume SCC is live.
785f757f3fSDimitry Andric   if (!LIS)
795f757f3fSDimitry Andric     return true;
805f757f3fSDimitry Andric 
815f757f3fSDimitry Andric   LiveRange &LR =
825f757f3fSDimitry Andric       LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI));
835f757f3fSDimitry Andric   SlotIndex Idx = LIS->getInstructionIndex(MI);
845f757f3fSDimitry Andric   return LR.liveAt(Idx);
855f757f3fSDimitry Andric }
865f757f3fSDimitry Andric 
875f757f3fSDimitry Andric // If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills
885f757f3fSDimitry Andric // for preserving its entire lanes at function prolog/epilog.
addToWWMSpills(MachineFunction & MF,Register Reg)895f757f3fSDimitry Andric void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) {
905f757f3fSDimitry Andric   if (Reg.isPhysical())
915f757f3fSDimitry Andric     return;
925f757f3fSDimitry Andric 
935f757f3fSDimitry Andric   Register PhysReg = VRM->getPhys(Reg);
945f757f3fSDimitry Andric   assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
955f757f3fSDimitry Andric          "should have allocated a physical register");
965f757f3fSDimitry Andric 
975f757f3fSDimitry Andric   MFI->allocateWWMSpill(MF, PhysReg);
985f757f3fSDimitry Andric }
995f757f3fSDimitry Andric 
runOnMachineFunction(MachineFunction & MF)1005f757f3fSDimitry Andric bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) {
1015f757f3fSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1025f757f3fSDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
1035f757f3fSDimitry Andric 
1045f757f3fSDimitry Andric   MFI = MF.getInfo<SIMachineFunctionInfo>();
105*0fca6ea1SDimitry Andric   auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
106*0fca6ea1SDimitry Andric   LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
107*0fca6ea1SDimitry Andric   auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
108*0fca6ea1SDimitry Andric   Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
1095f757f3fSDimitry Andric   VRM = getAnalysisIfAvailable<VirtRegMap>();
1105f757f3fSDimitry Andric   TRI = ST.getRegisterInfo();
1115f757f3fSDimitry Andric   MRI = &MF.getRegInfo();
1125f757f3fSDimitry Andric 
1135f757f3fSDimitry Andric   if (!MFI->hasVRegFlags())
1145f757f3fSDimitry Andric     return false;
1155f757f3fSDimitry Andric 
1165f757f3fSDimitry Andric   bool Changed = false;
1175f757f3fSDimitry Andric   for (MachineBasicBlock &MBB : MF) {
1185f757f3fSDimitry Andric     for (MachineInstr &MI : MBB) {
1195f757f3fSDimitry Andric       if (MI.getOpcode() != AMDGPU::WWM_COPY)
1205f757f3fSDimitry Andric         continue;
1215f757f3fSDimitry Andric 
1225f757f3fSDimitry Andric       // TODO: Club adjacent WWM ops between same exec save/restore
1235f757f3fSDimitry Andric       assert(TII->isVGPRCopy(MI));
1245f757f3fSDimitry Andric 
1255f757f3fSDimitry Andric       // For WWM vector copies, manipulate the exec mask around the copy
1265f757f3fSDimitry Andric       // instruction.
1275f757f3fSDimitry Andric       const DebugLoc &DL = MI.getDebugLoc();
1285f757f3fSDimitry Andric       MachineBasicBlock::iterator InsertPt = MI.getIterator();
1295f757f3fSDimitry Andric       Register RegForExecCopy = MFI->getSGPRForEXECCopy();
1305f757f3fSDimitry Andric       TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy,
1315f757f3fSDimitry Andric                                  isSCCLiveAtMI(MI), Indexes);
1325f757f3fSDimitry Andric       TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes);
1335f757f3fSDimitry Andric       addToWWMSpills(MF, MI.getOperand(0).getReg());
1345f757f3fSDimitry Andric       LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI);
1355f757f3fSDimitry Andric 
1365f757f3fSDimitry Andric       // Lower WWM_COPY back to COPY
1375f757f3fSDimitry Andric       MI.setDesc(TII->get(AMDGPU::COPY));
1385f757f3fSDimitry Andric       Changed |= true;
1395f757f3fSDimitry Andric     }
1405f757f3fSDimitry Andric   }
1415f757f3fSDimitry Andric 
1425f757f3fSDimitry Andric   return Changed;
1435f757f3fSDimitry Andric }
144