xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp (revision 35c0a8c449fd2b7f75029ebed5e10852240f0865)
1 //===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Lowering the WWM_COPY instructions for various register classes.
11 /// AMDGPU target generates WWM_COPY instruction to differentiate WWM
12 /// copy from COPY. This pass generates the necessary exec mask manipulation
13 /// instructions to replicate 'Whole Wave Mode' and lowers WWM_COPY back to
14 /// COPY.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "AMDGPU.h"
19 #include "GCNSubtarget.h"
20 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/CodeGen/LiveIntervals.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/VirtRegMap.h"
25 #include "llvm/InitializePasses.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "si-lower-wwm-copies"
30 
31 namespace {
32 
33 class SILowerWWMCopies : public MachineFunctionPass {
34 public:
35   static char ID;
36 
37   SILowerWWMCopies() : MachineFunctionPass(ID) {
38     initializeSILowerWWMCopiesPass(*PassRegistry::getPassRegistry());
39   }
40 
41   bool runOnMachineFunction(MachineFunction &MF) override;
42 
43   StringRef getPassName() const override { return "SI Lower WWM Copies"; }
44 
45   void getAnalysisUsage(AnalysisUsage &AU) const override {
46     AU.setPreservesAll();
47     MachineFunctionPass::getAnalysisUsage(AU);
48   }
49 
50 private:
51   bool isSCCLiveAtMI(const MachineInstr &MI);
52   void addToWWMSpills(MachineFunction &MF, Register Reg);
53 
54   LiveIntervals *LIS;
55   SlotIndexes *Indexes;
56   VirtRegMap *VRM;
57   const SIRegisterInfo *TRI;
58   const MachineRegisterInfo *MRI;
59   SIMachineFunctionInfo *MFI;
60 };
61 
62 } // End anonymous namespace.
63 
64 INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies",
65                       false, false)
66 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
67 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
68 INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false,
69                     false)
70 
71 char SILowerWWMCopies::ID = 0;
72 
73 char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID;
74 
75 bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) {
76   // We can't determine the liveness info if LIS isn't available. Early return
77   // in that case and always assume SCC is live.
78   if (!LIS)
79     return true;
80 
81   LiveRange &LR =
82       LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI));
83   SlotIndex Idx = LIS->getInstructionIndex(MI);
84   return LR.liveAt(Idx);
85 }
86 
87 // If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills
88 // for preserving its entire lanes at function prolog/epilog.
89 void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) {
90   if (Reg.isPhysical())
91     return;
92 
93   Register PhysReg = VRM->getPhys(Reg);
94   assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
95          "should have allocated a physical register");
96 
97   MFI->allocateWWMSpill(MF, PhysReg);
98 }
99 
100 bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) {
101   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
102   const SIInstrInfo *TII = ST.getInstrInfo();
103 
104   MFI = MF.getInfo<SIMachineFunctionInfo>();
105   auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
106   LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
107   auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
108   Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
109   VRM = getAnalysisIfAvailable<VirtRegMap>();
110   TRI = ST.getRegisterInfo();
111   MRI = &MF.getRegInfo();
112 
113   if (!MFI->hasVRegFlags())
114     return false;
115 
116   bool Changed = false;
117   for (MachineBasicBlock &MBB : MF) {
118     for (MachineInstr &MI : MBB) {
119       if (MI.getOpcode() != AMDGPU::WWM_COPY)
120         continue;
121 
122       // TODO: Club adjacent WWM ops between same exec save/restore
123       assert(TII->isVGPRCopy(MI));
124 
125       // For WWM vector copies, manipulate the exec mask around the copy
126       // instruction.
127       const DebugLoc &DL = MI.getDebugLoc();
128       MachineBasicBlock::iterator InsertPt = MI.getIterator();
129       Register RegForExecCopy = MFI->getSGPRForEXECCopy();
130       TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy,
131                                  isSCCLiveAtMI(MI), Indexes);
132       TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes);
133       addToWWMSpills(MF, MI.getOperand(0).getReg());
134       LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI);
135 
136       // Lower WWM_COPY back to COPY
137       MI.setDesc(TII->get(AMDGPU::COPY));
138       Changed |= true;
139     }
140   }
141 
142   return Changed;
143 }
144