xref: /freebsd/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXForwardParams.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1*700637cbSDimitry Andric //- NVPTXForwardParams.cpp - NVPTX Forward Device Params Removing Local Copy -//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric //
9*700637cbSDimitry Andric // PTX supports 2 methods of accessing device function parameters:
10*700637cbSDimitry Andric //
//   - "simple" case: If a parameter is only loaded, and all loads can address
//     the parameter via a constant offset, then the parameter may be loaded via
//     the ".param" address space. This case is not possible if the parameter
//     is stored to or has its address taken. This method is preferable when
//     possible. Ex:
16*700637cbSDimitry Andric //
17*700637cbSDimitry Andric //            ld.param.u32    %r1, [foo_param_1];
18*700637cbSDimitry Andric //            ld.param.u32    %r2, [foo_param_1+4];
19*700637cbSDimitry Andric //
20*700637cbSDimitry Andric //   - "move param" case: For more complex cases the address of the param may be
21*700637cbSDimitry Andric //     placed in a register via a "mov" instruction. This "mov" also implicitly
22*700637cbSDimitry Andric //     moves the param to the ".local" address space and allows for it to be
//     written to. This essentially defers the responsibility of the byval copy
24*700637cbSDimitry Andric //     to the PTX calling convention.
25*700637cbSDimitry Andric //
26*700637cbSDimitry Andric //            mov.b64         %rd1, foo_param_0;
27*700637cbSDimitry Andric //            st.local.u32    [%rd1], 42;
28*700637cbSDimitry Andric //            add.u64         %rd3, %rd1, %rd2;
29*700637cbSDimitry Andric //            ld.local.u32    %r2, [%rd3];
30*700637cbSDimitry Andric //
31*700637cbSDimitry Andric // In NVPTXLowerArgs and SelectionDAG, we pessimistically assume that all
32*700637cbSDimitry Andric // parameters will use the "move param" case and the local address space. This
33*700637cbSDimitry Andric // pass is responsible for switching to the "simple" case when possible, as it
34*700637cbSDimitry Andric // is more efficient.
35*700637cbSDimitry Andric //
// We do this by simply traversing uses of the param "mov" instructions and
// trivially checking if they are all loads.
38*700637cbSDimitry Andric //
39*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
40*700637cbSDimitry Andric 
41*700637cbSDimitry Andric #include "NVPTX.h"
42*700637cbSDimitry Andric #include "llvm/ADT/SmallVector.h"
43*700637cbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
44*700637cbSDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
45*700637cbSDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
46*700637cbSDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
47*700637cbSDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
48*700637cbSDimitry Andric #include "llvm/Support/ErrorHandling.h"
49*700637cbSDimitry Andric 
50*700637cbSDimitry Andric using namespace llvm;
51*700637cbSDimitry Andric 
traverseMoveUse(MachineInstr & U,const MachineRegisterInfo & MRI,SmallVectorImpl<MachineInstr * > & RemoveList,SmallVectorImpl<MachineInstr * > & LoadInsts)52*700637cbSDimitry Andric static bool traverseMoveUse(MachineInstr &U, const MachineRegisterInfo &MRI,
53*700637cbSDimitry Andric                             SmallVectorImpl<MachineInstr *> &RemoveList,
54*700637cbSDimitry Andric                             SmallVectorImpl<MachineInstr *> &LoadInsts) {
55*700637cbSDimitry Andric   switch (U.getOpcode()) {
56*700637cbSDimitry Andric   case NVPTX::LD_i16:
57*700637cbSDimitry Andric   case NVPTX::LD_i32:
58*700637cbSDimitry Andric   case NVPTX::LD_i64:
59*700637cbSDimitry Andric   case NVPTX::LD_i8:
60*700637cbSDimitry Andric   case NVPTX::LDV_i16_v2:
61*700637cbSDimitry Andric   case NVPTX::LDV_i16_v4:
62*700637cbSDimitry Andric   case NVPTX::LDV_i32_v2:
63*700637cbSDimitry Andric   case NVPTX::LDV_i32_v4:
64*700637cbSDimitry Andric   case NVPTX::LDV_i64_v2:
65*700637cbSDimitry Andric   case NVPTX::LDV_i64_v4:
66*700637cbSDimitry Andric   case NVPTX::LDV_i8_v2:
67*700637cbSDimitry Andric   case NVPTX::LDV_i8_v4: {
68*700637cbSDimitry Andric     LoadInsts.push_back(&U);
69*700637cbSDimitry Andric     return true;
70*700637cbSDimitry Andric   }
71*700637cbSDimitry Andric   case NVPTX::cvta_local:
72*700637cbSDimitry Andric   case NVPTX::cvta_local_64:
73*700637cbSDimitry Andric   case NVPTX::cvta_to_local:
74*700637cbSDimitry Andric   case NVPTX::cvta_to_local_64: {
75*700637cbSDimitry Andric     for (auto &U2 : MRI.use_instructions(U.operands_begin()->getReg()))
76*700637cbSDimitry Andric       if (!traverseMoveUse(U2, MRI, RemoveList, LoadInsts))
77*700637cbSDimitry Andric         return false;
78*700637cbSDimitry Andric 
79*700637cbSDimitry Andric     RemoveList.push_back(&U);
80*700637cbSDimitry Andric     return true;
81*700637cbSDimitry Andric   }
82*700637cbSDimitry Andric   default:
83*700637cbSDimitry Andric     return false;
84*700637cbSDimitry Andric   }
85*700637cbSDimitry Andric }
86*700637cbSDimitry Andric 
eliminateMove(MachineInstr & Mov,const MachineRegisterInfo & MRI,SmallVectorImpl<MachineInstr * > & RemoveList)87*700637cbSDimitry Andric static bool eliminateMove(MachineInstr &Mov, const MachineRegisterInfo &MRI,
88*700637cbSDimitry Andric                           SmallVectorImpl<MachineInstr *> &RemoveList) {
89*700637cbSDimitry Andric   SmallVector<MachineInstr *, 16> MaybeRemoveList;
90*700637cbSDimitry Andric   SmallVector<MachineInstr *, 16> LoadInsts;
91*700637cbSDimitry Andric 
92*700637cbSDimitry Andric   for (auto &U : MRI.use_instructions(Mov.operands_begin()->getReg()))
93*700637cbSDimitry Andric     if (!traverseMoveUse(U, MRI, MaybeRemoveList, LoadInsts))
94*700637cbSDimitry Andric       return false;
95*700637cbSDimitry Andric 
96*700637cbSDimitry Andric   RemoveList.append(MaybeRemoveList);
97*700637cbSDimitry Andric   RemoveList.push_back(&Mov);
98*700637cbSDimitry Andric 
99*700637cbSDimitry Andric   const MachineOperand *ParamSymbol = Mov.uses().begin();
100*700637cbSDimitry Andric   assert(ParamSymbol->isSymbol());
101*700637cbSDimitry Andric 
102*700637cbSDimitry Andric   constexpr unsigned LDInstBasePtrOpIdx = 5;
103*700637cbSDimitry Andric   constexpr unsigned LDInstAddrSpaceOpIdx = 2;
104*700637cbSDimitry Andric   for (auto *LI : LoadInsts) {
105*700637cbSDimitry Andric     (LI->uses().begin() + LDInstBasePtrOpIdx)
106*700637cbSDimitry Andric         ->ChangeToES(ParamSymbol->getSymbolName());
107*700637cbSDimitry Andric     (LI->uses().begin() + LDInstAddrSpaceOpIdx)
108*700637cbSDimitry Andric         ->ChangeToImmediate(NVPTX::AddressSpace::Param);
109*700637cbSDimitry Andric   }
110*700637cbSDimitry Andric   return true;
111*700637cbSDimitry Andric }
112*700637cbSDimitry Andric 
forwardDeviceParams(MachineFunction & MF)113*700637cbSDimitry Andric static bool forwardDeviceParams(MachineFunction &MF) {
114*700637cbSDimitry Andric   const auto &MRI = MF.getRegInfo();
115*700637cbSDimitry Andric 
116*700637cbSDimitry Andric   bool Changed = false;
117*700637cbSDimitry Andric   SmallVector<MachineInstr *, 16> RemoveList;
118*700637cbSDimitry Andric   for (auto &MI : make_early_inc_range(*MF.begin()))
119*700637cbSDimitry Andric     if (MI.getOpcode() == NVPTX::MOV32_PARAM ||
120*700637cbSDimitry Andric         MI.getOpcode() == NVPTX::MOV64_PARAM)
121*700637cbSDimitry Andric       Changed |= eliminateMove(MI, MRI, RemoveList);
122*700637cbSDimitry Andric 
123*700637cbSDimitry Andric   for (auto *MI : RemoveList)
124*700637cbSDimitry Andric     MI->eraseFromParent();
125*700637cbSDimitry Andric 
126*700637cbSDimitry Andric   return Changed;
127*700637cbSDimitry Andric }
128*700637cbSDimitry Andric 
129*700637cbSDimitry Andric /// ----------------------------------------------------------------------------
130*700637cbSDimitry Andric ///                       Pass (Manager) Boilerplate
131*700637cbSDimitry Andric /// ----------------------------------------------------------------------------
132*700637cbSDimitry Andric 
133*700637cbSDimitry Andric namespace {
134*700637cbSDimitry Andric struct NVPTXForwardParamsPass : public MachineFunctionPass {
135*700637cbSDimitry Andric   static char ID;
NVPTXForwardParamsPass__anon4673056e0111::NVPTXForwardParamsPass136*700637cbSDimitry Andric   NVPTXForwardParamsPass() : MachineFunctionPass(ID) {}
137*700637cbSDimitry Andric 
138*700637cbSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
139*700637cbSDimitry Andric 
getAnalysisUsage__anon4673056e0111::NVPTXForwardParamsPass140*700637cbSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
141*700637cbSDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
142*700637cbSDimitry Andric   }
143*700637cbSDimitry Andric };
144*700637cbSDimitry Andric } // namespace
145*700637cbSDimitry Andric 
146*700637cbSDimitry Andric char NVPTXForwardParamsPass::ID = 0;
147*700637cbSDimitry Andric 
148*700637cbSDimitry Andric INITIALIZE_PASS(NVPTXForwardParamsPass, "nvptx-forward-params",
149*700637cbSDimitry Andric                 "NVPTX Forward Params", false, false)
150*700637cbSDimitry Andric 
runOnMachineFunction(MachineFunction & MF)151*700637cbSDimitry Andric bool NVPTXForwardParamsPass::runOnMachineFunction(MachineFunction &MF) {
152*700637cbSDimitry Andric   return forwardDeviceParams(MF);
153*700637cbSDimitry Andric }
154*700637cbSDimitry Andric 
createNVPTXForwardParamsPass()155*700637cbSDimitry Andric MachineFunctionPass *llvm::createNVPTXForwardParamsPass() {
156*700637cbSDimitry Andric   return new NVPTXForwardParamsPass();
157*700637cbSDimitry Andric }
158