1*700637cbSDimitry Andric //- NVPTXForwardParams.cpp - NVPTX Forward Device Params Removing Local Copy -//
2*700637cbSDimitry Andric //
3*700637cbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*700637cbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*700637cbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*700637cbSDimitry Andric //
7*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
8*700637cbSDimitry Andric //
9*700637cbSDimitry Andric // PTX supports 2 methods of accessing device function parameters:
10*700637cbSDimitry Andric //
// - "simple" case: If a parameter is only loaded, and all loads can address
//   the parameter via a constant offset, then the parameter may be loaded via
//   the ".param" address space. This case is not possible if the parameter
//   is stored to or has its address taken. This method is preferable when
//   possible. Ex:
16*700637cbSDimitry Andric //
17*700637cbSDimitry Andric // ld.param.u32 %r1, [foo_param_1];
18*700637cbSDimitry Andric // ld.param.u32 %r2, [foo_param_1+4];
19*700637cbSDimitry Andric //
20*700637cbSDimitry Andric // - "move param" case: For more complex cases the address of the param may be
21*700637cbSDimitry Andric // placed in a register via a "mov" instruction. This "mov" also implicitly
22*700637cbSDimitry Andric // moves the param to the ".local" address space and allows for it to be
//   written to. This essentially defers the responsibility of the byval copy
24*700637cbSDimitry Andric // to the PTX calling convention.
25*700637cbSDimitry Andric //
26*700637cbSDimitry Andric // mov.b64 %rd1, foo_param_0;
27*700637cbSDimitry Andric // st.local.u32 [%rd1], 42;
28*700637cbSDimitry Andric // add.u64 %rd3, %rd1, %rd2;
29*700637cbSDimitry Andric // ld.local.u32 %r2, [%rd3];
30*700637cbSDimitry Andric //
31*700637cbSDimitry Andric // In NVPTXLowerArgs and SelectionDAG, we pessimistically assume that all
32*700637cbSDimitry Andric // parameters will use the "move param" case and the local address space. This
33*700637cbSDimitry Andric // pass is responsible for switching to the "simple" case when possible, as it
34*700637cbSDimitry Andric // is more efficient.
35*700637cbSDimitry Andric //
// We do this by simply traversing uses of the param "mov" instructions and
// trivially checking if they are all loads.
38*700637cbSDimitry Andric //
39*700637cbSDimitry Andric //===----------------------------------------------------------------------===//
40*700637cbSDimitry Andric
41*700637cbSDimitry Andric #include "NVPTX.h"
42*700637cbSDimitry Andric #include "llvm/ADT/SmallVector.h"
43*700637cbSDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
44*700637cbSDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
45*700637cbSDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
46*700637cbSDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
47*700637cbSDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
48*700637cbSDimitry Andric #include "llvm/Support/ErrorHandling.h"
49*700637cbSDimitry Andric
50*700637cbSDimitry Andric using namespace llvm;
51*700637cbSDimitry Andric
traverseMoveUse(MachineInstr & U,const MachineRegisterInfo & MRI,SmallVectorImpl<MachineInstr * > & RemoveList,SmallVectorImpl<MachineInstr * > & LoadInsts)52*700637cbSDimitry Andric static bool traverseMoveUse(MachineInstr &U, const MachineRegisterInfo &MRI,
53*700637cbSDimitry Andric SmallVectorImpl<MachineInstr *> &RemoveList,
54*700637cbSDimitry Andric SmallVectorImpl<MachineInstr *> &LoadInsts) {
55*700637cbSDimitry Andric switch (U.getOpcode()) {
56*700637cbSDimitry Andric case NVPTX::LD_i16:
57*700637cbSDimitry Andric case NVPTX::LD_i32:
58*700637cbSDimitry Andric case NVPTX::LD_i64:
59*700637cbSDimitry Andric case NVPTX::LD_i8:
60*700637cbSDimitry Andric case NVPTX::LDV_i16_v2:
61*700637cbSDimitry Andric case NVPTX::LDV_i16_v4:
62*700637cbSDimitry Andric case NVPTX::LDV_i32_v2:
63*700637cbSDimitry Andric case NVPTX::LDV_i32_v4:
64*700637cbSDimitry Andric case NVPTX::LDV_i64_v2:
65*700637cbSDimitry Andric case NVPTX::LDV_i64_v4:
66*700637cbSDimitry Andric case NVPTX::LDV_i8_v2:
67*700637cbSDimitry Andric case NVPTX::LDV_i8_v4: {
68*700637cbSDimitry Andric LoadInsts.push_back(&U);
69*700637cbSDimitry Andric return true;
70*700637cbSDimitry Andric }
71*700637cbSDimitry Andric case NVPTX::cvta_local:
72*700637cbSDimitry Andric case NVPTX::cvta_local_64:
73*700637cbSDimitry Andric case NVPTX::cvta_to_local:
74*700637cbSDimitry Andric case NVPTX::cvta_to_local_64: {
75*700637cbSDimitry Andric for (auto &U2 : MRI.use_instructions(U.operands_begin()->getReg()))
76*700637cbSDimitry Andric if (!traverseMoveUse(U2, MRI, RemoveList, LoadInsts))
77*700637cbSDimitry Andric return false;
78*700637cbSDimitry Andric
79*700637cbSDimitry Andric RemoveList.push_back(&U);
80*700637cbSDimitry Andric return true;
81*700637cbSDimitry Andric }
82*700637cbSDimitry Andric default:
83*700637cbSDimitry Andric return false;
84*700637cbSDimitry Andric }
85*700637cbSDimitry Andric }
86*700637cbSDimitry Andric
eliminateMove(MachineInstr & Mov,const MachineRegisterInfo & MRI,SmallVectorImpl<MachineInstr * > & RemoveList)87*700637cbSDimitry Andric static bool eliminateMove(MachineInstr &Mov, const MachineRegisterInfo &MRI,
88*700637cbSDimitry Andric SmallVectorImpl<MachineInstr *> &RemoveList) {
89*700637cbSDimitry Andric SmallVector<MachineInstr *, 16> MaybeRemoveList;
90*700637cbSDimitry Andric SmallVector<MachineInstr *, 16> LoadInsts;
91*700637cbSDimitry Andric
92*700637cbSDimitry Andric for (auto &U : MRI.use_instructions(Mov.operands_begin()->getReg()))
93*700637cbSDimitry Andric if (!traverseMoveUse(U, MRI, MaybeRemoveList, LoadInsts))
94*700637cbSDimitry Andric return false;
95*700637cbSDimitry Andric
96*700637cbSDimitry Andric RemoveList.append(MaybeRemoveList);
97*700637cbSDimitry Andric RemoveList.push_back(&Mov);
98*700637cbSDimitry Andric
99*700637cbSDimitry Andric const MachineOperand *ParamSymbol = Mov.uses().begin();
100*700637cbSDimitry Andric assert(ParamSymbol->isSymbol());
101*700637cbSDimitry Andric
102*700637cbSDimitry Andric constexpr unsigned LDInstBasePtrOpIdx = 5;
103*700637cbSDimitry Andric constexpr unsigned LDInstAddrSpaceOpIdx = 2;
104*700637cbSDimitry Andric for (auto *LI : LoadInsts) {
105*700637cbSDimitry Andric (LI->uses().begin() + LDInstBasePtrOpIdx)
106*700637cbSDimitry Andric ->ChangeToES(ParamSymbol->getSymbolName());
107*700637cbSDimitry Andric (LI->uses().begin() + LDInstAddrSpaceOpIdx)
108*700637cbSDimitry Andric ->ChangeToImmediate(NVPTX::AddressSpace::Param);
109*700637cbSDimitry Andric }
110*700637cbSDimitry Andric return true;
111*700637cbSDimitry Andric }
112*700637cbSDimitry Andric
forwardDeviceParams(MachineFunction & MF)113*700637cbSDimitry Andric static bool forwardDeviceParams(MachineFunction &MF) {
114*700637cbSDimitry Andric const auto &MRI = MF.getRegInfo();
115*700637cbSDimitry Andric
116*700637cbSDimitry Andric bool Changed = false;
117*700637cbSDimitry Andric SmallVector<MachineInstr *, 16> RemoveList;
118*700637cbSDimitry Andric for (auto &MI : make_early_inc_range(*MF.begin()))
119*700637cbSDimitry Andric if (MI.getOpcode() == NVPTX::MOV32_PARAM ||
120*700637cbSDimitry Andric MI.getOpcode() == NVPTX::MOV64_PARAM)
121*700637cbSDimitry Andric Changed |= eliminateMove(MI, MRI, RemoveList);
122*700637cbSDimitry Andric
123*700637cbSDimitry Andric for (auto *MI : RemoveList)
124*700637cbSDimitry Andric MI->eraseFromParent();
125*700637cbSDimitry Andric
126*700637cbSDimitry Andric return Changed;
127*700637cbSDimitry Andric }
128*700637cbSDimitry Andric
129*700637cbSDimitry Andric /// ----------------------------------------------------------------------------
130*700637cbSDimitry Andric /// Pass (Manager) Boilerplate
131*700637cbSDimitry Andric /// ----------------------------------------------------------------------------
132*700637cbSDimitry Andric
133*700637cbSDimitry Andric namespace {
134*700637cbSDimitry Andric struct NVPTXForwardParamsPass : public MachineFunctionPass {
135*700637cbSDimitry Andric static char ID;
NVPTXForwardParamsPass__anon4673056e0111::NVPTXForwardParamsPass136*700637cbSDimitry Andric NVPTXForwardParamsPass() : MachineFunctionPass(ID) {}
137*700637cbSDimitry Andric
138*700637cbSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
139*700637cbSDimitry Andric
getAnalysisUsage__anon4673056e0111::NVPTXForwardParamsPass140*700637cbSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
141*700637cbSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
142*700637cbSDimitry Andric }
143*700637cbSDimitry Andric };
144*700637cbSDimitry Andric } // namespace
145*700637cbSDimitry Andric
146*700637cbSDimitry Andric char NVPTXForwardParamsPass::ID = 0;
147*700637cbSDimitry Andric
148*700637cbSDimitry Andric INITIALIZE_PASS(NVPTXForwardParamsPass, "nvptx-forward-params",
149*700637cbSDimitry Andric "NVPTX Forward Params", false, false)
150*700637cbSDimitry Andric
runOnMachineFunction(MachineFunction & MF)151*700637cbSDimitry Andric bool NVPTXForwardParamsPass::runOnMachineFunction(MachineFunction &MF) {
152*700637cbSDimitry Andric return forwardDeviceParams(MF);
153*700637cbSDimitry Andric }
154*700637cbSDimitry Andric
createNVPTXForwardParamsPass()155*700637cbSDimitry Andric MachineFunctionPass *llvm::createNVPTXForwardParamsPass() {
156*700637cbSDimitry Andric return new NVPTXForwardParamsPass();
157*700637cbSDimitry Andric }
158