//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// In NVPTX, NVPTXFrameLowering will emit the following instructions at the
// beginning of a MachineFunction.
//
//   mov %SPL, %depot
//   cvta.local %SP, %SPL
//
// Because Frame Index is a generic address and alloca can only return a generic
// pointer, without this pass the instructions producing alloca'ed address will
// be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
// this address with their .local versions, but this may introduce a lot of
// cvta.to.local instructions. Performance can be improved if we avoid casting
// address back and forth and directly calculate local address based on %SPL.
21 // This peephole pass optimizes these cases, for example 22 // 23 // It will transform the following pattern 24 // %0 = LEA_ADDRi64 %VRFrame64, 4 25 // %1 = cvta_to_local_yes_64 %0 26 // 27 // into 28 // %1 = LEA_ADDRi64 %VRFrameLocal64, 4 29 // 30 // %VRFrameLocal64 is the virtual register name of %SPL 31 // 32 //===----------------------------------------------------------------------===// 33 34 #include "NVPTX.h" 35 #include "NVPTXRegisterInfo.h" 36 #include "NVPTXSubtarget.h" 37 #include "llvm/CodeGen/MachineFunctionPass.h" 38 #include "llvm/CodeGen/MachineInstrBuilder.h" 39 #include "llvm/CodeGen/MachineRegisterInfo.h" 40 #include "llvm/CodeGen/TargetInstrInfo.h" 41 #include "llvm/CodeGen/TargetRegisterInfo.h" 42 43 using namespace llvm; 44 45 #define DEBUG_TYPE "nvptx-peephole" 46 47 namespace llvm { 48 void initializeNVPTXPeepholePass(PassRegistry &); 49 } 50 51 namespace { 52 struct NVPTXPeephole : public MachineFunctionPass { 53 public: 54 static char ID; 55 NVPTXPeephole() : MachineFunctionPass(ID) { 56 initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry()); 57 } 58 59 bool runOnMachineFunction(MachineFunction &MF) override; 60 61 StringRef getPassName() const override { 62 return "NVPTX optimize redundant cvta.to.local instruction"; 63 } 64 65 void getAnalysisUsage(AnalysisUsage &AU) const override { 66 MachineFunctionPass::getAnalysisUsage(AU); 67 } 68 }; 69 } 70 71 char NVPTXPeephole::ID = 0; 72 73 INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false) 74 75 static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) { 76 auto &MBB = *Root.getParent(); 77 auto &MF = *MBB.getParent(); 78 // Check current instruction is cvta.to.local 79 if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 && 80 Root.getOpcode() != NVPTX::cvta_to_local_yes) 81 return false; 82 83 auto &Op = Root.getOperand(1); 84 const auto &MRI = MF.getRegInfo(); 85 MachineInstr *GenericAddrDef = nullptr; 86 if (Op.isReg() && 
Op.getReg().isVirtual()) { 87 GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg()); 88 } 89 90 // Check the register operand is uniquely defined by LEA_ADDRi instruction 91 if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB || 92 (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 && 93 GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) { 94 return false; 95 } 96 97 const NVPTXRegisterInfo *NRI = 98 MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo(); 99 100 // Check the LEA_ADDRi operand is Frame index 101 auto &BaseAddrOp = GenericAddrDef->getOperand(1); 102 if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) { 103 return true; 104 } 105 106 return false; 107 } 108 109 static void CombineCVTAToLocal(MachineInstr &Root) { 110 auto &MBB = *Root.getParent(); 111 auto &MF = *MBB.getParent(); 112 const auto &MRI = MF.getRegInfo(); 113 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 114 auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); 115 116 const NVPTXRegisterInfo *NRI = 117 MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo(); 118 119 MachineInstrBuilder MIB = 120 BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()), 121 Root.getOperand(0).getReg()) 122 .addReg(NRI->getFrameLocalRegister(MF)) 123 .add(Prev.getOperand(2)); 124 125 MBB.insert((MachineBasicBlock::iterator)&Root, MIB); 126 127 // Check if MRI has only one non dbg use, which is Root 128 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) { 129 Prev.eraseFromParent(); 130 } 131 Root.eraseFromParent(); 132 } 133 134 bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) { 135 if (skipFunction(MF.getFunction())) 136 return false; 137 138 bool Changed = false; 139 // Loop over all of the basic blocks. 140 for (auto &MBB : MF) { 141 // Traverse the basic block. 
142 auto BlockIter = MBB.begin(); 143 144 while (BlockIter != MBB.end()) { 145 auto &MI = *BlockIter++; 146 if (isCVTAToLocalCombinationCandidate(MI)) { 147 CombineCVTAToLocal(MI); 148 Changed = true; 149 } 150 } // Instruction 151 } // Basic Block 152 153 const NVPTXRegisterInfo *NRI = 154 MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo(); 155 156 // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal 157 const auto &MRI = MF.getRegInfo(); 158 if (MRI.use_empty(NRI->getFrameRegister(MF))) { 159 if (auto MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF))) { 160 MI->eraseFromParent(); 161 } 162 } 163 164 return Changed; 165 } 166 167 MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); } 168