10b57cec5SDimitry Andric //===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass performs peephole optimizations to cleanup ugly code sequences at 100b57cec5SDimitry Andric // MachineInstruction layer. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric // Currently, there are two optimizations implemented: 130b57cec5SDimitry Andric // - One pre-RA MachineSSA pass to eliminate type promotion sequences, those 140b57cec5SDimitry Andric // zero extend 32-bit subregisters to 64-bit registers, if the compiler 150b57cec5SDimitry Andric // could prove the subregisters is defined by 32-bit operations in which 160b57cec5SDimitry Andric // case the upper half of the underlying 64-bit registers were zeroed 170b57cec5SDimitry Andric // implicitly. 180b57cec5SDimitry Andric // 190b57cec5SDimitry Andric // - One post-RA PreEmit pass to do final cleanup on some redundant 200b57cec5SDimitry Andric // instructions generated due to bad RA on subregister. 210b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric #include "BPF.h" 240b57cec5SDimitry Andric #include "BPFInstrInfo.h" 250b57cec5SDimitry Andric #include "BPFTargetMachine.h" 260b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 2781ad6265SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 280b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 290b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 308bcb0991SDimitry Andric #include "llvm/Support/Debug.h" 315ffd83dbSDimitry Andric #include <set> 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric using namespace llvm; 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric #define DEBUG_TYPE "bpf-mi-zext-elim" 360b57cec5SDimitry Andric 37*5f757f3fSDimitry Andric static cl::opt<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden, 38*5f757f3fSDimitry Andric cl::init(INT16_MAX >> 1), cl::desc("Specify gotol lower bound")); 39*5f757f3fSDimitry Andric 400b57cec5SDimitry Andric STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated"); 410b57cec5SDimitry Andric 420b57cec5SDimitry Andric namespace { 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric struct BPFMIPeephole : public MachineFunctionPass { 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric static char ID; 470b57cec5SDimitry Andric const BPFInstrInfo *TII; 480b57cec5SDimitry Andric MachineFunction *MF; 490b57cec5SDimitry Andric MachineRegisterInfo *MRI; 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric BPFMIPeephole() : MachineFunctionPass(ID) { 520b57cec5SDimitry Andric initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry()); 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric private: 560b57cec5SDimitry Andric // Initialize class variables. 570b57cec5SDimitry Andric void initialize(MachineFunction &MFParm); 580b57cec5SDimitry Andric 59480093f4SDimitry Andric bool isCopyFrom32Def(MachineInstr *CopyMI); 60480093f4SDimitry Andric bool isInsnFrom32Def(MachineInstr *DefInsn); 61480093f4SDimitry Andric bool isPhiFrom32Def(MachineInstr *MovMI); 620b57cec5SDimitry Andric bool isMovFrom32Def(MachineInstr *MovMI); 6304eeddc0SDimitry Andric bool eliminateZExtSeq(); 6404eeddc0SDimitry Andric bool eliminateZExt(); 650b57cec5SDimitry Andric 66480093f4SDimitry Andric std::set<MachineInstr *> PhiInsns; 67480093f4SDimitry Andric 680b57cec5SDimitry Andric public: 690b57cec5SDimitry Andric 700b57cec5SDimitry Andric // Main entry point for this pass. 710b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override { 720b57cec5SDimitry Andric if (skipFunction(MF.getFunction())) 730b57cec5SDimitry Andric return false; 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric initialize(MF); 760b57cec5SDimitry Andric 775ffd83dbSDimitry Andric // First try to eliminate (zext, lshift, rshift) and then 785ffd83dbSDimitry Andric // try to eliminate zext. 795ffd83dbSDimitry Andric bool ZExtSeqExist, ZExtExist; 805ffd83dbSDimitry Andric ZExtSeqExist = eliminateZExtSeq(); 815ffd83dbSDimitry Andric ZExtExist = eliminateZExt(); 825ffd83dbSDimitry Andric return ZExtSeqExist || ZExtExist; 830b57cec5SDimitry Andric } 840b57cec5SDimitry Andric }; 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric // Initialize class variables. 870b57cec5SDimitry Andric void BPFMIPeephole::initialize(MachineFunction &MFParm) { 880b57cec5SDimitry Andric MF = &MFParm; 890b57cec5SDimitry Andric MRI = &MF->getRegInfo(); 900b57cec5SDimitry Andric TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo(); 918bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n"); 920b57cec5SDimitry Andric } 930b57cec5SDimitry Andric 94480093f4SDimitry Andric bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI) 95480093f4SDimitry Andric { 96480093f4SDimitry Andric MachineOperand &opnd = CopyMI->getOperand(1); 97480093f4SDimitry Andric 98480093f4SDimitry Andric if (!opnd.isReg()) 99480093f4SDimitry Andric return false; 100480093f4SDimitry Andric 101480093f4SDimitry Andric // Return false if getting value from a 32bit physical register. 102480093f4SDimitry Andric // Most likely, this physical register is aliased to 103480093f4SDimitry Andric // function call return value or current function parameters. 104480093f4SDimitry Andric Register Reg = opnd.getReg(); 105bdd1243dSDimitry Andric if (!Reg.isVirtual()) 106480093f4SDimitry Andric return false; 107480093f4SDimitry Andric 108480093f4SDimitry Andric if (MRI->getRegClass(Reg) == &BPF::GPRRegClass) 109480093f4SDimitry Andric return false; 110480093f4SDimitry Andric 111480093f4SDimitry Andric MachineInstr *DefInsn = MRI->getVRegDef(Reg); 112480093f4SDimitry Andric if (!isInsnFrom32Def(DefInsn)) 113480093f4SDimitry Andric return false; 114480093f4SDimitry Andric 115480093f4SDimitry Andric return true; 116480093f4SDimitry Andric } 117480093f4SDimitry Andric 118480093f4SDimitry Andric bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI) 119480093f4SDimitry Andric { 120480093f4SDimitry Andric for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) { 121480093f4SDimitry Andric MachineOperand &opnd = PhiMI->getOperand(i); 122480093f4SDimitry Andric 123480093f4SDimitry Andric if (!opnd.isReg()) 124480093f4SDimitry Andric return false; 125480093f4SDimitry Andric 126480093f4SDimitry Andric MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg()); 127480093f4SDimitry Andric if (!PhiDef) 128480093f4SDimitry Andric return false; 129480093f4SDimitry Andric if (PhiDef->isPHI()) { 13081ad6265SDimitry Andric if (!PhiInsns.insert(PhiDef).second) 131480093f4SDimitry Andric return false; 132480093f4SDimitry Andric if (!isPhiFrom32Def(PhiDef)) 133480093f4SDimitry Andric return false; 134480093f4SDimitry Andric } 135480093f4SDimitry Andric if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef)) 136480093f4SDimitry Andric return false; 137480093f4SDimitry Andric } 138480093f4SDimitry Andric 139480093f4SDimitry Andric return true; 140480093f4SDimitry Andric } 141480093f4SDimitry Andric 142480093f4SDimitry Andric // The \p DefInsn instruction defines a virtual register. 143480093f4SDimitry Andric bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn) 144480093f4SDimitry Andric { 145480093f4SDimitry Andric if (!DefInsn) 146480093f4SDimitry Andric return false; 147480093f4SDimitry Andric 148480093f4SDimitry Andric if (DefInsn->isPHI()) { 14981ad6265SDimitry Andric if (!PhiInsns.insert(DefInsn).second) 150480093f4SDimitry Andric return false; 151480093f4SDimitry Andric if (!isPhiFrom32Def(DefInsn)) 152480093f4SDimitry Andric return false; 153480093f4SDimitry Andric } else if (DefInsn->getOpcode() == BPF::COPY) { 154480093f4SDimitry Andric if (!isCopyFrom32Def(DefInsn)) 155480093f4SDimitry Andric return false; 156480093f4SDimitry Andric } 157480093f4SDimitry Andric 158480093f4SDimitry Andric return true; 159480093f4SDimitry Andric } 160480093f4SDimitry Andric 1610b57cec5SDimitry Andric bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI) 1620b57cec5SDimitry Andric { 1630b57cec5SDimitry Andric MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg()); 1640b57cec5SDimitry Andric 1650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Def of Mov Src:"); 1660b57cec5SDimitry Andric LLVM_DEBUG(DefInsn->dump()); 1670b57cec5SDimitry Andric 168480093f4SDimitry Andric PhiInsns.clear(); 169480093f4SDimitry Andric if (!isInsnFrom32Def(DefInsn)) 1700b57cec5SDimitry Andric return false; 1710b57cec5SDimitry Andric 1720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n"); 1730b57cec5SDimitry Andric 1740b57cec5SDimitry Andric return true; 1750b57cec5SDimitry Andric } 1760b57cec5SDimitry Andric 17704eeddc0SDimitry Andric bool BPFMIPeephole::eliminateZExtSeq() { 1780b57cec5SDimitry Andric MachineInstr* ToErase = nullptr; 1790b57cec5SDimitry Andric bool Eliminated = false; 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric for (MachineBasicBlock &MBB : *MF) { 1820b57cec5SDimitry Andric for (MachineInstr &MI : MBB) { 1830b57cec5SDimitry Andric // If the previous instruction was marked for elimination, remove it now. 1840b57cec5SDimitry Andric if (ToErase) { 1850b57cec5SDimitry Andric ToErase->eraseFromParent(); 1860b57cec5SDimitry Andric ToErase = nullptr; 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric // Eliminate the 32-bit to 64-bit zero extension sequence when possible. 1900b57cec5SDimitry Andric // 1910b57cec5SDimitry Andric // MOV_32_64 rB, wA 1920b57cec5SDimitry Andric // SLL_ri rB, rB, 32 1930b57cec5SDimitry Andric // SRL_ri rB, rB, 32 1940b57cec5SDimitry Andric if (MI.getOpcode() == BPF::SRL_ri && 1950b57cec5SDimitry Andric MI.getOperand(2).getImm() == 32) { 1968bcb0991SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1978bcb0991SDimitry Andric Register ShfReg = MI.getOperand(1).getReg(); 1980b57cec5SDimitry Andric MachineInstr *SllMI = MRI->getVRegDef(ShfReg); 1990b57cec5SDimitry Andric 2000b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Starting SRL found:"); 2010b57cec5SDimitry Andric LLVM_DEBUG(MI.dump()); 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric if (!SllMI || 2040b57cec5SDimitry Andric SllMI->isPHI() || 2050b57cec5SDimitry Andric SllMI->getOpcode() != BPF::SLL_ri || 2060b57cec5SDimitry Andric SllMI->getOperand(2).getImm() != 32) 2070b57cec5SDimitry Andric continue; 2080b57cec5SDimitry Andric 2090b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " SLL found:"); 2100b57cec5SDimitry Andric LLVM_DEBUG(SllMI->dump()); 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg()); 2130b57cec5SDimitry Andric if (!MovMI || 2140b57cec5SDimitry Andric MovMI->isPHI() || 2150b57cec5SDimitry Andric MovMI->getOpcode() != BPF::MOV_32_64) 2160b57cec5SDimitry Andric continue; 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Type cast Mov found:"); 2190b57cec5SDimitry Andric LLVM_DEBUG(MovMI->dump()); 2200b57cec5SDimitry Andric 2218bcb0991SDimitry Andric Register SubReg = MovMI->getOperand(1).getReg(); 2220b57cec5SDimitry Andric if (!isMovFrom32Def(MovMI)) { 2230b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 2240b57cec5SDimitry Andric << " One ZExt elim sequence failed qualifying elim.\n"); 2250b57cec5SDimitry Andric continue; 2260b57cec5SDimitry Andric } 2270b57cec5SDimitry Andric 2280b57cec5SDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg) 2290b57cec5SDimitry Andric .addImm(0).addReg(SubReg).addImm(BPF::sub_32); 2300b57cec5SDimitry Andric 2310b57cec5SDimitry Andric SllMI->eraseFromParent(); 2320b57cec5SDimitry Andric MovMI->eraseFromParent(); 2330b57cec5SDimitry Andric // MI is the right shift, we can't erase it in it's own iteration. 2340b57cec5SDimitry Andric // Mark it to ToErase, and erase in the next iteration. 2350b57cec5SDimitry Andric ToErase = &MI; 2360b57cec5SDimitry Andric ZExtElemNum++; 2370b57cec5SDimitry Andric Eliminated = true; 2380b57cec5SDimitry Andric } 2390b57cec5SDimitry Andric } 2400b57cec5SDimitry Andric } 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric return Eliminated; 2430b57cec5SDimitry Andric } 2440b57cec5SDimitry Andric 24504eeddc0SDimitry Andric bool BPFMIPeephole::eliminateZExt() { 2465ffd83dbSDimitry Andric MachineInstr* ToErase = nullptr; 2475ffd83dbSDimitry Andric bool Eliminated = false; 2485ffd83dbSDimitry Andric 2495ffd83dbSDimitry Andric for (MachineBasicBlock &MBB : *MF) { 2505ffd83dbSDimitry Andric for (MachineInstr &MI : MBB) { 2515ffd83dbSDimitry Andric // If the previous instruction was marked for elimination, remove it now. 2525ffd83dbSDimitry Andric if (ToErase) { 2535ffd83dbSDimitry Andric ToErase->eraseFromParent(); 2545ffd83dbSDimitry Andric ToErase = nullptr; 2555ffd83dbSDimitry Andric } 2565ffd83dbSDimitry Andric 2575ffd83dbSDimitry Andric if (MI.getOpcode() != BPF::MOV_32_64) 2585ffd83dbSDimitry Andric continue; 2595ffd83dbSDimitry Andric 2605ffd83dbSDimitry Andric // Eliminate MOV_32_64 if possible. 2615ffd83dbSDimitry Andric // MOV_32_64 rA, wB 2625ffd83dbSDimitry Andric // 2635ffd83dbSDimitry Andric // If wB has been zero extended, replace it with a SUBREG_TO_REG. 2645ffd83dbSDimitry Andric // This is to workaround BPF programs where pkt->{data, data_end} 2655ffd83dbSDimitry Andric // is encoded as u32, but actually the verifier populates them 2665ffd83dbSDimitry Andric // as 64bit pointer. The MOV_32_64 will zero out the top 32 bits. 2675ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:"); 2685ffd83dbSDimitry Andric LLVM_DEBUG(MI.dump()); 2695ffd83dbSDimitry Andric 2705ffd83dbSDimitry Andric if (!isMovFrom32Def(&MI)) 2715ffd83dbSDimitry Andric continue; 2725ffd83dbSDimitry Andric 2735ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n"); 2745ffd83dbSDimitry Andric 2755ffd83dbSDimitry Andric Register dst = MI.getOperand(0).getReg(); 2765ffd83dbSDimitry Andric Register src = MI.getOperand(1).getReg(); 2775ffd83dbSDimitry Andric 2785ffd83dbSDimitry Andric // Build a SUBREG_TO_REG instruction. 2795ffd83dbSDimitry Andric BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst) 2805ffd83dbSDimitry Andric .addImm(0).addReg(src).addImm(BPF::sub_32); 2815ffd83dbSDimitry Andric 2825ffd83dbSDimitry Andric ToErase = &MI; 2835ffd83dbSDimitry Andric Eliminated = true; 2845ffd83dbSDimitry Andric } 2855ffd83dbSDimitry Andric } 2865ffd83dbSDimitry Andric 2875ffd83dbSDimitry Andric return Eliminated; 2885ffd83dbSDimitry Andric } 2895ffd83dbSDimitry Andric 2900b57cec5SDimitry Andric } // end default namespace 2910b57cec5SDimitry Andric 2920b57cec5SDimitry Andric INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE, 2938bcb0991SDimitry Andric "BPF MachineSSA Peephole Optimization For ZEXT Eliminate", 2948bcb0991SDimitry Andric false, false) 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric char BPFMIPeephole::ID = 0; 2970b57cec5SDimitry Andric FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); } 2980b57cec5SDimitry Andric 2990b57cec5SDimitry Andric STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated"); 3000b57cec5SDimitry Andric 3010b57cec5SDimitry Andric namespace { 3020b57cec5SDimitry Andric 3030b57cec5SDimitry Andric struct BPFMIPreEmitPeephole : public MachineFunctionPass { 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric static char ID; 3060b57cec5SDimitry Andric MachineFunction *MF; 3070b57cec5SDimitry Andric const TargetRegisterInfo *TRI; 308*5f757f3fSDimitry Andric const BPFInstrInfo *TII; 309*5f757f3fSDimitry Andric bool SupportGotol; 3100b57cec5SDimitry Andric 3110b57cec5SDimitry Andric BPFMIPreEmitPeephole() : MachineFunctionPass(ID) { 3120b57cec5SDimitry Andric initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry()); 3130b57cec5SDimitry Andric } 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric private: 3160b57cec5SDimitry Andric // Initialize class variables. 3170b57cec5SDimitry Andric void initialize(MachineFunction &MFParm); 3180b57cec5SDimitry Andric 319*5f757f3fSDimitry Andric bool in16BitRange(int Num); 32004eeddc0SDimitry Andric bool eliminateRedundantMov(); 321*5f757f3fSDimitry Andric bool adjustBranch(); 3220b57cec5SDimitry Andric 3230b57cec5SDimitry Andric public: 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric // Main entry point for this pass. 3260b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override { 3270b57cec5SDimitry Andric if (skipFunction(MF.getFunction())) 3280b57cec5SDimitry Andric return false; 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric initialize(MF); 3310b57cec5SDimitry Andric 332*5f757f3fSDimitry Andric bool Changed; 333*5f757f3fSDimitry Andric Changed = eliminateRedundantMov(); 334*5f757f3fSDimitry Andric if (SupportGotol) 335*5f757f3fSDimitry Andric Changed = adjustBranch() || Changed; 336*5f757f3fSDimitry Andric return Changed; 3370b57cec5SDimitry Andric } 3380b57cec5SDimitry Andric }; 3390b57cec5SDimitry Andric 3400b57cec5SDimitry Andric // Initialize class variables. 3410b57cec5SDimitry Andric void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) { 3420b57cec5SDimitry Andric MF = &MFParm; 343*5f757f3fSDimitry Andric TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo(); 3440b57cec5SDimitry Andric TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo(); 345*5f757f3fSDimitry Andric SupportGotol = MF->getSubtarget<BPFSubtarget>().hasGotol(); 3460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n"); 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric 34904eeddc0SDimitry Andric bool BPFMIPreEmitPeephole::eliminateRedundantMov() { 3500b57cec5SDimitry Andric MachineInstr* ToErase = nullptr; 3510b57cec5SDimitry Andric bool Eliminated = false; 3520b57cec5SDimitry Andric 3530b57cec5SDimitry Andric for (MachineBasicBlock &MBB : *MF) { 3540b57cec5SDimitry Andric for (MachineInstr &MI : MBB) { 3550b57cec5SDimitry Andric // If the previous instruction was marked for elimination, remove it now. 3560b57cec5SDimitry Andric if (ToErase) { 3570b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Redundant Mov Eliminated:"); 3580b57cec5SDimitry Andric LLVM_DEBUG(ToErase->dump()); 3590b57cec5SDimitry Andric ToErase->eraseFromParent(); 3600b57cec5SDimitry Andric ToErase = nullptr; 3610b57cec5SDimitry Andric } 3620b57cec5SDimitry Andric 3630b57cec5SDimitry Andric // Eliminate identical move: 3640b57cec5SDimitry Andric // 3650b57cec5SDimitry Andric // MOV rA, rA 3660b57cec5SDimitry Andric // 3675ffd83dbSDimitry Andric // Note that we cannot remove 3685ffd83dbSDimitry Andric // MOV_32_64 rA, wA 3695ffd83dbSDimitry Andric // MOV_rr_32 wA, wA 3705ffd83dbSDimitry Andric // as these two instructions having side effects, zeroing out 3715ffd83dbSDimitry Andric // top 32 bits of rA. 3728bcb0991SDimitry Andric unsigned Opcode = MI.getOpcode(); 3735ffd83dbSDimitry Andric if (Opcode == BPF::MOV_rr) { 3748bcb0991SDimitry Andric Register dst = MI.getOperand(0).getReg(); 3758bcb0991SDimitry Andric Register src = MI.getOperand(1).getReg(); 3760b57cec5SDimitry Andric 3778bcb0991SDimitry Andric if (dst != src) 3780b57cec5SDimitry Andric continue; 3790b57cec5SDimitry Andric 3800b57cec5SDimitry Andric ToErase = &MI; 3810b57cec5SDimitry Andric RedundantMovElemNum++; 3820b57cec5SDimitry Andric Eliminated = true; 3830b57cec5SDimitry Andric } 3840b57cec5SDimitry Andric } 3850b57cec5SDimitry Andric } 3860b57cec5SDimitry Andric 3870b57cec5SDimitry Andric return Eliminated; 3880b57cec5SDimitry Andric } 3890b57cec5SDimitry Andric 390*5f757f3fSDimitry Andric bool BPFMIPreEmitPeephole::in16BitRange(int Num) { 391*5f757f3fSDimitry Andric // Well, the cut-off is not precisely at 16bit range since 392*5f757f3fSDimitry Andric // new codes are added during the transformation. So let us 393*5f757f3fSDimitry Andric // a little bit conservative. 394*5f757f3fSDimitry Andric return Num >= -GotolAbsLowBound && Num <= GotolAbsLowBound; 395*5f757f3fSDimitry Andric } 396*5f757f3fSDimitry Andric 397*5f757f3fSDimitry Andric // Before cpu=v4, only 16bit branch target offset (-0x8000 to 0x7fff) 398*5f757f3fSDimitry Andric // is supported for both unconditional (JMP) and condition (JEQ, JSGT, 399*5f757f3fSDimitry Andric // etc.) branches. In certain cases, e.g., full unrolling, the branch 400*5f757f3fSDimitry Andric // target offset might exceed 16bit range. If this happens, the llvm 401*5f757f3fSDimitry Andric // will generate incorrect code as the offset is truncated to 16bit. 402*5f757f3fSDimitry Andric // 403*5f757f3fSDimitry Andric // To fix this rare case, a new insn JMPL is introduced. This new 404*5f757f3fSDimitry Andric // insn supports supports 32bit branch target offset. The compiler 405*5f757f3fSDimitry Andric // does not use this insn during insn selection. Rather, BPF backend 406*5f757f3fSDimitry Andric // will estimate the branch target offset and do JMP -> JMPL and 407*5f757f3fSDimitry Andric // JEQ -> JEQ + JMPL conversion if the estimated branch target offset 408*5f757f3fSDimitry Andric // is beyond 16bit. 409*5f757f3fSDimitry Andric bool BPFMIPreEmitPeephole::adjustBranch() { 410*5f757f3fSDimitry Andric bool Changed = false; 411*5f757f3fSDimitry Andric int CurrNumInsns = 0; 412*5f757f3fSDimitry Andric DenseMap<MachineBasicBlock *, int> SoFarNumInsns; 413*5f757f3fSDimitry Andric DenseMap<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB; 414*5f757f3fSDimitry Andric std::vector<MachineBasicBlock *> MBBs; 415*5f757f3fSDimitry Andric 416*5f757f3fSDimitry Andric MachineBasicBlock *PrevBB = nullptr; 417*5f757f3fSDimitry Andric for (MachineBasicBlock &MBB : *MF) { 418*5f757f3fSDimitry Andric // MBB.size() is the number of insns in this basic block, including some 419*5f757f3fSDimitry Andric // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit. 420*5f757f3fSDimitry Andric // Typically we have way more normal insns than DEBUG_VALUE insns. 421*5f757f3fSDimitry Andric // Also, if we indeed need to convert conditional branch like JEQ to 422*5f757f3fSDimitry Andric // JEQ + JMPL, we actually introduced some new insns like below. 423*5f757f3fSDimitry Andric CurrNumInsns += (int)MBB.size(); 424*5f757f3fSDimitry Andric SoFarNumInsns[&MBB] = CurrNumInsns; 425*5f757f3fSDimitry Andric if (PrevBB != nullptr) 426*5f757f3fSDimitry Andric FollowThroughBB[PrevBB] = &MBB; 427*5f757f3fSDimitry Andric PrevBB = &MBB; 428*5f757f3fSDimitry Andric // A list of original BBs to make later traveral easier. 429*5f757f3fSDimitry Andric MBBs.push_back(&MBB); 430*5f757f3fSDimitry Andric } 431*5f757f3fSDimitry Andric FollowThroughBB[PrevBB] = nullptr; 432*5f757f3fSDimitry Andric 433*5f757f3fSDimitry Andric for (unsigned i = 0; i < MBBs.size(); i++) { 434*5f757f3fSDimitry Andric // We have four cases here: 435*5f757f3fSDimitry Andric // (1). no terminator, simple follow through. 436*5f757f3fSDimitry Andric // (2). jmp to another bb. 437*5f757f3fSDimitry Andric // (3). conditional jmp to another bb or follow through. 438*5f757f3fSDimitry Andric // (4). conditional jmp followed by an unconditional jmp. 439*5f757f3fSDimitry Andric MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr; 440*5f757f3fSDimitry Andric 441*5f757f3fSDimitry Andric MachineBasicBlock *MBB = MBBs[i]; 442*5f757f3fSDimitry Andric for (MachineInstr &Term : MBB->terminators()) { 443*5f757f3fSDimitry Andric if (Term.isConditionalBranch()) { 444*5f757f3fSDimitry Andric assert(CondJmp == nullptr); 445*5f757f3fSDimitry Andric CondJmp = &Term; 446*5f757f3fSDimitry Andric } else if (Term.isUnconditionalBranch()) { 447*5f757f3fSDimitry Andric assert(UncondJmp == nullptr); 448*5f757f3fSDimitry Andric UncondJmp = &Term; 449*5f757f3fSDimitry Andric } 450*5f757f3fSDimitry Andric } 451*5f757f3fSDimitry Andric 452*5f757f3fSDimitry Andric // (1). no terminator, simple follow through. 453*5f757f3fSDimitry Andric if (!CondJmp && !UncondJmp) 454*5f757f3fSDimitry Andric continue; 455*5f757f3fSDimitry Andric 456*5f757f3fSDimitry Andric MachineBasicBlock *CondTargetBB, *JmpBB; 457*5f757f3fSDimitry Andric CurrNumInsns = SoFarNumInsns[MBB]; 458*5f757f3fSDimitry Andric 459*5f757f3fSDimitry Andric // (2). jmp to another bb. 460*5f757f3fSDimitry Andric if (!CondJmp && UncondJmp) { 461*5f757f3fSDimitry Andric JmpBB = UncondJmp->getOperand(0).getMBB(); 462*5f757f3fSDimitry Andric if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns)) 463*5f757f3fSDimitry Andric continue; 464*5f757f3fSDimitry Andric 465*5f757f3fSDimitry Andric // replace this insn as a JMPL. 466*5f757f3fSDimitry Andric BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB); 467*5f757f3fSDimitry Andric UncondJmp->eraseFromParent(); 468*5f757f3fSDimitry Andric Changed = true; 469*5f757f3fSDimitry Andric continue; 470*5f757f3fSDimitry Andric } 471*5f757f3fSDimitry Andric 472*5f757f3fSDimitry Andric const BasicBlock *TermBB = MBB->getBasicBlock(); 473*5f757f3fSDimitry Andric int Dist; 474*5f757f3fSDimitry Andric 475*5f757f3fSDimitry Andric // (3). conditional jmp to another bb or follow through. 476*5f757f3fSDimitry Andric if (!UncondJmp) { 477*5f757f3fSDimitry Andric CondTargetBB = CondJmp->getOperand(2).getMBB(); 478*5f757f3fSDimitry Andric MachineBasicBlock *FollowBB = FollowThroughBB[MBB]; 479*5f757f3fSDimitry Andric Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns; 480*5f757f3fSDimitry Andric if (in16BitRange(Dist)) 481*5f757f3fSDimitry Andric continue; 482*5f757f3fSDimitry Andric 483*5f757f3fSDimitry Andric // We have 484*5f757f3fSDimitry Andric // B2: ... 485*5f757f3fSDimitry Andric // if (cond) goto B5 486*5f757f3fSDimitry Andric // B3: ... 487*5f757f3fSDimitry Andric // where B2 -> B5 is beyond 16bit range. 488*5f757f3fSDimitry Andric // 489*5f757f3fSDimitry Andric // We do not have 32bit cond jmp insn. So we try to do 490*5f757f3fSDimitry Andric // the following. 491*5f757f3fSDimitry Andric // B2: ... 492*5f757f3fSDimitry Andric // if (cond) goto New_B1 493*5f757f3fSDimitry Andric // New_B0 goto B3 494*5f757f3fSDimitry Andric // New_B1: gotol B5 495*5f757f3fSDimitry Andric // B3: ... 496*5f757f3fSDimitry Andric // Basically two new basic blocks are created. 497*5f757f3fSDimitry Andric MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB); 498*5f757f3fSDimitry Andric MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(TermBB); 499*5f757f3fSDimitry Andric 500*5f757f3fSDimitry Andric // Insert New_B0 and New_B1 into function block list. 501*5f757f3fSDimitry Andric MachineFunction::iterator MBB_I = ++MBB->getIterator(); 502*5f757f3fSDimitry Andric MF->insert(MBB_I, New_B0); 503*5f757f3fSDimitry Andric MF->insert(MBB_I, New_B1); 504*5f757f3fSDimitry Andric 505*5f757f3fSDimitry Andric // replace B2 cond jump 506*5f757f3fSDimitry Andric if (CondJmp->getOperand(1).isReg()) 507*5f757f3fSDimitry Andric BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode())) 508*5f757f3fSDimitry Andric .addReg(CondJmp->getOperand(0).getReg()) 509*5f757f3fSDimitry Andric .addReg(CondJmp->getOperand(1).getReg()) 510*5f757f3fSDimitry Andric .addMBB(New_B1); 511*5f757f3fSDimitry Andric else 512*5f757f3fSDimitry Andric BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode())) 513*5f757f3fSDimitry Andric .addReg(CondJmp->getOperand(0).getReg()) 514*5f757f3fSDimitry Andric .addImm(CondJmp->getOperand(1).getImm()) 515*5f757f3fSDimitry Andric .addMBB(New_B1); 516*5f757f3fSDimitry Andric 517*5f757f3fSDimitry Andric // it is possible that CondTargetBB and FollowBB are the same. But the 518*5f757f3fSDimitry Andric // above Dist checking should already filtered this case. 519*5f757f3fSDimitry Andric MBB->removeSuccessor(CondTargetBB); 520*5f757f3fSDimitry Andric MBB->removeSuccessor(FollowBB); 521*5f757f3fSDimitry Andric MBB->addSuccessor(New_B0); 522*5f757f3fSDimitry Andric MBB->addSuccessor(New_B1); 523*5f757f3fSDimitry Andric 524*5f757f3fSDimitry Andric // Populate insns in New_B0 and New_B1. 525*5f757f3fSDimitry Andric BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMP)).addMBB(FollowBB); 526*5f757f3fSDimitry Andric BuildMI(New_B1, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)) 527*5f757f3fSDimitry Andric .addMBB(CondTargetBB); 528*5f757f3fSDimitry Andric 529*5f757f3fSDimitry Andric New_B0->addSuccessor(FollowBB); 530*5f757f3fSDimitry Andric New_B1->addSuccessor(CondTargetBB); 531*5f757f3fSDimitry Andric CondJmp->eraseFromParent(); 532*5f757f3fSDimitry Andric Changed = true; 533*5f757f3fSDimitry Andric continue; 534*5f757f3fSDimitry Andric } 535*5f757f3fSDimitry Andric 536*5f757f3fSDimitry Andric // (4). conditional jmp followed by an unconditional jmp. 537*5f757f3fSDimitry Andric CondTargetBB = CondJmp->getOperand(2).getMBB(); 538*5f757f3fSDimitry Andric JmpBB = UncondJmp->getOperand(0).getMBB(); 539*5f757f3fSDimitry Andric 540*5f757f3fSDimitry Andric // We have 541*5f757f3fSDimitry Andric // B2: ... 542*5f757f3fSDimitry Andric // if (cond) goto B5 543*5f757f3fSDimitry Andric // JMP B7 544*5f757f3fSDimitry Andric // B3: ... 545*5f757f3fSDimitry Andric // 546*5f757f3fSDimitry Andric // If only B2->B5 is out of 16bit range, we can do 547*5f757f3fSDimitry Andric // B2: ... 548*5f757f3fSDimitry Andric // if (cond) goto new_B 549*5f757f3fSDimitry Andric // JMP B7 550*5f757f3fSDimitry Andric // New_B: gotol B5 551*5f757f3fSDimitry Andric // B3: ... 552*5f757f3fSDimitry Andric // 553*5f757f3fSDimitry Andric // If only 'JMP B7' is out of 16bit range, we can replace 554*5f757f3fSDimitry Andric // 'JMP B7' with 'JMPL B7'. 555*5f757f3fSDimitry Andric // 556*5f757f3fSDimitry Andric // If both B2->B5 and 'JMP B7' is out of range, just do 557*5f757f3fSDimitry Andric // both the above transformations. 558*5f757f3fSDimitry Andric Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns; 559*5f757f3fSDimitry Andric if (!in16BitRange(Dist)) { 560*5f757f3fSDimitry Andric MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB); 561*5f757f3fSDimitry Andric 562*5f757f3fSDimitry Andric // Insert New_B0 into function block list. 563*5f757f3fSDimitry Andric MF->insert(++MBB->getIterator(), New_B); 564*5f757f3fSDimitry Andric 565*5f757f3fSDimitry Andric // replace B2 cond jump 566*5f757f3fSDimitry Andric if (CondJmp->getOperand(1).isReg()) 567*5f757f3fSDimitry Andric BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode())) 568*5f757f3fSDimitry Andric .addReg(CondJmp->getOperand(0).getReg()) 569*5f757f3fSDimitry Andric .addReg(CondJmp->getOperand(1).getReg()) 570*5f757f3fSDimitry Andric .addMBB(New_B); 571*5f757f3fSDimitry Andric else 572*5f757f3fSDimitry Andric BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode())) 573*5f757f3fSDimitry Andric .addReg(CondJmp->getOperand(0).getReg()) 574*5f757f3fSDimitry Andric .addImm(CondJmp->getOperand(1).getImm()) 575*5f757f3fSDimitry Andric .addMBB(New_B); 576*5f757f3fSDimitry Andric 577*5f757f3fSDimitry Andric if (CondTargetBB != JmpBB) 578*5f757f3fSDimitry Andric MBB->removeSuccessor(CondTargetBB); 579*5f757f3fSDimitry Andric MBB->addSuccessor(New_B); 580*5f757f3fSDimitry Andric 581*5f757f3fSDimitry Andric // Populate insn in New_B. 582*5f757f3fSDimitry Andric BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB); 583*5f757f3fSDimitry Andric 584*5f757f3fSDimitry Andric New_B->addSuccessor(CondTargetBB); 585*5f757f3fSDimitry Andric CondJmp->eraseFromParent(); 586*5f757f3fSDimitry Andric Changed = true; 587*5f757f3fSDimitry Andric } 588*5f757f3fSDimitry Andric 589*5f757f3fSDimitry Andric if (!in16BitRange(SoFarNumInsns[JmpBB] - CurrNumInsns)) { 590*5f757f3fSDimitry Andric BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB); 591*5f757f3fSDimitry Andric UncondJmp->eraseFromParent(); 592*5f757f3fSDimitry Andric Changed = true; 593*5f757f3fSDimitry Andric } 594*5f757f3fSDimitry Andric } 595*5f757f3fSDimitry Andric 596*5f757f3fSDimitry Andric return Changed; 597*5f757f3fSDimitry Andric } 598*5f757f3fSDimitry Andric 5990b57cec5SDimitry Andric } // end default namespace 6000b57cec5SDimitry Andric 6010b57cec5SDimitry Andric INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole", 6020b57cec5SDimitry Andric "BPF PreEmit Peephole Optimization", false, false) 6030b57cec5SDimitry Andric 6040b57cec5SDimitry Andric char BPFMIPreEmitPeephole::ID = 0; 6050b57cec5SDimitry Andric FunctionPass* llvm::createBPFMIPreEmitPeepholePass() 6060b57cec5SDimitry Andric { 6070b57cec5SDimitry Andric return new BPFMIPreEmitPeephole(); 6080b57cec5SDimitry Andric } 609