1 //===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass performs peephole optimizations to cleanup ugly code sequences at 10 // MachineInstruction layer. 11 // 12 // Currently, there are two optimizations implemented: 13 // - One pre-RA MachineSSA pass to eliminate type promotion sequences, those 14 // zero extend 32-bit subregisters to 64-bit registers, if the compiler 15 // could prove the subregisters is defined by 32-bit operations in which 16 // case the upper half of the underlying 64-bit registers were zeroed 17 // implicitly. 18 // 19 // - One post-RA PreEmit pass to do final cleanup on some redundant 20 // instructions generated due to bad RA on subregister. 21 //===----------------------------------------------------------------------===// 22 23 #include "BPF.h" 24 #include "BPFInstrInfo.h" 25 #include "BPFTargetMachine.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/Support/Debug.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "bpf-mi-zext-elim" 34 35 STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated"); 36 37 namespace { 38 39 struct BPFMIPeephole : public MachineFunctionPass { 40 41 static char ID; 42 const BPFInstrInfo *TII; 43 MachineFunction *MF; 44 MachineRegisterInfo *MRI; 45 46 BPFMIPeephole() : MachineFunctionPass(ID) { 47 initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry()); 48 } 49 50 private: 51 // Initialize class variables. 52 void initialize(MachineFunction &MFParm); 53 54 bool isMovFrom32Def(MachineInstr *MovMI); 55 bool eliminateZExtSeq(void); 56 57 public: 58 59 // Main entry point for this pass. 60 bool runOnMachineFunction(MachineFunction &MF) override { 61 if (skipFunction(MF.getFunction())) 62 return false; 63 64 initialize(MF); 65 66 return eliminateZExtSeq(); 67 } 68 }; 69 70 // Initialize class variables. 71 void BPFMIPeephole::initialize(MachineFunction &MFParm) { 72 MF = &MFParm; 73 MRI = &MF->getRegInfo(); 74 TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo(); 75 LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n"); 76 } 77 78 bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI) 79 { 80 MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg()); 81 82 LLVM_DEBUG(dbgs() << " Def of Mov Src:"); 83 LLVM_DEBUG(DefInsn->dump()); 84 85 if (!DefInsn) 86 return false; 87 88 if (DefInsn->isPHI()) { 89 for (unsigned i = 1, e = DefInsn->getNumOperands(); i < e; i += 2) { 90 MachineOperand &opnd = DefInsn->getOperand(i); 91 92 if (!opnd.isReg()) 93 return false; 94 95 MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg()); 96 // quick check on PHI incoming definitions. 97 if (!PhiDef || PhiDef->isPHI() || PhiDef->getOpcode() == BPF::COPY) 98 return false; 99 } 100 } 101 102 if (DefInsn->getOpcode() == BPF::COPY) { 103 MachineOperand &opnd = DefInsn->getOperand(1); 104 105 if (!opnd.isReg()) 106 return false; 107 108 Register Reg = opnd.getReg(); 109 if ((Register::isVirtualRegister(Reg) && 110 MRI->getRegClass(Reg) == &BPF::GPRRegClass)) 111 return false; 112 } 113 114 LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n"); 115 116 return true; 117 } 118 119 bool BPFMIPeephole::eliminateZExtSeq(void) { 120 MachineInstr* ToErase = nullptr; 121 bool Eliminated = false; 122 123 for (MachineBasicBlock &MBB : *MF) { 124 for (MachineInstr &MI : MBB) { 125 // If the previous instruction was marked for elimination, remove it now. 126 if (ToErase) { 127 ToErase->eraseFromParent(); 128 ToErase = nullptr; 129 } 130 131 // Eliminate the 32-bit to 64-bit zero extension sequence when possible. 132 // 133 // MOV_32_64 rB, wA 134 // SLL_ri rB, rB, 32 135 // SRL_ri rB, rB, 32 136 if (MI.getOpcode() == BPF::SRL_ri && 137 MI.getOperand(2).getImm() == 32) { 138 Register DstReg = MI.getOperand(0).getReg(); 139 Register ShfReg = MI.getOperand(1).getReg(); 140 MachineInstr *SllMI = MRI->getVRegDef(ShfReg); 141 142 LLVM_DEBUG(dbgs() << "Starting SRL found:"); 143 LLVM_DEBUG(MI.dump()); 144 145 if (!SllMI || 146 SllMI->isPHI() || 147 SllMI->getOpcode() != BPF::SLL_ri || 148 SllMI->getOperand(2).getImm() != 32) 149 continue; 150 151 LLVM_DEBUG(dbgs() << " SLL found:"); 152 LLVM_DEBUG(SllMI->dump()); 153 154 MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg()); 155 if (!MovMI || 156 MovMI->isPHI() || 157 MovMI->getOpcode() != BPF::MOV_32_64) 158 continue; 159 160 LLVM_DEBUG(dbgs() << " Type cast Mov found:"); 161 LLVM_DEBUG(MovMI->dump()); 162 163 Register SubReg = MovMI->getOperand(1).getReg(); 164 if (!isMovFrom32Def(MovMI)) { 165 LLVM_DEBUG(dbgs() 166 << " One ZExt elim sequence failed qualifying elim.\n"); 167 continue; 168 } 169 170 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg) 171 .addImm(0).addReg(SubReg).addImm(BPF::sub_32); 172 173 SllMI->eraseFromParent(); 174 MovMI->eraseFromParent(); 175 // MI is the right shift, we can't erase it in it's own iteration. 176 // Mark it to ToErase, and erase in the next iteration. 177 ToErase = &MI; 178 ZExtElemNum++; 179 Eliminated = true; 180 } 181 } 182 } 183 184 return Eliminated; 185 } 186 187 } // end default namespace 188 189 INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE, 190 "BPF MachineSSA Peephole Optimization For ZEXT Eliminate", 191 false, false) 192 193 char BPFMIPeephole::ID = 0; 194 FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); } 195 196 STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated"); 197 198 namespace { 199 200 struct BPFMIPreEmitPeephole : public MachineFunctionPass { 201 202 static char ID; 203 MachineFunction *MF; 204 const TargetRegisterInfo *TRI; 205 206 BPFMIPreEmitPeephole() : MachineFunctionPass(ID) { 207 initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry()); 208 } 209 210 private: 211 // Initialize class variables. 212 void initialize(MachineFunction &MFParm); 213 214 bool eliminateRedundantMov(void); 215 216 public: 217 218 // Main entry point for this pass. 219 bool runOnMachineFunction(MachineFunction &MF) override { 220 if (skipFunction(MF.getFunction())) 221 return false; 222 223 initialize(MF); 224 225 return eliminateRedundantMov(); 226 } 227 }; 228 229 // Initialize class variables. 230 void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) { 231 MF = &MFParm; 232 TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo(); 233 LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n"); 234 } 235 236 bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) { 237 MachineInstr* ToErase = nullptr; 238 bool Eliminated = false; 239 240 for (MachineBasicBlock &MBB : *MF) { 241 for (MachineInstr &MI : MBB) { 242 // If the previous instruction was marked for elimination, remove it now. 243 if (ToErase) { 244 LLVM_DEBUG(dbgs() << " Redundant Mov Eliminated:"); 245 LLVM_DEBUG(ToErase->dump()); 246 ToErase->eraseFromParent(); 247 ToErase = nullptr; 248 } 249 250 // Eliminate identical move: 251 // 252 // MOV rA, rA 253 // 254 // This is particularly possible to happen when sub-register support 255 // enabled. The special type cast insn MOV_32_64 involves different 256 // register class on src (i32) and dst (i64), RA could generate useless 257 // instruction due to this. 258 unsigned Opcode = MI.getOpcode(); 259 if (Opcode == BPF::MOV_32_64 || 260 Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) { 261 Register dst = MI.getOperand(0).getReg(); 262 Register src = MI.getOperand(1).getReg(); 263 264 if (Opcode == BPF::MOV_32_64) 265 dst = TRI->getSubReg(dst, BPF::sub_32); 266 267 if (dst != src) 268 continue; 269 270 ToErase = &MI; 271 RedundantMovElemNum++; 272 Eliminated = true; 273 } 274 } 275 } 276 277 return Eliminated; 278 } 279 280 } // end default namespace 281 282 INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole", 283 "BPF PreEmit Peephole Optimization", false, false) 284 285 char BPFMIPreEmitPeephole::ID = 0; 286 FunctionPass* llvm::createBPFMIPreEmitPeepholePass() 287 { 288 return new BPFMIPreEmitPeephole(); 289 } 290 291 STATISTIC(TruncElemNum, "Number of truncation eliminated"); 292 293 namespace { 294 295 struct BPFMIPeepholeTruncElim : public MachineFunctionPass { 296 297 static char ID; 298 const BPFInstrInfo *TII; 299 MachineFunction *MF; 300 MachineRegisterInfo *MRI; 301 302 BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) { 303 initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry()); 304 } 305 306 private: 307 // Initialize class variables. 308 void initialize(MachineFunction &MFParm); 309 310 bool eliminateTruncSeq(void); 311 312 public: 313 314 // Main entry point for this pass. 315 bool runOnMachineFunction(MachineFunction &MF) override { 316 if (skipFunction(MF.getFunction())) 317 return false; 318 319 initialize(MF); 320 321 return eliminateTruncSeq(); 322 } 323 }; 324 325 static bool TruncSizeCompatible(int TruncSize, unsigned opcode) 326 { 327 if (TruncSize == 1) 328 return opcode == BPF::LDB || opcode == BPF::LDB32; 329 330 if (TruncSize == 2) 331 return opcode == BPF::LDH || opcode == BPF::LDH32; 332 333 if (TruncSize == 4) 334 return opcode == BPF::LDW || opcode == BPF::LDW32; 335 336 return false; 337 } 338 339 // Initialize class variables. 340 void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) { 341 MF = &MFParm; 342 MRI = &MF->getRegInfo(); 343 TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo(); 344 LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n"); 345 } 346 347 // Reg truncating is often the result of 8/16/32bit->64bit or 348 // 8/16bit->32bit conversion. If the reg value is loaded with 349 // masked byte width, the AND operation can be removed since 350 // BPF LOAD already has zero extension. 351 // 352 // This also solved a correctness issue. 353 // In BPF socket-related program, e.g., __sk_buff->{data, data_end} 354 // are 32-bit registers, but later on, kernel verifier will rewrite 355 // it with 64-bit value. Therefore, truncating the value after the 356 // load will result in incorrect code. 357 bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) { 358 MachineInstr* ToErase = nullptr; 359 bool Eliminated = false; 360 361 for (MachineBasicBlock &MBB : *MF) { 362 for (MachineInstr &MI : MBB) { 363 // The second insn to remove if the eliminate candidate is a pair. 364 MachineInstr *MI2 = nullptr; 365 Register DstReg, SrcReg; 366 MachineInstr *DefMI; 367 int TruncSize = -1; 368 369 // If the previous instruction was marked for elimination, remove it now. 370 if (ToErase) { 371 ToErase->eraseFromParent(); 372 ToErase = nullptr; 373 } 374 375 // AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate 376 // for BPF ANDI is i32, and this case only happens on ALU64. 377 if (MI.getOpcode() == BPF::SRL_ri && 378 MI.getOperand(2).getImm() == 32) { 379 SrcReg = MI.getOperand(1).getReg(); 380 MI2 = MRI->getVRegDef(SrcReg); 381 DstReg = MI.getOperand(0).getReg(); 382 383 if (!MI2 || 384 MI2->getOpcode() != BPF::SLL_ri || 385 MI2->getOperand(2).getImm() != 32) 386 continue; 387 388 // Update SrcReg. 389 SrcReg = MI2->getOperand(1).getReg(); 390 DefMI = MRI->getVRegDef(SrcReg); 391 if (DefMI) 392 TruncSize = 4; 393 } else if (MI.getOpcode() == BPF::AND_ri || 394 MI.getOpcode() == BPF::AND_ri_32) { 395 SrcReg = MI.getOperand(1).getReg(); 396 DstReg = MI.getOperand(0).getReg(); 397 DefMI = MRI->getVRegDef(SrcReg); 398 399 if (!DefMI) 400 continue; 401 402 int64_t imm = MI.getOperand(2).getImm(); 403 if (imm == 0xff) 404 TruncSize = 1; 405 else if (imm == 0xffff) 406 TruncSize = 2; 407 } 408 409 if (TruncSize == -1) 410 continue; 411 412 // The definition is PHI node, check all inputs. 413 if (DefMI->isPHI()) { 414 bool CheckFail = false; 415 416 for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { 417 MachineOperand &opnd = DefMI->getOperand(i); 418 if (!opnd.isReg()) { 419 CheckFail = true; 420 break; 421 } 422 423 MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg()); 424 if (!PhiDef || PhiDef->isPHI() || 425 !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) { 426 CheckFail = true; 427 break; 428 } 429 } 430 431 if (CheckFail) 432 continue; 433 } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) { 434 continue; 435 } 436 437 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg) 438 .addReg(SrcReg); 439 440 if (MI2) 441 MI2->eraseFromParent(); 442 443 // Mark it to ToErase, and erase in the next iteration. 444 ToErase = &MI; 445 TruncElemNum++; 446 Eliminated = true; 447 } 448 } 449 450 return Eliminated; 451 } 452 453 } // end default namespace 454 455 INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim", 456 "BPF MachineSSA Peephole Optimization For TRUNC Eliminate", 457 false, false) 458 459 char BPFMIPeepholeTruncElim::ID = 0; 460 FunctionPass* llvm::createBPFMIPeepholeTruncElimPass() 461 { 462 return new BPFMIPeepholeTruncElim(); 463 } 464