1 //===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass performs peephole optimizations to cleanup ugly code sequences at 10 // MachineInstruction layer. 11 // 12 // Currently, there are two optimizations implemented: 13 // - One pre-RA MachineSSA pass to eliminate type promotion sequences, those 14 // zero extend 32-bit subregisters to 64-bit registers, if the compiler 15 // could prove the subregisters is defined by 32-bit operations in which 16 // case the upper half of the underlying 64-bit registers were zeroed 17 // implicitly. 18 // 19 // - One post-RA PreEmit pass to do final cleanup on some redundant 20 // instructions generated due to bad RA on subregister. 21 //===----------------------------------------------------------------------===// 22 23 #include "BPF.h" 24 #include "BPFInstrInfo.h" 25 #include "BPFTargetMachine.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/Support/Debug.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "bpf-mi-zext-elim" 34 35 STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated"); 36 37 namespace { 38 39 struct BPFMIPeephole : public MachineFunctionPass { 40 41 static char ID; 42 const BPFInstrInfo *TII; 43 MachineFunction *MF; 44 MachineRegisterInfo *MRI; 45 46 BPFMIPeephole() : MachineFunctionPass(ID) { 47 initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry()); 48 } 49 50 private: 51 // Initialize class variables. 52 void initialize(MachineFunction &MFParm); 53 54 bool isCopyFrom32Def(MachineInstr *CopyMI); 55 bool isInsnFrom32Def(MachineInstr *DefInsn); 56 bool isPhiFrom32Def(MachineInstr *MovMI); 57 bool isMovFrom32Def(MachineInstr *MovMI); 58 bool eliminateZExtSeq(void); 59 60 std::set<MachineInstr *> PhiInsns; 61 62 public: 63 64 // Main entry point for this pass. 65 bool runOnMachineFunction(MachineFunction &MF) override { 66 if (skipFunction(MF.getFunction())) 67 return false; 68 69 initialize(MF); 70 71 return eliminateZExtSeq(); 72 } 73 }; 74 75 // Initialize class variables. 76 void BPFMIPeephole::initialize(MachineFunction &MFParm) { 77 MF = &MFParm; 78 MRI = &MF->getRegInfo(); 79 TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo(); 80 LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n"); 81 } 82 83 bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI) 84 { 85 MachineOperand &opnd = CopyMI->getOperand(1); 86 87 if (!opnd.isReg()) 88 return false; 89 90 // Return false if getting value from a 32bit physical register. 91 // Most likely, this physical register is aliased to 92 // function call return value or current function parameters. 93 Register Reg = opnd.getReg(); 94 if (!Register::isVirtualRegister(Reg)) 95 return false; 96 97 if (MRI->getRegClass(Reg) == &BPF::GPRRegClass) 98 return false; 99 100 MachineInstr *DefInsn = MRI->getVRegDef(Reg); 101 if (!isInsnFrom32Def(DefInsn)) 102 return false; 103 104 return true; 105 } 106 107 bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI) 108 { 109 for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) { 110 MachineOperand &opnd = PhiMI->getOperand(i); 111 112 if (!opnd.isReg()) 113 return false; 114 115 MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg()); 116 if (!PhiDef) 117 return false; 118 if (PhiDef->isPHI()) { 119 if (PhiInsns.find(PhiDef) != PhiInsns.end()) 120 return false; 121 PhiInsns.insert(PhiDef); 122 if (!isPhiFrom32Def(PhiDef)) 123 return false; 124 } 125 if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef)) 126 return false; 127 } 128 129 return true; 130 } 131 132 // The \p DefInsn instruction defines a virtual register. 133 bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn) 134 { 135 if (!DefInsn) 136 return false; 137 138 if (DefInsn->isPHI()) { 139 if (PhiInsns.find(DefInsn) != PhiInsns.end()) 140 return false; 141 PhiInsns.insert(DefInsn); 142 if (!isPhiFrom32Def(DefInsn)) 143 return false; 144 } else if (DefInsn->getOpcode() == BPF::COPY) { 145 if (!isCopyFrom32Def(DefInsn)) 146 return false; 147 } 148 149 return true; 150 } 151 152 bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI) 153 { 154 MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg()); 155 156 LLVM_DEBUG(dbgs() << " Def of Mov Src:"); 157 LLVM_DEBUG(DefInsn->dump()); 158 159 PhiInsns.clear(); 160 if (!isInsnFrom32Def(DefInsn)) 161 return false; 162 163 LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n"); 164 165 return true; 166 } 167 168 bool BPFMIPeephole::eliminateZExtSeq(void) { 169 MachineInstr* ToErase = nullptr; 170 bool Eliminated = false; 171 172 for (MachineBasicBlock &MBB : *MF) { 173 for (MachineInstr &MI : MBB) { 174 // If the previous instruction was marked for elimination, remove it now. 175 if (ToErase) { 176 ToErase->eraseFromParent(); 177 ToErase = nullptr; 178 } 179 180 // Eliminate the 32-bit to 64-bit zero extension sequence when possible. 181 // 182 // MOV_32_64 rB, wA 183 // SLL_ri rB, rB, 32 184 // SRL_ri rB, rB, 32 185 if (MI.getOpcode() == BPF::SRL_ri && 186 MI.getOperand(2).getImm() == 32) { 187 Register DstReg = MI.getOperand(0).getReg(); 188 Register ShfReg = MI.getOperand(1).getReg(); 189 MachineInstr *SllMI = MRI->getVRegDef(ShfReg); 190 191 LLVM_DEBUG(dbgs() << "Starting SRL found:"); 192 LLVM_DEBUG(MI.dump()); 193 194 if (!SllMI || 195 SllMI->isPHI() || 196 SllMI->getOpcode() != BPF::SLL_ri || 197 SllMI->getOperand(2).getImm() != 32) 198 continue; 199 200 LLVM_DEBUG(dbgs() << " SLL found:"); 201 LLVM_DEBUG(SllMI->dump()); 202 203 MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg()); 204 if (!MovMI || 205 MovMI->isPHI() || 206 MovMI->getOpcode() != BPF::MOV_32_64) 207 continue; 208 209 LLVM_DEBUG(dbgs() << " Type cast Mov found:"); 210 LLVM_DEBUG(MovMI->dump()); 211 212 Register SubReg = MovMI->getOperand(1).getReg(); 213 if (!isMovFrom32Def(MovMI)) { 214 LLVM_DEBUG(dbgs() 215 << " One ZExt elim sequence failed qualifying elim.\n"); 216 continue; 217 } 218 219 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg) 220 .addImm(0).addReg(SubReg).addImm(BPF::sub_32); 221 222 SllMI->eraseFromParent(); 223 MovMI->eraseFromParent(); 224 // MI is the right shift, we can't erase it in it's own iteration. 225 // Mark it to ToErase, and erase in the next iteration. 226 ToErase = &MI; 227 ZExtElemNum++; 228 Eliminated = true; 229 } 230 } 231 } 232 233 return Eliminated; 234 } 235 236 } // end default namespace 237 238 INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE, 239 "BPF MachineSSA Peephole Optimization For ZEXT Eliminate", 240 false, false) 241 242 char BPFMIPeephole::ID = 0; 243 FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); } 244 245 STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated"); 246 247 namespace { 248 249 struct BPFMIPreEmitPeephole : public MachineFunctionPass { 250 251 static char ID; 252 MachineFunction *MF; 253 const TargetRegisterInfo *TRI; 254 255 BPFMIPreEmitPeephole() : MachineFunctionPass(ID) { 256 initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry()); 257 } 258 259 private: 260 // Initialize class variables. 261 void initialize(MachineFunction &MFParm); 262 263 bool eliminateRedundantMov(void); 264 265 public: 266 267 // Main entry point for this pass. 268 bool runOnMachineFunction(MachineFunction &MF) override { 269 if (skipFunction(MF.getFunction())) 270 return false; 271 272 initialize(MF); 273 274 return eliminateRedundantMov(); 275 } 276 }; 277 278 // Initialize class variables. 279 void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) { 280 MF = &MFParm; 281 TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo(); 282 LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n"); 283 } 284 285 bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) { 286 MachineInstr* ToErase = nullptr; 287 bool Eliminated = false; 288 289 for (MachineBasicBlock &MBB : *MF) { 290 for (MachineInstr &MI : MBB) { 291 // If the previous instruction was marked for elimination, remove it now. 292 if (ToErase) { 293 LLVM_DEBUG(dbgs() << " Redundant Mov Eliminated:"); 294 LLVM_DEBUG(ToErase->dump()); 295 ToErase->eraseFromParent(); 296 ToErase = nullptr; 297 } 298 299 // Eliminate identical move: 300 // 301 // MOV rA, rA 302 // 303 // This is particularly possible to happen when sub-register support 304 // enabled. The special type cast insn MOV_32_64 involves different 305 // register class on src (i32) and dst (i64), RA could generate useless 306 // instruction due to this. 307 unsigned Opcode = MI.getOpcode(); 308 if (Opcode == BPF::MOV_32_64 || 309 Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) { 310 Register dst = MI.getOperand(0).getReg(); 311 Register src = MI.getOperand(1).getReg(); 312 313 if (Opcode == BPF::MOV_32_64) 314 dst = TRI->getSubReg(dst, BPF::sub_32); 315 316 if (dst != src) 317 continue; 318 319 ToErase = &MI; 320 RedundantMovElemNum++; 321 Eliminated = true; 322 } 323 } 324 } 325 326 return Eliminated; 327 } 328 329 } // end default namespace 330 331 INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole", 332 "BPF PreEmit Peephole Optimization", false, false) 333 334 char BPFMIPreEmitPeephole::ID = 0; 335 FunctionPass* llvm::createBPFMIPreEmitPeepholePass() 336 { 337 return new BPFMIPreEmitPeephole(); 338 } 339 340 STATISTIC(TruncElemNum, "Number of truncation eliminated"); 341 342 namespace { 343 344 struct BPFMIPeepholeTruncElim : public MachineFunctionPass { 345 346 static char ID; 347 const BPFInstrInfo *TII; 348 MachineFunction *MF; 349 MachineRegisterInfo *MRI; 350 351 BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) { 352 initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry()); 353 } 354 355 private: 356 // Initialize class variables. 357 void initialize(MachineFunction &MFParm); 358 359 bool eliminateTruncSeq(void); 360 361 public: 362 363 // Main entry point for this pass. 364 bool runOnMachineFunction(MachineFunction &MF) override { 365 if (skipFunction(MF.getFunction())) 366 return false; 367 368 initialize(MF); 369 370 return eliminateTruncSeq(); 371 } 372 }; 373 374 static bool TruncSizeCompatible(int TruncSize, unsigned opcode) 375 { 376 if (TruncSize == 1) 377 return opcode == BPF::LDB || opcode == BPF::LDB32; 378 379 if (TruncSize == 2) 380 return opcode == BPF::LDH || opcode == BPF::LDH32; 381 382 if (TruncSize == 4) 383 return opcode == BPF::LDW || opcode == BPF::LDW32; 384 385 return false; 386 } 387 388 // Initialize class variables. 389 void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) { 390 MF = &MFParm; 391 MRI = &MF->getRegInfo(); 392 TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo(); 393 LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n"); 394 } 395 396 // Reg truncating is often the result of 8/16/32bit->64bit or 397 // 8/16bit->32bit conversion. If the reg value is loaded with 398 // masked byte width, the AND operation can be removed since 399 // BPF LOAD already has zero extension. 400 // 401 // This also solved a correctness issue. 402 // In BPF socket-related program, e.g., __sk_buff->{data, data_end} 403 // are 32-bit registers, but later on, kernel verifier will rewrite 404 // it with 64-bit value. Therefore, truncating the value after the 405 // load will result in incorrect code. 406 bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) { 407 MachineInstr* ToErase = nullptr; 408 bool Eliminated = false; 409 410 for (MachineBasicBlock &MBB : *MF) { 411 for (MachineInstr &MI : MBB) { 412 // The second insn to remove if the eliminate candidate is a pair. 413 MachineInstr *MI2 = nullptr; 414 Register DstReg, SrcReg; 415 MachineInstr *DefMI; 416 int TruncSize = -1; 417 418 // If the previous instruction was marked for elimination, remove it now. 419 if (ToErase) { 420 ToErase->eraseFromParent(); 421 ToErase = nullptr; 422 } 423 424 // AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate 425 // for BPF ANDI is i32, and this case only happens on ALU64. 426 if (MI.getOpcode() == BPF::SRL_ri && 427 MI.getOperand(2).getImm() == 32) { 428 SrcReg = MI.getOperand(1).getReg(); 429 MI2 = MRI->getVRegDef(SrcReg); 430 DstReg = MI.getOperand(0).getReg(); 431 432 if (!MI2 || 433 MI2->getOpcode() != BPF::SLL_ri || 434 MI2->getOperand(2).getImm() != 32) 435 continue; 436 437 // Update SrcReg. 438 SrcReg = MI2->getOperand(1).getReg(); 439 DefMI = MRI->getVRegDef(SrcReg); 440 if (DefMI) 441 TruncSize = 4; 442 } else if (MI.getOpcode() == BPF::AND_ri || 443 MI.getOpcode() == BPF::AND_ri_32) { 444 SrcReg = MI.getOperand(1).getReg(); 445 DstReg = MI.getOperand(0).getReg(); 446 DefMI = MRI->getVRegDef(SrcReg); 447 448 if (!DefMI) 449 continue; 450 451 int64_t imm = MI.getOperand(2).getImm(); 452 if (imm == 0xff) 453 TruncSize = 1; 454 else if (imm == 0xffff) 455 TruncSize = 2; 456 } 457 458 if (TruncSize == -1) 459 continue; 460 461 // The definition is PHI node, check all inputs. 462 if (DefMI->isPHI()) { 463 bool CheckFail = false; 464 465 for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { 466 MachineOperand &opnd = DefMI->getOperand(i); 467 if (!opnd.isReg()) { 468 CheckFail = true; 469 break; 470 } 471 472 MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg()); 473 if (!PhiDef || PhiDef->isPHI() || 474 !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) { 475 CheckFail = true; 476 break; 477 } 478 } 479 480 if (CheckFail) 481 continue; 482 } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) { 483 continue; 484 } 485 486 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg) 487 .addReg(SrcReg); 488 489 if (MI2) 490 MI2->eraseFromParent(); 491 492 // Mark it to ToErase, and erase in the next iteration. 493 ToErase = &MI; 494 TruncElemNum++; 495 Eliminated = true; 496 } 497 } 498 499 return Eliminated; 500 } 501 502 } // end default namespace 503 504 INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim", 505 "BPF MachineSSA Peephole Optimization For TRUNC Eliminate", 506 false, false) 507 508 char BPFMIPeepholeTruncElim::ID = 0; 509 FunctionPass* llvm::createBPFMIPeepholeTruncElimPass() 510 { 511 return new BPFMIPeepholeTruncElim(); 512 } 513