xref: /freebsd/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp (revision b37d1deb045d7bc7877fb1d9afdb39d43130dcc4)
1 //===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups  -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs peephole optimizations to cleanup ugly code sequences at
10 // MachineInstruction layer.
11 //
12 // Currently, there are two optimizations implemented:
13 //  - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
14 //    zero extend 32-bit subregisters to 64-bit registers, if the compiler
15 //    could prove the subregisters is defined by 32-bit operations in which
16 //    case the upper half of the underlying 64-bit registers were zeroed
17 //    implicitly.
18 //
19 //  - One post-RA PreEmit pass to do final cleanup on some redundant
20 //    instructions generated due to bad RA on subregister.
21 //===----------------------------------------------------------------------===//
22 
23 #include "BPF.h"
24 #include "BPFInstrInfo.h"
25 #include "BPFTargetMachine.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/Support/Debug.h"
31 #include <set>
32 
33 using namespace llvm;
34 
35 #define DEBUG_TYPE "bpf-mi-zext-elim"
36 
37 STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
38 
39 namespace {
40 
41 struct BPFMIPeephole : public MachineFunctionPass {
42 
43   static char ID;
44   const BPFInstrInfo *TII;
45   MachineFunction *MF;
46   MachineRegisterInfo *MRI;
47 
48   BPFMIPeephole() : MachineFunctionPass(ID) {
49     initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
50   }
51 
52 private:
53   // Initialize class variables.
54   void initialize(MachineFunction &MFParm);
55 
56   bool isCopyFrom32Def(MachineInstr *CopyMI);
57   bool isInsnFrom32Def(MachineInstr *DefInsn);
58   bool isPhiFrom32Def(MachineInstr *MovMI);
59   bool isMovFrom32Def(MachineInstr *MovMI);
60   bool eliminateZExtSeq();
61   bool eliminateZExt();
62 
63   std::set<MachineInstr *> PhiInsns;
64 
65 public:
66 
67   // Main entry point for this pass.
68   bool runOnMachineFunction(MachineFunction &MF) override {
69     if (skipFunction(MF.getFunction()))
70       return false;
71 
72     initialize(MF);
73 
74     // First try to eliminate (zext, lshift, rshift) and then
75     // try to eliminate zext.
76     bool ZExtSeqExist, ZExtExist;
77     ZExtSeqExist = eliminateZExtSeq();
78     ZExtExist = eliminateZExt();
79     return ZExtSeqExist || ZExtExist;
80   }
81 };
82 
83 // Initialize class variables.
84 void BPFMIPeephole::initialize(MachineFunction &MFParm) {
85   MF = &MFParm;
86   MRI = &MF->getRegInfo();
87   TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
88   LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
89 }
90 
91 bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
92 {
93   MachineOperand &opnd = CopyMI->getOperand(1);
94 
95   if (!opnd.isReg())
96     return false;
97 
98   // Return false if getting value from a 32bit physical register.
99   // Most likely, this physical register is aliased to
100   // function call return value or current function parameters.
101   Register Reg = opnd.getReg();
102   if (!Register::isVirtualRegister(Reg))
103     return false;
104 
105   if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
106     return false;
107 
108   MachineInstr *DefInsn = MRI->getVRegDef(Reg);
109   if (!isInsnFrom32Def(DefInsn))
110     return false;
111 
112   return true;
113 }
114 
115 bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
116 {
117   for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
118     MachineOperand &opnd = PhiMI->getOperand(i);
119 
120     if (!opnd.isReg())
121       return false;
122 
123     MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
124     if (!PhiDef)
125       return false;
126     if (PhiDef->isPHI()) {
127       if (!PhiInsns.insert(PhiDef).second)
128         return false;
129       if (!isPhiFrom32Def(PhiDef))
130         return false;
131     }
132     if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
133       return false;
134   }
135 
136   return true;
137 }
138 
139 // The \p DefInsn instruction defines a virtual register.
140 bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
141 {
142   if (!DefInsn)
143     return false;
144 
145   if (DefInsn->isPHI()) {
146     if (!PhiInsns.insert(DefInsn).second)
147       return false;
148     if (!isPhiFrom32Def(DefInsn))
149       return false;
150   } else if (DefInsn->getOpcode() == BPF::COPY) {
151     if (!isCopyFrom32Def(DefInsn))
152       return false;
153   }
154 
155   return true;
156 }
157 
158 bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
159 {
160   MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
161 
162   LLVM_DEBUG(dbgs() << "  Def of Mov Src:");
163   LLVM_DEBUG(DefInsn->dump());
164 
165   PhiInsns.clear();
166   if (!isInsnFrom32Def(DefInsn))
167     return false;
168 
169   LLVM_DEBUG(dbgs() << "  One ZExt elim sequence identified.\n");
170 
171   return true;
172 }
173 
174 bool BPFMIPeephole::eliminateZExtSeq() {
175   MachineInstr* ToErase = nullptr;
176   bool Eliminated = false;
177 
178   for (MachineBasicBlock &MBB : *MF) {
179     for (MachineInstr &MI : MBB) {
180       // If the previous instruction was marked for elimination, remove it now.
181       if (ToErase) {
182         ToErase->eraseFromParent();
183         ToErase = nullptr;
184       }
185 
186       // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
187       //
188       //   MOV_32_64 rB, wA
189       //   SLL_ri    rB, rB, 32
190       //   SRL_ri    rB, rB, 32
191       if (MI.getOpcode() == BPF::SRL_ri &&
192           MI.getOperand(2).getImm() == 32) {
193         Register DstReg = MI.getOperand(0).getReg();
194         Register ShfReg = MI.getOperand(1).getReg();
195         MachineInstr *SllMI = MRI->getVRegDef(ShfReg);
196 
197         LLVM_DEBUG(dbgs() << "Starting SRL found:");
198         LLVM_DEBUG(MI.dump());
199 
200         if (!SllMI ||
201             SllMI->isPHI() ||
202             SllMI->getOpcode() != BPF::SLL_ri ||
203             SllMI->getOperand(2).getImm() != 32)
204           continue;
205 
206         LLVM_DEBUG(dbgs() << "  SLL found:");
207         LLVM_DEBUG(SllMI->dump());
208 
209         MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
210         if (!MovMI ||
211             MovMI->isPHI() ||
212             MovMI->getOpcode() != BPF::MOV_32_64)
213           continue;
214 
215         LLVM_DEBUG(dbgs() << "  Type cast Mov found:");
216         LLVM_DEBUG(MovMI->dump());
217 
218         Register SubReg = MovMI->getOperand(1).getReg();
219         if (!isMovFrom32Def(MovMI)) {
220           LLVM_DEBUG(dbgs()
221                      << "  One ZExt elim sequence failed qualifying elim.\n");
222           continue;
223         }
224 
225         BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
226           .addImm(0).addReg(SubReg).addImm(BPF::sub_32);
227 
228         SllMI->eraseFromParent();
229         MovMI->eraseFromParent();
230         // MI is the right shift, we can't erase it in it's own iteration.
231         // Mark it to ToErase, and erase in the next iteration.
232         ToErase = &MI;
233         ZExtElemNum++;
234         Eliminated = true;
235       }
236     }
237   }
238 
239   return Eliminated;
240 }
241 
242 bool BPFMIPeephole::eliminateZExt() {
243   MachineInstr* ToErase = nullptr;
244   bool Eliminated = false;
245 
246   for (MachineBasicBlock &MBB : *MF) {
247     for (MachineInstr &MI : MBB) {
248       // If the previous instruction was marked for elimination, remove it now.
249       if (ToErase) {
250         ToErase->eraseFromParent();
251         ToErase = nullptr;
252       }
253 
254       if (MI.getOpcode() != BPF::MOV_32_64)
255         continue;
256 
257       // Eliminate MOV_32_64 if possible.
258       //   MOV_32_64 rA, wB
259       //
260       // If wB has been zero extended, replace it with a SUBREG_TO_REG.
261       // This is to workaround BPF programs where pkt->{data, data_end}
262       // is encoded as u32, but actually the verifier populates them
263       // as 64bit pointer. The MOV_32_64 will zero out the top 32 bits.
264       LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
265       LLVM_DEBUG(MI.dump());
266 
267       if (!isMovFrom32Def(&MI))
268         continue;
269 
270       LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");
271 
272       Register dst = MI.getOperand(0).getReg();
273       Register src = MI.getOperand(1).getReg();
274 
275       // Build a SUBREG_TO_REG instruction.
276       BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst)
277         .addImm(0).addReg(src).addImm(BPF::sub_32);
278 
279       ToErase = &MI;
280       Eliminated = true;
281     }
282   }
283 
284   return Eliminated;
285 }
286 
287 } // end default namespace
288 
289 INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
290                 "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
291                 false, false)
292 
293 char BPFMIPeephole::ID = 0;
294 FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
295 
296 STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");
297 
298 namespace {
299 
300 struct BPFMIPreEmitPeephole : public MachineFunctionPass {
301 
302   static char ID;
303   MachineFunction *MF;
304   const TargetRegisterInfo *TRI;
305 
306   BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
307     initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
308   }
309 
310 private:
311   // Initialize class variables.
312   void initialize(MachineFunction &MFParm);
313 
314   bool eliminateRedundantMov();
315 
316 public:
317 
318   // Main entry point for this pass.
319   bool runOnMachineFunction(MachineFunction &MF) override {
320     if (skipFunction(MF.getFunction()))
321       return false;
322 
323     initialize(MF);
324 
325     return eliminateRedundantMov();
326   }
327 };
328 
329 // Initialize class variables.
330 void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
331   MF = &MFParm;
332   TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
333   LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
334 }
335 
336 bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
337   MachineInstr* ToErase = nullptr;
338   bool Eliminated = false;
339 
340   for (MachineBasicBlock &MBB : *MF) {
341     for (MachineInstr &MI : MBB) {
342       // If the previous instruction was marked for elimination, remove it now.
343       if (ToErase) {
344         LLVM_DEBUG(dbgs() << "  Redundant Mov Eliminated:");
345         LLVM_DEBUG(ToErase->dump());
346         ToErase->eraseFromParent();
347         ToErase = nullptr;
348       }
349 
350       // Eliminate identical move:
351       //
352       //   MOV rA, rA
353       //
354       // Note that we cannot remove
355       //   MOV_32_64  rA, wA
356       //   MOV_rr_32  wA, wA
357       // as these two instructions having side effects, zeroing out
358       // top 32 bits of rA.
359       unsigned Opcode = MI.getOpcode();
360       if (Opcode == BPF::MOV_rr) {
361         Register dst = MI.getOperand(0).getReg();
362         Register src = MI.getOperand(1).getReg();
363 
364         if (dst != src)
365           continue;
366 
367         ToErase = &MI;
368         RedundantMovElemNum++;
369         Eliminated = true;
370       }
371     }
372   }
373 
374   return Eliminated;
375 }
376 
377 } // end default namespace
378 
379 INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
380                 "BPF PreEmit Peephole Optimization", false, false)
381 
382 char BPFMIPreEmitPeephole::ID = 0;
383 FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
384 {
385   return new BPFMIPreEmitPeephole();
386 }
387 
388 STATISTIC(TruncElemNum, "Number of truncation eliminated");
389 
390 namespace {
391 
392 struct BPFMIPeepholeTruncElim : public MachineFunctionPass {
393 
394   static char ID;
395   const BPFInstrInfo *TII;
396   MachineFunction *MF;
397   MachineRegisterInfo *MRI;
398 
399   BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) {
400     initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry());
401   }
402 
403 private:
404   // Initialize class variables.
405   void initialize(MachineFunction &MFParm);
406 
407   bool eliminateTruncSeq();
408 
409 public:
410 
411   // Main entry point for this pass.
412   bool runOnMachineFunction(MachineFunction &MF) override {
413     if (skipFunction(MF.getFunction()))
414       return false;
415 
416     initialize(MF);
417 
418     return eliminateTruncSeq();
419   }
420 };
421 
422 static bool TruncSizeCompatible(int TruncSize, unsigned opcode)
423 {
424   if (TruncSize == 1)
425     return opcode == BPF::LDB || opcode == BPF::LDB32;
426 
427   if (TruncSize == 2)
428     return opcode == BPF::LDH || opcode == BPF::LDH32;
429 
430   if (TruncSize == 4)
431     return opcode == BPF::LDW || opcode == BPF::LDW32;
432 
433   return false;
434 }
435 
436 // Initialize class variables.
437 void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) {
438   MF = &MFParm;
439   MRI = &MF->getRegInfo();
440   TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
441   LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n");
442 }
443 
444 // Reg truncating is often the result of 8/16/32bit->64bit or
445 // 8/16bit->32bit conversion. If the reg value is loaded with
446 // masked byte width, the AND operation can be removed since
447 // BPF LOAD already has zero extension.
448 //
449 // This also solved a correctness issue.
450 // In BPF socket-related program, e.g., __sk_buff->{data, data_end}
451 // are 32-bit registers, but later on, kernel verifier will rewrite
452 // it with 64-bit value. Therefore, truncating the value after the
453 // load will result in incorrect code.
454 bool BPFMIPeepholeTruncElim::eliminateTruncSeq() {
455   MachineInstr* ToErase = nullptr;
456   bool Eliminated = false;
457 
458   for (MachineBasicBlock &MBB : *MF) {
459     for (MachineInstr &MI : MBB) {
460       // The second insn to remove if the eliminate candidate is a pair.
461       MachineInstr *MI2 = nullptr;
462       Register DstReg, SrcReg;
463       MachineInstr *DefMI;
464       int TruncSize = -1;
465 
466       // If the previous instruction was marked for elimination, remove it now.
467       if (ToErase) {
468         ToErase->eraseFromParent();
469         ToErase = nullptr;
470       }
471 
472       // AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate
473       // for BPF ANDI is i32, and this case only happens on ALU64.
474       if (MI.getOpcode() == BPF::SRL_ri &&
475           MI.getOperand(2).getImm() == 32) {
476         SrcReg = MI.getOperand(1).getReg();
477         if (!MRI->hasOneNonDBGUse(SrcReg))
478           continue;
479 
480         MI2 = MRI->getVRegDef(SrcReg);
481         DstReg = MI.getOperand(0).getReg();
482 
483         if (!MI2 ||
484             MI2->getOpcode() != BPF::SLL_ri ||
485             MI2->getOperand(2).getImm() != 32)
486           continue;
487 
488         // Update SrcReg.
489         SrcReg = MI2->getOperand(1).getReg();
490         DefMI = MRI->getVRegDef(SrcReg);
491         if (DefMI)
492           TruncSize = 4;
493       } else if (MI.getOpcode() == BPF::AND_ri ||
494                  MI.getOpcode() == BPF::AND_ri_32) {
495         SrcReg = MI.getOperand(1).getReg();
496         DstReg = MI.getOperand(0).getReg();
497         DefMI = MRI->getVRegDef(SrcReg);
498 
499         if (!DefMI)
500           continue;
501 
502         int64_t imm = MI.getOperand(2).getImm();
503         if (imm == 0xff)
504           TruncSize = 1;
505         else if (imm == 0xffff)
506           TruncSize = 2;
507       }
508 
509       if (TruncSize == -1)
510         continue;
511 
512       // The definition is PHI node, check all inputs.
513       if (DefMI->isPHI()) {
514         bool CheckFail = false;
515 
516         for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
517           MachineOperand &opnd = DefMI->getOperand(i);
518           if (!opnd.isReg()) {
519             CheckFail = true;
520             break;
521           }
522 
523           MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
524           if (!PhiDef || PhiDef->isPHI() ||
525               !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) {
526             CheckFail = true;
527             break;
528           }
529         }
530 
531         if (CheckFail)
532           continue;
533       } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) {
534         continue;
535       }
536 
537       BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg)
538               .addReg(SrcReg);
539 
540       if (MI2)
541         MI2->eraseFromParent();
542 
543       // Mark it to ToErase, and erase in the next iteration.
544       ToErase = &MI;
545       TruncElemNum++;
546       Eliminated = true;
547     }
548   }
549 
550   return Eliminated;
551 }
552 
553 } // end default namespace
554 
555 INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim",
556                 "BPF MachineSSA Peephole Optimization For TRUNC Eliminate",
557                 false, false)
558 
559 char BPFMIPeepholeTruncElim::ID = 0;
560 FunctionPass* llvm::createBPFMIPeepholeTruncElimPass()
561 {
562   return new BPFMIPeepholeTruncElim();
563 }
564