1 //=== AArch64PostSelectOptimize.cpp ---------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass does post-instruction-selection optimizations in the GlobalISel 10 // pipeline, before the rest of codegen runs. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64.h" 15 #include "AArch64TargetMachine.h" 16 #include "MCTargetDesc/AArch64MCTargetDesc.h" 17 #include "llvm/CodeGen/MachineBasicBlock.h" 18 #include "llvm/CodeGen/MachineFunctionPass.h" 19 #include "llvm/CodeGen/MachineOperand.h" 20 #include "llvm/CodeGen/TargetPassConfig.h" 21 #include "llvm/Support/Debug.h" 22 23 #define DEBUG_TYPE "aarch64-post-select-optimize" 24 25 using namespace llvm; 26 27 namespace { 28 class AArch64PostSelectOptimize : public MachineFunctionPass { 29 public: 30 static char ID; 31 32 AArch64PostSelectOptimize(); 33 34 StringRef getPassName() const override { 35 return "AArch64 Post Select Optimizer"; 36 } 37 38 bool runOnMachineFunction(MachineFunction &MF) override; 39 40 void getAnalysisUsage(AnalysisUsage &AU) const override; 41 42 private: 43 bool optimizeNZCVDefs(MachineBasicBlock &MBB); 44 }; 45 } // end anonymous namespace 46 47 void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const { 48 AU.addRequired<TargetPassConfig>(); 49 AU.setPreservesCFG(); 50 getSelectionDAGFallbackAnalysisUsage(AU); 51 MachineFunctionPass::getAnalysisUsage(AU); 52 } 53 54 AArch64PostSelectOptimize::AArch64PostSelectOptimize() 55 : MachineFunctionPass(ID) { 56 initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry()); 57 } 58 59 unsigned getNonFlagSettingVariant(unsigned Opc) { 60 switch (Opc) { 61 default: 62 return 0; 63 case AArch64::SUBSXrr: 64 return AArch64::SUBXrr; 65 case AArch64::SUBSWrr: 66 return AArch64::SUBWrr; 67 case AArch64::SUBSXrs: 68 return AArch64::SUBXrs; 69 case AArch64::SUBSXri: 70 return AArch64::SUBXri; 71 case AArch64::SUBSWri: 72 return AArch64::SUBWri; 73 } 74 } 75 76 bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) { 77 // Consider the following code: 78 // FCMPSrr %0, %1, implicit-def $nzcv 79 // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv 80 // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv 81 // FCMPSrr %0, %1, implicit-def $nzcv 82 // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv 83 // This kind of code where we have 2 FCMPs each feeding a CSEL can happen 84 // when we have a single IR fcmp being used by two selects. During selection, 85 // to ensure that there can be no clobbering of nzcv between the fcmp and the 86 // csel, we have to generate an fcmp immediately before each csel is 87 // selected. 88 // However, often we can essentially CSE these together later in MachineCSE. 89 // This doesn't work though if there are unrelated flag-setting instructions 90 // in between the two FCMPs. In this case, the SUBS defines NZCV 91 // but it doesn't have any users, being overwritten by the second FCMP. 92 // 93 // Our solution here is to try to convert flag setting operations between 94 // a interval of identical FCMPs, so that CSE will be able to eliminate one. 95 bool Changed = false; 96 const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo(); 97 98 // The first step is to find the first and last FCMPs. If we have found 99 // at least two, then set the limit of the bottom-up walk to the first FCMP 100 // found since we're only interested in dealing with instructions between 101 // them. 102 MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr; 103 for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) { 104 if (MI.getOpcode() == AArch64::FCMPSrr || 105 MI.getOpcode() == AArch64::FCMPDrr) { 106 if (!FirstCmp) 107 FirstCmp = &MI; 108 else 109 LastCmp = &MI; 110 } 111 } 112 113 // In addition to converting flag-setting ops in fcmp ranges into non-flag 114 // setting ops, across the whole basic block we also detect when nzcv 115 // implicit-defs are dead, and mark them as dead. Peephole optimizations need 116 // this information later. 117 118 LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); 119 LRU.addLiveOuts(MBB); 120 bool NZCVDead = LRU.available(AArch64::NZCV); 121 bool InsideCmpRange = false; 122 for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { 123 LRU.stepBackward(II); 124 125 if (LastCmp) { // There's a range present in this block. 126 // If we're inside an fcmp range, look for begin instruction. 127 if (InsideCmpRange && &II == FirstCmp) 128 InsideCmpRange = false; 129 else if (&II == LastCmp) 130 InsideCmpRange = true; 131 } 132 133 // Did this instruction define NZCV? 134 bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV); 135 if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) { 136 // If we have a def and NZCV is dead, then we may convert this op. 137 unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); 138 int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); 139 if (DeadNZCVIdx != -1) { 140 // If we're inside an fcmp range, then convert flag setting ops. 141 if (InsideCmpRange && NewOpc) { 142 LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " 143 "op in fcmp range: " 144 << II); 145 II.setDesc(TII->get(NewOpc)); 146 II.RemoveOperand(DeadNZCVIdx); 147 Changed |= true; 148 } else { 149 // Otherwise, we just set the nzcv imp-def operand to be dead, so the 150 // peephole optimizations can optimize them further. 151 II.getOperand(DeadNZCVIdx).setIsDead(); 152 } 153 } 154 } 155 156 NZCVDead = NZCVDeadAtCurrInstr; 157 } 158 return Changed; 159 } 160 161 bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { 162 if (MF.getProperties().hasProperty( 163 MachineFunctionProperties::Property::FailedISel)) 164 return false; 165 assert(MF.getProperties().hasProperty( 166 MachineFunctionProperties::Property::Selected) && 167 "Expected a selected MF"); 168 169 bool Changed = false; 170 for (auto &BB : MF) 171 Changed |= optimizeNZCVDefs(BB); 172 return true; 173 } 174 175 char AArch64PostSelectOptimize::ID = 0; 176 INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, 177 "Optimize AArch64 selected instructions", 178 false, false) 179 INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, 180 "Optimize AArch64 selected instructions", false, 181 false) 182 183 namespace llvm { 184 FunctionPass *createAArch64PostSelectOptimize() { 185 return new AArch64PostSelectOptimize(); 186 } 187 } // end namespace llvm 188