1 //=== AArch64PostSelectOptimize.cpp ---------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass does post-instruction-selection optimizations in the GlobalISel 10 // pipeline, before the rest of codegen runs. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64.h" 15 #include "AArch64TargetMachine.h" 16 #include "MCTargetDesc/AArch64MCTargetDesc.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/CodeGen/GlobalISel/Utils.h" 19 #include "llvm/CodeGen/MachineBasicBlock.h" 20 #include "llvm/CodeGen/MachineFunctionPass.h" 21 #include "llvm/CodeGen/MachineInstr.h" 22 #include "llvm/CodeGen/MachineOperand.h" 23 #include "llvm/CodeGen/TargetPassConfig.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/ErrorHandling.h" 26 27 #define DEBUG_TYPE "aarch64-post-select-optimize" 28 29 using namespace llvm; 30 31 namespace { 32 class AArch64PostSelectOptimize : public MachineFunctionPass { 33 public: 34 static char ID; 35 36 AArch64PostSelectOptimize(); 37 38 StringRef getPassName() const override { 39 return "AArch64 Post Select Optimizer"; 40 } 41 42 bool runOnMachineFunction(MachineFunction &MF) override; 43 44 void getAnalysisUsage(AnalysisUsage &AU) const override; 45 46 private: 47 bool optimizeNZCVDefs(MachineBasicBlock &MBB); 48 bool doPeepholeOpts(MachineBasicBlock &MBB); 49 /// Look for cross regclass copies that can be trivially eliminated. 50 bool foldSimpleCrossClassCopies(MachineInstr &MI); 51 }; 52 } // end anonymous namespace 53 54 void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const { 55 AU.addRequired<TargetPassConfig>(); 56 AU.setPreservesCFG(); 57 getSelectionDAGFallbackAnalysisUsage(AU); 58 MachineFunctionPass::getAnalysisUsage(AU); 59 } 60 61 AArch64PostSelectOptimize::AArch64PostSelectOptimize() 62 : MachineFunctionPass(ID) { 63 initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry()); 64 } 65 66 unsigned getNonFlagSettingVariant(unsigned Opc) { 67 switch (Opc) { 68 default: 69 return 0; 70 case AArch64::SUBSXrr: 71 return AArch64::SUBXrr; 72 case AArch64::SUBSWrr: 73 return AArch64::SUBWrr; 74 case AArch64::SUBSXrs: 75 return AArch64::SUBXrs; 76 case AArch64::SUBSXri: 77 return AArch64::SUBXri; 78 case AArch64::SUBSWri: 79 return AArch64::SUBWri; 80 } 81 } 82 83 bool AArch64PostSelectOptimize::doPeepholeOpts(MachineBasicBlock &MBB) { 84 bool Changed = false; 85 for (auto &MI : make_early_inc_range(make_range(MBB.begin(), MBB.end()))) { 86 Changed |= foldSimpleCrossClassCopies(MI); 87 } 88 return Changed; 89 } 90 91 bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) { 92 auto *MF = MI.getMF(); 93 auto &MRI = MF->getRegInfo(); 94 95 if (!MI.isCopy()) 96 return false; 97 98 if (MI.getOperand(1).getSubReg()) 99 return false; // Don't deal with subreg copies 100 101 Register Src = MI.getOperand(1).getReg(); 102 Register Dst = MI.getOperand(0).getReg(); 103 104 if (Src.isPhysical() || Dst.isPhysical()) 105 return false; 106 107 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); 108 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 109 110 if (SrcRC == DstRC) 111 return false; 112 113 114 if (SrcRC->hasSubClass(DstRC)) { 115 // This is the case where the source class is a superclass of the dest, so 116 // if the copy is the only user of the source, we can just constrain the 117 // source reg to the dest class. 118 119 if (!MRI.hasOneNonDBGUse(Src)) 120 return false; // Only constrain single uses of the source. 121 122 // Constrain to dst reg class as long as it's not a weird class that only 123 // has a few registers. 124 if (!MRI.constrainRegClass(Src, DstRC, /* MinNumRegs */ 25)) 125 return false; 126 } else if (DstRC->hasSubClass(SrcRC)) { 127 // This is the inverse case, where the destination class is a superclass of 128 // the source. Here, if the copy is the only user, we can just constrain 129 // the user of the copy to use the smaller class of the source. 130 } else { 131 return false; 132 } 133 134 MRI.replaceRegWith(Dst, Src); 135 MI.eraseFromParent(); 136 return true; 137 } 138 139 bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) { 140 // Consider the following code: 141 // FCMPSrr %0, %1, implicit-def $nzcv 142 // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv 143 // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv 144 // FCMPSrr %0, %1, implicit-def $nzcv 145 // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv 146 // This kind of code where we have 2 FCMPs each feeding a CSEL can happen 147 // when we have a single IR fcmp being used by two selects. During selection, 148 // to ensure that there can be no clobbering of nzcv between the fcmp and the 149 // csel, we have to generate an fcmp immediately before each csel is 150 // selected. 151 // However, often we can essentially CSE these together later in MachineCSE. 152 // This doesn't work though if there are unrelated flag-setting instructions 153 // in between the two FCMPs. In this case, the SUBS defines NZCV 154 // but it doesn't have any users, being overwritten by the second FCMP. 155 // 156 // Our solution here is to try to convert flag setting operations between 157 // a interval of identical FCMPs, so that CSE will be able to eliminate one. 158 bool Changed = false; 159 auto &MF = *MBB.getParent(); 160 auto &Subtarget = MF.getSubtarget(); 161 const auto &TII = Subtarget.getInstrInfo(); 162 auto TRI = Subtarget.getRegisterInfo(); 163 auto RBI = Subtarget.getRegBankInfo(); 164 auto &MRI = MF.getRegInfo(); 165 166 // The first step is to find the first and last FCMPs. If we have found 167 // at least two, then set the limit of the bottom-up walk to the first FCMP 168 // found since we're only interested in dealing with instructions between 169 // them. 170 MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr; 171 for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) { 172 if (MI.getOpcode() == AArch64::FCMPSrr || 173 MI.getOpcode() == AArch64::FCMPDrr) { 174 if (!FirstCmp) 175 FirstCmp = &MI; 176 else 177 LastCmp = &MI; 178 } 179 } 180 181 // In addition to converting flag-setting ops in fcmp ranges into non-flag 182 // setting ops, across the whole basic block we also detect when nzcv 183 // implicit-defs are dead, and mark them as dead. Peephole optimizations need 184 // this information later. 185 186 LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); 187 LRU.addLiveOuts(MBB); 188 bool NZCVDead = LRU.available(AArch64::NZCV); 189 bool InsideCmpRange = false; 190 for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { 191 LRU.stepBackward(II); 192 193 if (LastCmp) { // There's a range present in this block. 194 // If we're inside an fcmp range, look for begin instruction. 195 if (InsideCmpRange && &II == FirstCmp) 196 InsideCmpRange = false; 197 else if (&II == LastCmp) 198 InsideCmpRange = true; 199 } 200 201 // Did this instruction define NZCV? 202 bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV); 203 if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) { 204 // If we have a def and NZCV is dead, then we may convert this op. 205 unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); 206 int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); 207 if (DeadNZCVIdx != -1) { 208 // If we're inside an fcmp range, then convert flag setting ops. 209 if (InsideCmpRange && NewOpc) { 210 LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " 211 "op in fcmp range: " 212 << II); 213 II.setDesc(TII->get(NewOpc)); 214 II.removeOperand(DeadNZCVIdx); 215 // Changing the opcode can result in differing regclass requirements, 216 // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp. 217 // Constrain the regclasses, possibly introducing a copy. 218 constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(), 219 II.getOperand(0), 0); 220 Changed |= true; 221 } else { 222 // Otherwise, we just set the nzcv imp-def operand to be dead, so the 223 // peephole optimizations can optimize them further. 224 II.getOperand(DeadNZCVIdx).setIsDead(); 225 } 226 } 227 } 228 229 NZCVDead = NZCVDeadAtCurrInstr; 230 } 231 return Changed; 232 } 233 234 bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { 235 if (MF.getProperties().hasProperty( 236 MachineFunctionProperties::Property::FailedISel)) 237 return false; 238 assert(MF.getProperties().hasProperty( 239 MachineFunctionProperties::Property::Selected) && 240 "Expected a selected MF"); 241 242 bool Changed = false; 243 for (auto &BB : MF) { 244 Changed |= optimizeNZCVDefs(BB); 245 Changed |= doPeepholeOpts(BB); 246 } 247 return Changed; 248 } 249 250 char AArch64PostSelectOptimize::ID = 0; 251 INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, 252 "Optimize AArch64 selected instructions", 253 false, false) 254 INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, 255 "Optimize AArch64 selected instructions", false, 256 false) 257 258 namespace llvm { 259 FunctionPass *createAArch64PostSelectOptimize() { 260 return new AArch64PostSelectOptimize(); 261 } 262 } // end namespace llvm 263