1 //=== AArch64PostSelectOptimize.cpp ---------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass does post-instruction-selection optimizations in the GlobalISel 10 // pipeline, before the rest of codegen runs. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64.h" 15 #include "AArch64TargetMachine.h" 16 #include "MCTargetDesc/AArch64MCTargetDesc.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/CodeGen/GlobalISel/Utils.h" 19 #include "llvm/CodeGen/MachineBasicBlock.h" 20 #include "llvm/CodeGen/MachineFunctionPass.h" 21 #include "llvm/CodeGen/MachineInstr.h" 22 #include "llvm/CodeGen/MachineOperand.h" 23 #include "llvm/CodeGen/TargetPassConfig.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/ErrorHandling.h" 26 27 #define DEBUG_TYPE "aarch64-post-select-optimize" 28 29 using namespace llvm; 30 31 namespace { 32 class AArch64PostSelectOptimize : public MachineFunctionPass { 33 public: 34 static char ID; 35 36 AArch64PostSelectOptimize(); 37 38 StringRef getPassName() const override { 39 return "AArch64 Post Select Optimizer"; 40 } 41 42 bool runOnMachineFunction(MachineFunction &MF) override; 43 44 void getAnalysisUsage(AnalysisUsage &AU) const override; 45 46 private: 47 bool optimizeNZCVDefs(MachineBasicBlock &MBB); 48 bool doPeepholeOpts(MachineBasicBlock &MBB); 49 /// Look for cross regclass copies that can be trivially eliminated. 50 bool foldSimpleCrossClassCopies(MachineInstr &MI); 51 }; 52 } // end anonymous namespace 53 54 void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const { 55 AU.addRequired<TargetPassConfig>(); 56 AU.setPreservesCFG(); 57 getSelectionDAGFallbackAnalysisUsage(AU); 58 MachineFunctionPass::getAnalysisUsage(AU); 59 } 60 61 AArch64PostSelectOptimize::AArch64PostSelectOptimize() 62 : MachineFunctionPass(ID) { 63 initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry()); 64 } 65 66 unsigned getNonFlagSettingVariant(unsigned Opc) { 67 switch (Opc) { 68 default: 69 return 0; 70 case AArch64::SUBSXrr: 71 return AArch64::SUBXrr; 72 case AArch64::SUBSWrr: 73 return AArch64::SUBWrr; 74 case AArch64::SUBSXrs: 75 return AArch64::SUBXrs; 76 case AArch64::SUBSWrs: 77 return AArch64::SUBWrs; 78 case AArch64::SUBSXri: 79 return AArch64::SUBXri; 80 case AArch64::SUBSWri: 81 return AArch64::SUBWri; 82 case AArch64::ADDSXrr: 83 return AArch64::ADDXrr; 84 case AArch64::ADDSWrr: 85 return AArch64::ADDWrr; 86 case AArch64::ADDSXrs: 87 return AArch64::ADDXrs; 88 case AArch64::ADDSWrs: 89 return AArch64::ADDWrs; 90 case AArch64::ADDSXri: 91 return AArch64::ADDXri; 92 case AArch64::ADDSWri: 93 return AArch64::ADDWri; 94 case AArch64::SBCSXr: 95 return AArch64::SBCXr; 96 case AArch64::SBCSWr: 97 return AArch64::SBCWr; 98 case AArch64::ADCSXr: 99 return AArch64::ADCXr; 100 case AArch64::ADCSWr: 101 return AArch64::ADCWr; 102 } 103 } 104 105 bool AArch64PostSelectOptimize::doPeepholeOpts(MachineBasicBlock &MBB) { 106 bool Changed = false; 107 for (auto &MI : make_early_inc_range(make_range(MBB.begin(), MBB.end()))) { 108 Changed |= foldSimpleCrossClassCopies(MI); 109 } 110 return Changed; 111 } 112 113 bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) { 114 auto *MF = MI.getMF(); 115 auto &MRI = MF->getRegInfo(); 116 117 if (!MI.isCopy()) 118 return false; 119 120 if (MI.getOperand(1).getSubReg()) 121 return false; // Don't deal with subreg copies 122 123 Register Src = MI.getOperand(1).getReg(); 124 Register Dst = MI.getOperand(0).getReg(); 125 126 if (Src.isPhysical() || Dst.isPhysical()) 127 return false; 128 129 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); 130 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 131 132 if (SrcRC == DstRC) 133 return false; 134 135 136 if (SrcRC->hasSubClass(DstRC)) { 137 // This is the case where the source class is a superclass of the dest, so 138 // if the copy is the only user of the source, we can just constrain the 139 // source reg to the dest class. 140 141 if (!MRI.hasOneNonDBGUse(Src)) 142 return false; // Only constrain single uses of the source. 143 144 // Constrain to dst reg class as long as it's not a weird class that only 145 // has a few registers. 146 if (!MRI.constrainRegClass(Src, DstRC, /* MinNumRegs */ 25)) 147 return false; 148 } else if (DstRC->hasSubClass(SrcRC)) { 149 // This is the inverse case, where the destination class is a superclass of 150 // the source. Here, if the copy is the only user, we can just constrain 151 // the user of the copy to use the smaller class of the source. 152 } else { 153 return false; 154 } 155 156 MRI.replaceRegWith(Dst, Src); 157 MI.eraseFromParent(); 158 return true; 159 } 160 161 bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) { 162 // If we find a dead NZCV implicit-def, we 163 // - try to convert the operation to a non-flag-setting equivalent 164 // - or mark the def as dead to aid later peephole optimizations. 165 166 // Use cases: 167 // 1) 168 // Consider the following code: 169 // FCMPSrr %0, %1, implicit-def $nzcv 170 // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv 171 // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv 172 // FCMPSrr %0, %1, implicit-def $nzcv 173 // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv 174 // This kind of code where we have 2 FCMPs each feeding a CSEL can happen 175 // when we have a single IR fcmp being used by two selects. During selection, 176 // to ensure that there can be no clobbering of nzcv between the fcmp and the 177 // csel, we have to generate an fcmp immediately before each csel is 178 // selected. 179 // However, often we can essentially CSE these together later in MachineCSE. 180 // This doesn't work though if there are unrelated flag-setting instructions 181 // in between the two FCMPs. In this case, the SUBS defines NZCV 182 // but it doesn't have any users, being overwritten by the second FCMP. 183 // 184 // 2) 185 // The instruction selector always emits the flag-setting variant of ADC/SBC 186 // while selecting G_UADDE/G_SADDE/G_USUBE/G_SSUBE. If the carry-out of these 187 // instructions is never used, we can switch to the non-flag-setting variant. 188 189 bool Changed = false; 190 auto &MF = *MBB.getParent(); 191 auto &Subtarget = MF.getSubtarget(); 192 const auto &TII = Subtarget.getInstrInfo(); 193 auto TRI = Subtarget.getRegisterInfo(); 194 auto RBI = Subtarget.getRegBankInfo(); 195 auto &MRI = MF.getRegInfo(); 196 197 LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); 198 LRU.addLiveOuts(MBB); 199 200 for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { 201 bool NZCVDead = LRU.available(AArch64::NZCV); 202 if (NZCVDead && II.definesRegister(AArch64::NZCV)) { 203 // The instruction defines NZCV, but NZCV is dead. 204 unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); 205 int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); 206 if (DeadNZCVIdx != -1) { 207 if (NewOpc) { 208 // If there is an equivalent non-flag-setting op, we convert. 209 LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " 210 "op: " 211 << II); 212 II.setDesc(TII->get(NewOpc)); 213 II.removeOperand(DeadNZCVIdx); 214 // Changing the opcode can result in differing regclass requirements, 215 // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp. 216 // Constrain the regclasses, possibly introducing a copy. 217 constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(), 218 II.getOperand(0), 0); 219 Changed |= true; 220 } else { 221 // Otherwise, we just set the nzcv imp-def operand to be dead, so the 222 // peephole optimizations can optimize them further. 223 II.getOperand(DeadNZCVIdx).setIsDead(); 224 } 225 } 226 } 227 LRU.stepBackward(II); 228 } 229 return Changed; 230 } 231 232 bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { 233 if (MF.getProperties().hasProperty( 234 MachineFunctionProperties::Property::FailedISel)) 235 return false; 236 assert(MF.getProperties().hasProperty( 237 MachineFunctionProperties::Property::Selected) && 238 "Expected a selected MF"); 239 240 bool Changed = false; 241 for (auto &BB : MF) { 242 Changed |= optimizeNZCVDefs(BB); 243 Changed |= doPeepholeOpts(BB); 244 } 245 return Changed; 246 } 247 248 char AArch64PostSelectOptimize::ID = 0; 249 INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, 250 "Optimize AArch64 selected instructions", 251 false, false) 252 INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, 253 "Optimize AArch64 selected instructions", false, 254 false) 255 256 namespace llvm { 257 FunctionPass *createAArch64PostSelectOptimize() { 258 return new AArch64PostSelectOptimize(); 259 } 260 } // end namespace llvm 261