1 //=== AArch64PostSelectOptimize.cpp ---------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass does post-instruction-selection optimizations in the GlobalISel 10 // pipeline, before the rest of codegen runs. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64.h" 15 #include "AArch64TargetMachine.h" 16 #include "MCTargetDesc/AArch64MCTargetDesc.h" 17 #include "llvm/CodeGen/GlobalISel/Utils.h" 18 #include "llvm/CodeGen/MachineBasicBlock.h" 19 #include "llvm/CodeGen/MachineFunctionPass.h" 20 #include "llvm/CodeGen/MachineOperand.h" 21 #include "llvm/CodeGen/TargetPassConfig.h" 22 #include "llvm/Support/Debug.h" 23 24 #define DEBUG_TYPE "aarch64-post-select-optimize" 25 26 using namespace llvm; 27 28 namespace { 29 class AArch64PostSelectOptimize : public MachineFunctionPass { 30 public: 31 static char ID; 32 33 AArch64PostSelectOptimize(); 34 35 StringRef getPassName() const override { 36 return "AArch64 Post Select Optimizer"; 37 } 38 39 bool runOnMachineFunction(MachineFunction &MF) override; 40 41 void getAnalysisUsage(AnalysisUsage &AU) const override; 42 43 private: 44 bool optimizeNZCVDefs(MachineBasicBlock &MBB); 45 }; 46 } // end anonymous namespace 47 48 void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const { 49 AU.addRequired<TargetPassConfig>(); 50 AU.setPreservesCFG(); 51 getSelectionDAGFallbackAnalysisUsage(AU); 52 MachineFunctionPass::getAnalysisUsage(AU); 53 } 54 55 AArch64PostSelectOptimize::AArch64PostSelectOptimize() 56 : MachineFunctionPass(ID) { 57 initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry()); 58 } 59 60 unsigned getNonFlagSettingVariant(unsigned Opc) { 61 switch (Opc) { 62 default: 63 return 0; 64 case AArch64::SUBSXrr: 65 return AArch64::SUBXrr; 66 case AArch64::SUBSWrr: 67 return AArch64::SUBWrr; 68 case AArch64::SUBSXrs: 69 return AArch64::SUBXrs; 70 case AArch64::SUBSXri: 71 return AArch64::SUBXri; 72 case AArch64::SUBSWri: 73 return AArch64::SUBWri; 74 } 75 } 76 77 bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) { 78 // Consider the following code: 79 // FCMPSrr %0, %1, implicit-def $nzcv 80 // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv 81 // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv 82 // FCMPSrr %0, %1, implicit-def $nzcv 83 // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv 84 // This kind of code where we have 2 FCMPs each feeding a CSEL can happen 85 // when we have a single IR fcmp being used by two selects. During selection, 86 // to ensure that there can be no clobbering of nzcv between the fcmp and the 87 // csel, we have to generate an fcmp immediately before each csel is 88 // selected. 89 // However, often we can essentially CSE these together later in MachineCSE. 90 // This doesn't work though if there are unrelated flag-setting instructions 91 // in between the two FCMPs. In this case, the SUBS defines NZCV 92 // but it doesn't have any users, being overwritten by the second FCMP. 93 // 94 // Our solution here is to try to convert flag setting operations between 95 // a interval of identical FCMPs, so that CSE will be able to eliminate one. 96 bool Changed = false; 97 auto &MF = *MBB.getParent(); 98 auto &Subtarget = MF.getSubtarget(); 99 const auto &TII = Subtarget.getInstrInfo(); 100 auto TRI = Subtarget.getRegisterInfo(); 101 auto RBI = Subtarget.getRegBankInfo(); 102 auto &MRI = MF.getRegInfo(); 103 104 // The first step is to find the first and last FCMPs. If we have found 105 // at least two, then set the limit of the bottom-up walk to the first FCMP 106 // found since we're only interested in dealing with instructions between 107 // them. 108 MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr; 109 for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) { 110 if (MI.getOpcode() == AArch64::FCMPSrr || 111 MI.getOpcode() == AArch64::FCMPDrr) { 112 if (!FirstCmp) 113 FirstCmp = &MI; 114 else 115 LastCmp = &MI; 116 } 117 } 118 119 // In addition to converting flag-setting ops in fcmp ranges into non-flag 120 // setting ops, across the whole basic block we also detect when nzcv 121 // implicit-defs are dead, and mark them as dead. Peephole optimizations need 122 // this information later. 123 124 LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); 125 LRU.addLiveOuts(MBB); 126 bool NZCVDead = LRU.available(AArch64::NZCV); 127 bool InsideCmpRange = false; 128 for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { 129 LRU.stepBackward(II); 130 131 if (LastCmp) { // There's a range present in this block. 132 // If we're inside an fcmp range, look for begin instruction. 133 if (InsideCmpRange && &II == FirstCmp) 134 InsideCmpRange = false; 135 else if (&II == LastCmp) 136 InsideCmpRange = true; 137 } 138 139 // Did this instruction define NZCV? 140 bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV); 141 if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) { 142 // If we have a def and NZCV is dead, then we may convert this op. 143 unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); 144 int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); 145 if (DeadNZCVIdx != -1) { 146 // If we're inside an fcmp range, then convert flag setting ops. 147 if (InsideCmpRange && NewOpc) { 148 LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " 149 "op in fcmp range: " 150 << II); 151 II.setDesc(TII->get(NewOpc)); 152 II.removeOperand(DeadNZCVIdx); 153 // Changing the opcode can result in differing regclass requirements, 154 // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp. 155 // Constrain the regclasses, possibly introducing a copy. 156 constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(), 157 II.getOperand(0), 0); 158 Changed |= true; 159 } else { 160 // Otherwise, we just set the nzcv imp-def operand to be dead, so the 161 // peephole optimizations can optimize them further. 162 II.getOperand(DeadNZCVIdx).setIsDead(); 163 } 164 } 165 } 166 167 NZCVDead = NZCVDeadAtCurrInstr; 168 } 169 return Changed; 170 } 171 172 bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) { 173 if (MF.getProperties().hasProperty( 174 MachineFunctionProperties::Property::FailedISel)) 175 return false; 176 assert(MF.getProperties().hasProperty( 177 MachineFunctionProperties::Property::Selected) && 178 "Expected a selected MF"); 179 180 bool Changed = false; 181 for (auto &BB : MF) 182 Changed |= optimizeNZCVDefs(BB); 183 return Changed; 184 } 185 186 char AArch64PostSelectOptimize::ID = 0; 187 INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE, 188 "Optimize AArch64 selected instructions", 189 false, false) 190 INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE, 191 "Optimize AArch64 selected instructions", false, 192 false) 193 194 namespace llvm { 195 FunctionPass *createAArch64PostSelectOptimize() { 196 return new AArch64PostSelectOptimize(); 197 } 198 } // end namespace llvm 199