1*0b57cec5SDimitry Andric //===--------------- PPCVSXFMAMutate.cpp - VSX FMA Mutation ---------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // This pass mutates the form of VSX FMA instructions to avoid unnecessary 10*0b57cec5SDimitry Andric // copies. 11*0b57cec5SDimitry Andric // 12*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 13*0b57cec5SDimitry Andric 14*0b57cec5SDimitry Andric #include "MCTargetDesc/PPCPredicates.h" 15*0b57cec5SDimitry Andric #include "PPC.h" 16*0b57cec5SDimitry Andric #include "PPCInstrBuilder.h" 17*0b57cec5SDimitry Andric #include "PPCInstrInfo.h" 18*0b57cec5SDimitry Andric #include "PPCMachineFunctionInfo.h" 19*0b57cec5SDimitry Andric #include "PPCTargetMachine.h" 20*0b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 21*0b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 22*0b57cec5SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h" 23*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 24*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 25*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 26*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 27*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h" 28*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 29*0b57cec5SDimitry Andric #include "llvm/CodeGen/PseudoSourceValue.h" 30*0b57cec5SDimitry Andric #include "llvm/CodeGen/ScheduleDAG.h" 31*0b57cec5SDimitry Andric #include "llvm/CodeGen/SlotIndexes.h" 32*0b57cec5SDimitry Andric #include "llvm/MC/MCAsmInfo.h" 33*0b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 34*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 35*0b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 36*0b57cec5SDimitry Andric #include "llvm/Support/TargetRegistry.h" 37*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 38*0b57cec5SDimitry Andric 39*0b57cec5SDimitry Andric using namespace llvm; 40*0b57cec5SDimitry Andric 41*0b57cec5SDimitry Andric // Temporarily disable FMA mutation by default, since it doesn't handle 42*0b57cec5SDimitry Andric // cross-basic-block intervals well. 43*0b57cec5SDimitry Andric // See: http://lists.llvm.org/pipermail/llvm-dev/2016-February/095669.html 44*0b57cec5SDimitry Andric // http://reviews.llvm.org/D17087 45*0b57cec5SDimitry Andric static cl::opt<bool> DisableVSXFMAMutate( 46*0b57cec5SDimitry Andric "disable-ppc-vsx-fma-mutation", 47*0b57cec5SDimitry Andric cl::desc("Disable VSX FMA instruction mutation"), cl::init(true), 48*0b57cec5SDimitry Andric cl::Hidden); 49*0b57cec5SDimitry Andric 50*0b57cec5SDimitry Andric #define DEBUG_TYPE "ppc-vsx-fma-mutate" 51*0b57cec5SDimitry Andric 52*0b57cec5SDimitry Andric namespace llvm { namespace PPC { 53*0b57cec5SDimitry Andric int getAltVSXFMAOpcode(uint16_t Opcode); 54*0b57cec5SDimitry Andric } } 55*0b57cec5SDimitry Andric 56*0b57cec5SDimitry Andric namespace { 57*0b57cec5SDimitry Andric // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers 58*0b57cec5SDimitry Andric // (Altivec and scalar floating-point registers), we need to transform the 59*0b57cec5SDimitry Andric // copies into subregister copies with other restrictions. 60*0b57cec5SDimitry Andric struct PPCVSXFMAMutate : public MachineFunctionPass { 61*0b57cec5SDimitry Andric static char ID; 62*0b57cec5SDimitry Andric PPCVSXFMAMutate() : MachineFunctionPass(ID) { 63*0b57cec5SDimitry Andric initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); 64*0b57cec5SDimitry Andric } 65*0b57cec5SDimitry Andric 66*0b57cec5SDimitry Andric LiveIntervals *LIS; 67*0b57cec5SDimitry Andric const PPCInstrInfo *TII; 68*0b57cec5SDimitry Andric 69*0b57cec5SDimitry Andric protected: 70*0b57cec5SDimitry Andric bool processBlock(MachineBasicBlock &MBB) { 71*0b57cec5SDimitry Andric bool Changed = false; 72*0b57cec5SDimitry Andric 73*0b57cec5SDimitry Andric MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 74*0b57cec5SDimitry Andric const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); 75*0b57cec5SDimitry Andric for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end(); 76*0b57cec5SDimitry Andric I != IE; ++I) { 77*0b57cec5SDimitry Andric MachineInstr &MI = *I; 78*0b57cec5SDimitry Andric 79*0b57cec5SDimitry Andric // The default (A-type) VSX FMA form kills the addend (it is taken from 80*0b57cec5SDimitry Andric // the target register, which is then updated to reflect the result of 81*0b57cec5SDimitry Andric // the FMA). If the instruction, however, kills one of the registers 82*0b57cec5SDimitry Andric // used for the product, then we can use the M-form instruction (which 83*0b57cec5SDimitry Andric // will take that value from the to-be-defined register). 84*0b57cec5SDimitry Andric 85*0b57cec5SDimitry Andric int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode()); 86*0b57cec5SDimitry Andric if (AltOpc == -1) 87*0b57cec5SDimitry Andric continue; 88*0b57cec5SDimitry Andric 89*0b57cec5SDimitry Andric // This pass is run after register coalescing, and so we're looking for 90*0b57cec5SDimitry Andric // a situation like this: 91*0b57cec5SDimitry Andric // ... 92*0b57cec5SDimitry Andric // %5 = COPY %9; VSLRC:%5,%9 93*0b57cec5SDimitry Andric // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16, 94*0b57cec5SDimitry Andric // implicit %rm; VSLRC:%5,%17,%16 95*0b57cec5SDimitry Andric // ... 96*0b57cec5SDimitry Andric // %9<def,tied1> = XSMADDADP %9<tied0>, %17, %19, 97*0b57cec5SDimitry Andric // implicit %rm; VSLRC:%9,%17,%19 98*0b57cec5SDimitry Andric // ... 99*0b57cec5SDimitry Andric // Where we can eliminate the copy by changing from the A-type to the 100*0b57cec5SDimitry Andric // M-type instruction. Specifically, for this example, this means: 101*0b57cec5SDimitry Andric // %5<def,tied1> = XSMADDADP %5<tied0>, %17, %16, 102*0b57cec5SDimitry Andric // implicit %rm; VSLRC:%5,%17,%16 103*0b57cec5SDimitry Andric // is replaced by: 104*0b57cec5SDimitry Andric // %16<def,tied1> = XSMADDMDP %16<tied0>, %18, %9, 105*0b57cec5SDimitry Andric // implicit %rm; VSLRC:%16,%18,%9 106*0b57cec5SDimitry Andric // and we remove: %5 = COPY %9; VSLRC:%5,%9 107*0b57cec5SDimitry Andric 108*0b57cec5SDimitry Andric SlotIndex FMAIdx = LIS->getInstructionIndex(MI); 109*0b57cec5SDimitry Andric 110*0b57cec5SDimitry Andric VNInfo *AddendValNo = 111*0b57cec5SDimitry Andric LIS->getInterval(MI.getOperand(1).getReg()).Query(FMAIdx).valueIn(); 112*0b57cec5SDimitry Andric 113*0b57cec5SDimitry Andric // This can be null if the register is undef. 114*0b57cec5SDimitry Andric if (!AddendValNo) 115*0b57cec5SDimitry Andric continue; 116*0b57cec5SDimitry Andric 117*0b57cec5SDimitry Andric MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def); 118*0b57cec5SDimitry Andric 119*0b57cec5SDimitry Andric // The addend and this instruction must be in the same block. 120*0b57cec5SDimitry Andric 121*0b57cec5SDimitry Andric if (!AddendMI || AddendMI->getParent() != MI.getParent()) 122*0b57cec5SDimitry Andric continue; 123*0b57cec5SDimitry Andric 124*0b57cec5SDimitry Andric // The addend must be a full copy within the same register class. 125*0b57cec5SDimitry Andric 126*0b57cec5SDimitry Andric if (!AddendMI->isFullCopy()) 127*0b57cec5SDimitry Andric continue; 128*0b57cec5SDimitry Andric 129*0b57cec5SDimitry Andric unsigned AddendSrcReg = AddendMI->getOperand(1).getReg(); 130*0b57cec5SDimitry Andric if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) { 131*0b57cec5SDimitry Andric if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) != 132*0b57cec5SDimitry Andric MRI.getRegClass(AddendSrcReg)) 133*0b57cec5SDimitry Andric continue; 134*0b57cec5SDimitry Andric } else { 135*0b57cec5SDimitry Andric // If AddendSrcReg is a physical register, make sure the destination 136*0b57cec5SDimitry Andric // register class contains it. 137*0b57cec5SDimitry Andric if (!MRI.getRegClass(AddendMI->getOperand(0).getReg()) 138*0b57cec5SDimitry Andric ->contains(AddendSrcReg)) 139*0b57cec5SDimitry Andric continue; 140*0b57cec5SDimitry Andric } 141*0b57cec5SDimitry Andric 142*0b57cec5SDimitry Andric // In theory, there could be other uses of the addend copy before this 143*0b57cec5SDimitry Andric // fma. We could deal with this, but that would require additional 144*0b57cec5SDimitry Andric // logic below and I suspect it will not occur in any relevant 145*0b57cec5SDimitry Andric // situations. Additionally, check whether the copy source is killed 146*0b57cec5SDimitry Andric // prior to the fma. In order to replace the addend here with the 147*0b57cec5SDimitry Andric // source of the copy, it must still be live here. We can't use 148*0b57cec5SDimitry Andric // interval testing for a physical register, so as long as we're 149*0b57cec5SDimitry Andric // walking the MIs we may as well test liveness here. 150*0b57cec5SDimitry Andric // 151*0b57cec5SDimitry Andric // FIXME: There is a case that occurs in practice, like this: 152*0b57cec5SDimitry Andric // %9 = COPY %f1; VSSRC:%9 153*0b57cec5SDimitry Andric // ... 154*0b57cec5SDimitry Andric // %6 = COPY %9; VSSRC:%6,%9 155*0b57cec5SDimitry Andric // %7 = COPY %9; VSSRC:%7,%9 156*0b57cec5SDimitry Andric // %9<def,tied1> = XSMADDASP %9<tied0>, %1, %4; VSSRC: 157*0b57cec5SDimitry Andric // %6<def,tied1> = XSMADDASP %6<tied0>, %1, %2; VSSRC: 158*0b57cec5SDimitry Andric // %7<def,tied1> = XSMADDASP %7<tied0>, %1, %3; VSSRC: 159*0b57cec5SDimitry Andric // which prevents an otherwise-profitable transformation. 160*0b57cec5SDimitry Andric bool OtherUsers = false, KillsAddendSrc = false; 161*0b57cec5SDimitry Andric for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI); 162*0b57cec5SDimitry Andric J != JE; --J) { 163*0b57cec5SDimitry Andric if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) { 164*0b57cec5SDimitry Andric OtherUsers = true; 165*0b57cec5SDimitry Andric break; 166*0b57cec5SDimitry Andric } 167*0b57cec5SDimitry Andric if (J->modifiesRegister(AddendSrcReg, TRI) || 168*0b57cec5SDimitry Andric J->killsRegister(AddendSrcReg, TRI)) { 169*0b57cec5SDimitry Andric KillsAddendSrc = true; 170*0b57cec5SDimitry Andric break; 171*0b57cec5SDimitry Andric } 172*0b57cec5SDimitry Andric } 173*0b57cec5SDimitry Andric 174*0b57cec5SDimitry Andric if (OtherUsers || KillsAddendSrc) 175*0b57cec5SDimitry Andric continue; 176*0b57cec5SDimitry Andric 177*0b57cec5SDimitry Andric 178*0b57cec5SDimitry Andric // The transformation doesn't work well with things like: 179*0b57cec5SDimitry Andric // %5 = A-form-op %5, %11, %5; 180*0b57cec5SDimitry Andric // unless %11 is also a kill, so skip when it is not, 181*0b57cec5SDimitry Andric // and check operand 3 to see it is also a kill to handle the case: 182*0b57cec5SDimitry Andric // %5 = A-form-op %5, %5, %11; 183*0b57cec5SDimitry Andric // where %5 and %11 are both kills. This case would be skipped 184*0b57cec5SDimitry Andric // otherwise. 185*0b57cec5SDimitry Andric unsigned OldFMAReg = MI.getOperand(0).getReg(); 186*0b57cec5SDimitry Andric 187*0b57cec5SDimitry Andric // Find one of the product operands that is killed by this instruction. 188*0b57cec5SDimitry Andric unsigned KilledProdOp = 0, OtherProdOp = 0; 189*0b57cec5SDimitry Andric unsigned Reg2 = MI.getOperand(2).getReg(); 190*0b57cec5SDimitry Andric unsigned Reg3 = MI.getOperand(3).getReg(); 191*0b57cec5SDimitry Andric if (LIS->getInterval(Reg2).Query(FMAIdx).isKill() 192*0b57cec5SDimitry Andric && Reg2 != OldFMAReg) { 193*0b57cec5SDimitry Andric KilledProdOp = 2; 194*0b57cec5SDimitry Andric OtherProdOp = 3; 195*0b57cec5SDimitry Andric } else if (LIS->getInterval(Reg3).Query(FMAIdx).isKill() 196*0b57cec5SDimitry Andric && Reg3 != OldFMAReg) { 197*0b57cec5SDimitry Andric KilledProdOp = 3; 198*0b57cec5SDimitry Andric OtherProdOp = 2; 199*0b57cec5SDimitry Andric } 200*0b57cec5SDimitry Andric 201*0b57cec5SDimitry Andric // If there are no usable killed product operands, then this 202*0b57cec5SDimitry Andric // transformation is likely not profitable. 203*0b57cec5SDimitry Andric if (!KilledProdOp) 204*0b57cec5SDimitry Andric continue; 205*0b57cec5SDimitry Andric 206*0b57cec5SDimitry Andric // If the addend copy is used only by this MI, then the addend source 207*0b57cec5SDimitry Andric // register is likely not live here. This could be fixed (based on the 208*0b57cec5SDimitry Andric // legality checks above, the live range for the addend source register 209*0b57cec5SDimitry Andric // could be extended), but it seems likely that such a trivial copy can 210*0b57cec5SDimitry Andric // be coalesced away later, and thus is not worth the effort. 211*0b57cec5SDimitry Andric if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) && 212*0b57cec5SDimitry Andric !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx)) 213*0b57cec5SDimitry Andric continue; 214*0b57cec5SDimitry Andric 215*0b57cec5SDimitry Andric // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3. 216*0b57cec5SDimitry Andric 217*0b57cec5SDimitry Andric unsigned KilledProdReg = MI.getOperand(KilledProdOp).getReg(); 218*0b57cec5SDimitry Andric unsigned OtherProdReg = MI.getOperand(OtherProdOp).getReg(); 219*0b57cec5SDimitry Andric 220*0b57cec5SDimitry Andric unsigned AddSubReg = AddendMI->getOperand(1).getSubReg(); 221*0b57cec5SDimitry Andric unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg(); 222*0b57cec5SDimitry Andric unsigned OtherProdSubReg = MI.getOperand(OtherProdOp).getSubReg(); 223*0b57cec5SDimitry Andric 224*0b57cec5SDimitry Andric bool AddRegKill = AddendMI->getOperand(1).isKill(); 225*0b57cec5SDimitry Andric bool KilledProdRegKill = MI.getOperand(KilledProdOp).isKill(); 226*0b57cec5SDimitry Andric bool OtherProdRegKill = MI.getOperand(OtherProdOp).isKill(); 227*0b57cec5SDimitry Andric 228*0b57cec5SDimitry Andric bool AddRegUndef = AddendMI->getOperand(1).isUndef(); 229*0b57cec5SDimitry Andric bool KilledProdRegUndef = MI.getOperand(KilledProdOp).isUndef(); 230*0b57cec5SDimitry Andric bool OtherProdRegUndef = MI.getOperand(OtherProdOp).isUndef(); 231*0b57cec5SDimitry Andric 232*0b57cec5SDimitry Andric // If there isn't a class that fits, we can't perform the transform. 233*0b57cec5SDimitry Andric // This is needed for correctness with a mixture of VSX and Altivec 234*0b57cec5SDimitry Andric // instructions to make sure that a low VSX register is not assigned to 235*0b57cec5SDimitry Andric // the Altivec instruction. 236*0b57cec5SDimitry Andric if (!MRI.constrainRegClass(KilledProdReg, 237*0b57cec5SDimitry Andric MRI.getRegClass(OldFMAReg))) 238*0b57cec5SDimitry Andric continue; 239*0b57cec5SDimitry Andric 240*0b57cec5SDimitry Andric assert(OldFMAReg == AddendMI->getOperand(0).getReg() && 241*0b57cec5SDimitry Andric "Addend copy not tied to old FMA output!"); 242*0b57cec5SDimitry Andric 243*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "VSX FMA Mutation:\n " << MI); 244*0b57cec5SDimitry Andric 245*0b57cec5SDimitry Andric MI.getOperand(0).setReg(KilledProdReg); 246*0b57cec5SDimitry Andric MI.getOperand(1).setReg(KilledProdReg); 247*0b57cec5SDimitry Andric MI.getOperand(3).setReg(AddendSrcReg); 248*0b57cec5SDimitry Andric 249*0b57cec5SDimitry Andric MI.getOperand(0).setSubReg(KilledProdSubReg); 250*0b57cec5SDimitry Andric MI.getOperand(1).setSubReg(KilledProdSubReg); 251*0b57cec5SDimitry Andric MI.getOperand(3).setSubReg(AddSubReg); 252*0b57cec5SDimitry Andric 253*0b57cec5SDimitry Andric MI.getOperand(1).setIsKill(KilledProdRegKill); 254*0b57cec5SDimitry Andric MI.getOperand(3).setIsKill(AddRegKill); 255*0b57cec5SDimitry Andric 256*0b57cec5SDimitry Andric MI.getOperand(1).setIsUndef(KilledProdRegUndef); 257*0b57cec5SDimitry Andric MI.getOperand(3).setIsUndef(AddRegUndef); 258*0b57cec5SDimitry Andric 259*0b57cec5SDimitry Andric MI.setDesc(TII->get(AltOpc)); 260*0b57cec5SDimitry Andric 261*0b57cec5SDimitry Andric // If the addend is also a multiplicand, replace it with the addend 262*0b57cec5SDimitry Andric // source in both places. 263*0b57cec5SDimitry Andric if (OtherProdReg == AddendMI->getOperand(0).getReg()) { 264*0b57cec5SDimitry Andric MI.getOperand(2).setReg(AddendSrcReg); 265*0b57cec5SDimitry Andric MI.getOperand(2).setSubReg(AddSubReg); 266*0b57cec5SDimitry Andric MI.getOperand(2).setIsKill(AddRegKill); 267*0b57cec5SDimitry Andric MI.getOperand(2).setIsUndef(AddRegUndef); 268*0b57cec5SDimitry Andric } else { 269*0b57cec5SDimitry Andric MI.getOperand(2).setReg(OtherProdReg); 270*0b57cec5SDimitry Andric MI.getOperand(2).setSubReg(OtherProdSubReg); 271*0b57cec5SDimitry Andric MI.getOperand(2).setIsKill(OtherProdRegKill); 272*0b57cec5SDimitry Andric MI.getOperand(2).setIsUndef(OtherProdRegUndef); 273*0b57cec5SDimitry Andric } 274*0b57cec5SDimitry Andric 275*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " -> " << MI); 276*0b57cec5SDimitry Andric 277*0b57cec5SDimitry Andric // The killed product operand was killed here, so we can reuse it now 278*0b57cec5SDimitry Andric // for the result of the fma. 279*0b57cec5SDimitry Andric 280*0b57cec5SDimitry Andric LiveInterval &FMAInt = LIS->getInterval(OldFMAReg); 281*0b57cec5SDimitry Andric VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot()); 282*0b57cec5SDimitry Andric for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end(); 283*0b57cec5SDimitry Andric UI != UE;) { 284*0b57cec5SDimitry Andric MachineOperand &UseMO = *UI; 285*0b57cec5SDimitry Andric MachineInstr *UseMI = UseMO.getParent(); 286*0b57cec5SDimitry Andric ++UI; 287*0b57cec5SDimitry Andric 288*0b57cec5SDimitry Andric // Don't replace the result register of the copy we're about to erase. 289*0b57cec5SDimitry Andric if (UseMI == AddendMI) 290*0b57cec5SDimitry Andric continue; 291*0b57cec5SDimitry Andric 292*0b57cec5SDimitry Andric UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI); 293*0b57cec5SDimitry Andric } 294*0b57cec5SDimitry Andric 295*0b57cec5SDimitry Andric // Extend the live intervals of the killed product operand to hold the 296*0b57cec5SDimitry Andric // fma result. 297*0b57cec5SDimitry Andric 298*0b57cec5SDimitry Andric LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg); 299*0b57cec5SDimitry Andric for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end(); 300*0b57cec5SDimitry Andric AI != AE; ++AI) { 301*0b57cec5SDimitry Andric // Don't add the segment that corresponds to the original copy. 302*0b57cec5SDimitry Andric if (AI->valno == AddendValNo) 303*0b57cec5SDimitry Andric continue; 304*0b57cec5SDimitry Andric 305*0b57cec5SDimitry Andric VNInfo *NewFMAValNo = 306*0b57cec5SDimitry Andric NewFMAInt.getNextValue(AI->start, 307*0b57cec5SDimitry Andric LIS->getVNInfoAllocator()); 308*0b57cec5SDimitry Andric 309*0b57cec5SDimitry Andric NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end, 310*0b57cec5SDimitry Andric NewFMAValNo)); 311*0b57cec5SDimitry Andric } 312*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " extended: " << NewFMAInt << '\n'); 313*0b57cec5SDimitry Andric 314*0b57cec5SDimitry Andric // Extend the live interval of the addend source (it might end at the 315*0b57cec5SDimitry Andric // copy to be removed, or somewhere in between there and here). This 316*0b57cec5SDimitry Andric // is necessary only if it is a physical register. 317*0b57cec5SDimitry Andric if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) 318*0b57cec5SDimitry Andric for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid(); 319*0b57cec5SDimitry Andric ++Units) { 320*0b57cec5SDimitry Andric unsigned Unit = *Units; 321*0b57cec5SDimitry Andric 322*0b57cec5SDimitry Andric LiveRange &AddendSrcRange = LIS->getRegUnit(Unit); 323*0b57cec5SDimitry Andric AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB), 324*0b57cec5SDimitry Andric FMAIdx.getRegSlot()); 325*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n'); 326*0b57cec5SDimitry Andric } 327*0b57cec5SDimitry Andric 328*0b57cec5SDimitry Andric FMAInt.removeValNo(FMAValNo); 329*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " trimmed: " << FMAInt << '\n'); 330*0b57cec5SDimitry Andric 331*0b57cec5SDimitry Andric // Remove the (now unused) copy. 332*0b57cec5SDimitry Andric 333*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " removing: " << *AddendMI << '\n'); 334*0b57cec5SDimitry Andric LIS->RemoveMachineInstrFromMaps(*AddendMI); 335*0b57cec5SDimitry Andric AddendMI->eraseFromParent(); 336*0b57cec5SDimitry Andric 337*0b57cec5SDimitry Andric Changed = true; 338*0b57cec5SDimitry Andric } 339*0b57cec5SDimitry Andric 340*0b57cec5SDimitry Andric return Changed; 341*0b57cec5SDimitry Andric } 342*0b57cec5SDimitry Andric 343*0b57cec5SDimitry Andric public: 344*0b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override { 345*0b57cec5SDimitry Andric if (skipFunction(MF.getFunction())) 346*0b57cec5SDimitry Andric return false; 347*0b57cec5SDimitry Andric 348*0b57cec5SDimitry Andric // If we don't have VSX then go ahead and return without doing 349*0b57cec5SDimitry Andric // anything. 350*0b57cec5SDimitry Andric const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>(); 351*0b57cec5SDimitry Andric if (!STI.hasVSX()) 352*0b57cec5SDimitry Andric return false; 353*0b57cec5SDimitry Andric 354*0b57cec5SDimitry Andric LIS = &getAnalysis<LiveIntervals>(); 355*0b57cec5SDimitry Andric 356*0b57cec5SDimitry Andric TII = STI.getInstrInfo(); 357*0b57cec5SDimitry Andric 358*0b57cec5SDimitry Andric bool Changed = false; 359*0b57cec5SDimitry Andric 360*0b57cec5SDimitry Andric if (DisableVSXFMAMutate) 361*0b57cec5SDimitry Andric return Changed; 362*0b57cec5SDimitry Andric 363*0b57cec5SDimitry Andric for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { 364*0b57cec5SDimitry Andric MachineBasicBlock &B = *I++; 365*0b57cec5SDimitry Andric if (processBlock(B)) 366*0b57cec5SDimitry Andric Changed = true; 367*0b57cec5SDimitry Andric } 368*0b57cec5SDimitry Andric 369*0b57cec5SDimitry Andric return Changed; 370*0b57cec5SDimitry Andric } 371*0b57cec5SDimitry Andric 372*0b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 373*0b57cec5SDimitry Andric AU.addRequired<LiveIntervals>(); 374*0b57cec5SDimitry Andric AU.addPreserved<LiveIntervals>(); 375*0b57cec5SDimitry Andric AU.addRequired<SlotIndexes>(); 376*0b57cec5SDimitry Andric AU.addPreserved<SlotIndexes>(); 377*0b57cec5SDimitry Andric AU.addRequired<MachineDominatorTree>(); 378*0b57cec5SDimitry Andric AU.addPreserved<MachineDominatorTree>(); 379*0b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 380*0b57cec5SDimitry Andric } 381*0b57cec5SDimitry Andric }; 382*0b57cec5SDimitry Andric } 383*0b57cec5SDimitry Andric 384*0b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE, 385*0b57cec5SDimitry Andric "PowerPC VSX FMA Mutation", false, false) 386*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 387*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 388*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 389*0b57cec5SDimitry Andric INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE, 390*0b57cec5SDimitry Andric "PowerPC VSX FMA Mutation", false, false) 391*0b57cec5SDimitry Andric 392*0b57cec5SDimitry Andric char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID; 393*0b57cec5SDimitry Andric 394*0b57cec5SDimitry Andric char PPCVSXFMAMutate::ID = 0; 395*0b57cec5SDimitry Andric FunctionPass *llvm::createPPCVSXFMAMutatePass() { 396*0b57cec5SDimitry Andric return new PPCVSXFMAMutate(); 397*0b57cec5SDimitry Andric } 398