1*0b57cec5SDimitry Andric //===- HexagonPacketizer.cpp - VLIW packetizer ----------------------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric // 9*0b57cec5SDimitry Andric // This implements a simple VLIW packetizer using DFA. The packetizer works on 10*0b57cec5SDimitry Andric // machine basic blocks. For each instruction I in BB, the packetizer consults 11*0b57cec5SDimitry Andric // the DFA to see if machine resources are available to execute I. If so, the 12*0b57cec5SDimitry Andric // packetizer checks if I depends on any instruction J in the current packet. 13*0b57cec5SDimitry Andric // If no dependency is found, I is added to current packet and machine resource 14*0b57cec5SDimitry Andric // is marked as taken. If any dependency is found, a target API call is made to 15*0b57cec5SDimitry Andric // prune the dependence. 
16*0b57cec5SDimitry Andric // 17*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 18*0b57cec5SDimitry Andric 19*0b57cec5SDimitry Andric #include "HexagonVLIWPacketizer.h" 20*0b57cec5SDimitry Andric #include "Hexagon.h" 21*0b57cec5SDimitry Andric #include "HexagonInstrInfo.h" 22*0b57cec5SDimitry Andric #include "HexagonRegisterInfo.h" 23*0b57cec5SDimitry Andric #include "HexagonSubtarget.h" 24*0b57cec5SDimitry Andric #include "llvm/ADT/BitVector.h" 25*0b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h" 26*0b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 27*0b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 28*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 29*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 30*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 31*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 32*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 33*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 34*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 35*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBundle.h" 36*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 37*0b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 38*0b57cec5SDimitry Andric #include "llvm/CodeGen/ScheduleDAG.h" 39*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 40*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 41*0b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 42*0b57cec5SDimitry Andric #include "llvm/MC/MCInstrDesc.h" 43*0b57cec5SDimitry Andric #include "llvm/Pass.h" 44*0b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 45*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 46*0b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 47*0b57cec5SDimitry Andric 
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "packets"

// Command-line switches for the packetizer. All of them are hidden options.

// When set, the pass runs the packetizer in its "minimal" mode
// (see runOnMachineFunction).
static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden,
  cl::ZeroOrMore, cl::init(false),
  cl::desc("Disable Hexagon packetizer pass"));

// Not file-local (no 'static'), so the option has external linkage.
// NOTE(review): presumably referenced from another translation unit.
cl::opt<bool> Slot1Store("slot1-store-slot0-load", cl::Hidden,
  cl::ZeroOrMore, cl::init(true),
  cl::desc("Allow slot1 store and slot0 load"));

static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
  cl::ZeroOrMore, cl::Hidden, cl::init(true),
  cl::desc("Allow non-solo packetization of volatile memory references"));

static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false),
  cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC"));

static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores",
  cl::init(false), cl::Hidden, cl::ZeroOrMore,
  cl::desc("Disable vector double new-value-stores"));

// Defined outside of this file.
extern cl::opt<bool> ScheduleInlineAsm;

namespace llvm {

FunctionPass *createHexagonPacketizer(bool Minimal);
void initializeHexagonPacketizerPass(PassRegistry&);

} // end namespace llvm

namespace {

  /// Machine-function pass that builds a HexagonPacketizerList for the
  /// function and runs it over every scheduling region of every basic block.
  class HexagonPacketizer : public MachineFunctionPass {
  public:
    static char ID;

    /// \param Min  forwarded to the packetizer list as part of its "minimal"
    ///             flag (see runOnMachineFunction).
    HexagonPacketizer(bool Min = false)
      : MachineFunctionPass(ID), Minimal(Min) {}

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      AU.addRequired<AAResultsWrapperPass>();
      AU.addRequired<MachineBranchProbabilityInfo>();
      AU.addRequired<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    StringRef getPassName() const override { return "Hexagon Packetizer"; }
    bool runOnMachineFunction(MachineFunction &Fn) override;

    /// The pass requires that no virtual registers remain.
    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().set(
          MachineFunctionProperties::Property::NoVRegs);
    }

  private:
    // Both pointers are assigned at the top of runOnMachineFunction. They are
    // null-initialized so that an accidental early use is a null dereference
    // rather than a read of an indeterminate pointer.
    const HexagonInstrInfo *HII = nullptr;
    const HexagonRegisterInfo *HRI = nullptr;
    const bool Minimal;
  };

} // end anonymous namespace

char HexagonPacketizer::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonPacketizer, "hexagon-packetizer",
                      "Hexagon Packetizer", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer",
                    "Hexagon Packetizer", false, false)

/// Set up the packetizer list for \p MF: cache the Hexagon instruction and
/// register info and register the three Hexagon-specific DAG mutations.
HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
      MachineLoopInfo &MLI, AliasAnalysis *AA,
      const MachineBranchProbabilityInfo *MBPI, bool Minimal)
    : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI),
      Minimal(Minimal) {
  // Look the subtarget up once; both cached pointers come from it.
  const auto &HST = MF.getSubtarget<HexagonSubtarget>();
  HII = HST.getInstrInfo();
  HRI = HST.getRegisterInfo();

  addMutation(llvm::make_unique<HexagonSubtarget::UsrOverflowMutation>());
  addMutation(llvm::make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
  addMutation(llvm::make_unique<HexagonSubtarget::BankConflictMutation>());
}

// Check if FirstI modifies a register that SecondI reads.
145*0b57cec5SDimitry Andric static bool hasWriteToReadDep(const MachineInstr &FirstI, 146*0b57cec5SDimitry Andric const MachineInstr &SecondI, 147*0b57cec5SDimitry Andric const TargetRegisterInfo *TRI) { 148*0b57cec5SDimitry Andric for (auto &MO : FirstI.operands()) { 149*0b57cec5SDimitry Andric if (!MO.isReg() || !MO.isDef()) 150*0b57cec5SDimitry Andric continue; 151*0b57cec5SDimitry Andric unsigned R = MO.getReg(); 152*0b57cec5SDimitry Andric if (SecondI.readsRegister(R, TRI)) 153*0b57cec5SDimitry Andric return true; 154*0b57cec5SDimitry Andric } 155*0b57cec5SDimitry Andric return false; 156*0b57cec5SDimitry Andric } 157*0b57cec5SDimitry Andric 158*0b57cec5SDimitry Andric 159*0b57cec5SDimitry Andric static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI, 160*0b57cec5SDimitry Andric MachineBasicBlock::iterator BundleIt, bool Before) { 161*0b57cec5SDimitry Andric MachineBasicBlock::instr_iterator InsertPt; 162*0b57cec5SDimitry Andric if (Before) 163*0b57cec5SDimitry Andric InsertPt = BundleIt.getInstrIterator(); 164*0b57cec5SDimitry Andric else 165*0b57cec5SDimitry Andric InsertPt = std::next(BundleIt).getInstrIterator(); 166*0b57cec5SDimitry Andric 167*0b57cec5SDimitry Andric MachineBasicBlock &B = *MI.getParent(); 168*0b57cec5SDimitry Andric // The instruction should at least be bundled with the preceding instruction 169*0b57cec5SDimitry Andric // (there will always be one, i.e. BUNDLE, if nothing else). 170*0b57cec5SDimitry Andric assert(MI.isBundledWithPred()); 171*0b57cec5SDimitry Andric if (MI.isBundledWithSucc()) { 172*0b57cec5SDimitry Andric MI.clearFlag(MachineInstr::BundledSucc); 173*0b57cec5SDimitry Andric MI.clearFlag(MachineInstr::BundledPred); 174*0b57cec5SDimitry Andric } else { 175*0b57cec5SDimitry Andric // If it's not bundled with the successor (i.e. 
it is the last one 176*0b57cec5SDimitry Andric // in the bundle), then we can simply unbundle it from the predecessor, 177*0b57cec5SDimitry Andric // which will take care of updating the predecessor's flag. 178*0b57cec5SDimitry Andric MI.unbundleFromPred(); 179*0b57cec5SDimitry Andric } 180*0b57cec5SDimitry Andric B.splice(InsertPt, &B, MI.getIterator()); 181*0b57cec5SDimitry Andric 182*0b57cec5SDimitry Andric // Get the size of the bundle without asserting. 183*0b57cec5SDimitry Andric MachineBasicBlock::const_instr_iterator I = BundleIt.getInstrIterator(); 184*0b57cec5SDimitry Andric MachineBasicBlock::const_instr_iterator E = B.instr_end(); 185*0b57cec5SDimitry Andric unsigned Size = 0; 186*0b57cec5SDimitry Andric for (++I; I != E && I->isBundledWithPred(); ++I) 187*0b57cec5SDimitry Andric ++Size; 188*0b57cec5SDimitry Andric 189*0b57cec5SDimitry Andric // If there are still two or more instructions, then there is nothing 190*0b57cec5SDimitry Andric // else to be done. 191*0b57cec5SDimitry Andric if (Size > 1) 192*0b57cec5SDimitry Andric return BundleIt; 193*0b57cec5SDimitry Andric 194*0b57cec5SDimitry Andric // Otherwise, extract the single instruction out and delete the bundle. 
195*0b57cec5SDimitry Andric MachineBasicBlock::iterator NextIt = std::next(BundleIt); 196*0b57cec5SDimitry Andric MachineInstr &SingleI = *BundleIt->getNextNode(); 197*0b57cec5SDimitry Andric SingleI.unbundleFromPred(); 198*0b57cec5SDimitry Andric assert(!SingleI.isBundledWithSucc()); 199*0b57cec5SDimitry Andric BundleIt->eraseFromParent(); 200*0b57cec5SDimitry Andric return NextIt; 201*0b57cec5SDimitry Andric } 202*0b57cec5SDimitry Andric 203*0b57cec5SDimitry Andric bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { 204*0b57cec5SDimitry Andric auto &HST = MF.getSubtarget<HexagonSubtarget>(); 205*0b57cec5SDimitry Andric HII = HST.getInstrInfo(); 206*0b57cec5SDimitry Andric HRI = HST.getRegisterInfo(); 207*0b57cec5SDimitry Andric auto &MLI = getAnalysis<MachineLoopInfo>(); 208*0b57cec5SDimitry Andric auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 209*0b57cec5SDimitry Andric auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); 210*0b57cec5SDimitry Andric 211*0b57cec5SDimitry Andric if (EnableGenAllInsnClass) 212*0b57cec5SDimitry Andric HII->genAllInsnTimingClasses(MF); 213*0b57cec5SDimitry Andric 214*0b57cec5SDimitry Andric // Instantiate the packetizer. 215*0b57cec5SDimitry Andric bool MinOnly = Minimal || DisablePacketizer || !HST.usePackets() || 216*0b57cec5SDimitry Andric skipFunction(MF.getFunction()); 217*0b57cec5SDimitry Andric HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI, MinOnly); 218*0b57cec5SDimitry Andric 219*0b57cec5SDimitry Andric // DFA state table should not be empty. 220*0b57cec5SDimitry Andric assert(Packetizer.getResourceTracker() && "Empty DFA table!"); 221*0b57cec5SDimitry Andric 222*0b57cec5SDimitry Andric // Loop over all basic blocks and remove KILL pseudo-instructions 223*0b57cec5SDimitry Andric // These instructions confuse the dependence analysis. Consider: 224*0b57cec5SDimitry Andric // D0 = ... 
(Insn 0) 225*0b57cec5SDimitry Andric // R0 = KILL R0, D0 (Insn 1) 226*0b57cec5SDimitry Andric // R0 = ... (Insn 2) 227*0b57cec5SDimitry Andric // Here, Insn 1 will result in the dependence graph not emitting an output 228*0b57cec5SDimitry Andric // dependence between Insn 0 and Insn 2. This can lead to incorrect 229*0b57cec5SDimitry Andric // packetization 230*0b57cec5SDimitry Andric for (MachineBasicBlock &MB : MF) { 231*0b57cec5SDimitry Andric auto End = MB.end(); 232*0b57cec5SDimitry Andric auto MI = MB.begin(); 233*0b57cec5SDimitry Andric while (MI != End) { 234*0b57cec5SDimitry Andric auto NextI = std::next(MI); 235*0b57cec5SDimitry Andric if (MI->isKill()) { 236*0b57cec5SDimitry Andric MB.erase(MI); 237*0b57cec5SDimitry Andric End = MB.end(); 238*0b57cec5SDimitry Andric } 239*0b57cec5SDimitry Andric MI = NextI; 240*0b57cec5SDimitry Andric } 241*0b57cec5SDimitry Andric } 242*0b57cec5SDimitry Andric 243*0b57cec5SDimitry Andric // Loop over all of the basic blocks. 244*0b57cec5SDimitry Andric for (auto &MB : MF) { 245*0b57cec5SDimitry Andric auto Begin = MB.begin(), End = MB.end(); 246*0b57cec5SDimitry Andric while (Begin != End) { 247*0b57cec5SDimitry Andric // Find the first non-boundary starting from the end of the last 248*0b57cec5SDimitry Andric // scheduling region. 249*0b57cec5SDimitry Andric MachineBasicBlock::iterator RB = Begin; 250*0b57cec5SDimitry Andric while (RB != End && HII->isSchedulingBoundary(*RB, &MB, MF)) 251*0b57cec5SDimitry Andric ++RB; 252*0b57cec5SDimitry Andric // Find the first boundary starting from the beginning of the new 253*0b57cec5SDimitry Andric // region. 254*0b57cec5SDimitry Andric MachineBasicBlock::iterator RE = RB; 255*0b57cec5SDimitry Andric while (RE != End && !HII->isSchedulingBoundary(*RE, &MB, MF)) 256*0b57cec5SDimitry Andric ++RE; 257*0b57cec5SDimitry Andric // Add the scheduling boundary if it's not block end. 
258*0b57cec5SDimitry Andric if (RE != End) 259*0b57cec5SDimitry Andric ++RE; 260*0b57cec5SDimitry Andric // If RB == End, then RE == End. 261*0b57cec5SDimitry Andric if (RB != End) 262*0b57cec5SDimitry Andric Packetizer.PacketizeMIs(&MB, RB, RE); 263*0b57cec5SDimitry Andric 264*0b57cec5SDimitry Andric Begin = RE; 265*0b57cec5SDimitry Andric } 266*0b57cec5SDimitry Andric } 267*0b57cec5SDimitry Andric 268*0b57cec5SDimitry Andric Packetizer.unpacketizeSoloInstrs(MF); 269*0b57cec5SDimitry Andric return true; 270*0b57cec5SDimitry Andric } 271*0b57cec5SDimitry Andric 272*0b57cec5SDimitry Andric // Reserve resources for a constant extender. Trigger an assertion if the 273*0b57cec5SDimitry Andric // reservation fails. 274*0b57cec5SDimitry Andric void HexagonPacketizerList::reserveResourcesForConstExt() { 275*0b57cec5SDimitry Andric if (!tryAllocateResourcesForConstExt(true)) 276*0b57cec5SDimitry Andric llvm_unreachable("Resources not available"); 277*0b57cec5SDimitry Andric } 278*0b57cec5SDimitry Andric 279*0b57cec5SDimitry Andric bool HexagonPacketizerList::canReserveResourcesForConstExt() { 280*0b57cec5SDimitry Andric return tryAllocateResourcesForConstExt(false); 281*0b57cec5SDimitry Andric } 282*0b57cec5SDimitry Andric 283*0b57cec5SDimitry Andric // Allocate resources (i.e. 4 bytes) for constant extender. If succeeded, 284*0b57cec5SDimitry Andric // return true, otherwise, return false. 
285*0b57cec5SDimitry Andric bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { 286*0b57cec5SDimitry Andric auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc()); 287*0b57cec5SDimitry Andric bool Avail = ResourceTracker->canReserveResources(*ExtMI); 288*0b57cec5SDimitry Andric if (Reserve && Avail) 289*0b57cec5SDimitry Andric ResourceTracker->reserveResources(*ExtMI); 290*0b57cec5SDimitry Andric MF.DeleteMachineInstr(ExtMI); 291*0b57cec5SDimitry Andric return Avail; 292*0b57cec5SDimitry Andric } 293*0b57cec5SDimitry Andric 294*0b57cec5SDimitry Andric bool HexagonPacketizerList::isCallDependent(const MachineInstr &MI, 295*0b57cec5SDimitry Andric SDep::Kind DepType, unsigned DepReg) { 296*0b57cec5SDimitry Andric // Check for LR dependence. 297*0b57cec5SDimitry Andric if (DepReg == HRI->getRARegister()) 298*0b57cec5SDimitry Andric return true; 299*0b57cec5SDimitry Andric 300*0b57cec5SDimitry Andric if (HII->isDeallocRet(MI)) 301*0b57cec5SDimitry Andric if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister()) 302*0b57cec5SDimitry Andric return true; 303*0b57cec5SDimitry Andric 304*0b57cec5SDimitry Andric // Call-like instructions can be packetized with preceding instructions 305*0b57cec5SDimitry Andric // that define registers implicitly used or modified by the call. Explicit 306*0b57cec5SDimitry Andric // uses are still prohibited, as in the case of indirect calls: 307*0b57cec5SDimitry Andric // r0 = ... 
308*0b57cec5SDimitry Andric // J2_jumpr r0 309*0b57cec5SDimitry Andric if (DepType == SDep::Data) { 310*0b57cec5SDimitry Andric for (const MachineOperand MO : MI.operands()) 311*0b57cec5SDimitry Andric if (MO.isReg() && MO.getReg() == DepReg && !MO.isImplicit()) 312*0b57cec5SDimitry Andric return true; 313*0b57cec5SDimitry Andric } 314*0b57cec5SDimitry Andric 315*0b57cec5SDimitry Andric return false; 316*0b57cec5SDimitry Andric } 317*0b57cec5SDimitry Andric 318*0b57cec5SDimitry Andric static bool isRegDependence(const SDep::Kind DepType) { 319*0b57cec5SDimitry Andric return DepType == SDep::Data || DepType == SDep::Anti || 320*0b57cec5SDimitry Andric DepType == SDep::Output; 321*0b57cec5SDimitry Andric } 322*0b57cec5SDimitry Andric 323*0b57cec5SDimitry Andric static bool isDirectJump(const MachineInstr &MI) { 324*0b57cec5SDimitry Andric return MI.getOpcode() == Hexagon::J2_jump; 325*0b57cec5SDimitry Andric } 326*0b57cec5SDimitry Andric 327*0b57cec5SDimitry Andric static bool isSchedBarrier(const MachineInstr &MI) { 328*0b57cec5SDimitry Andric switch (MI.getOpcode()) { 329*0b57cec5SDimitry Andric case Hexagon::Y2_barrier: 330*0b57cec5SDimitry Andric return true; 331*0b57cec5SDimitry Andric } 332*0b57cec5SDimitry Andric return false; 333*0b57cec5SDimitry Andric } 334*0b57cec5SDimitry Andric 335*0b57cec5SDimitry Andric static bool isControlFlow(const MachineInstr &MI) { 336*0b57cec5SDimitry Andric return MI.getDesc().isTerminator() || MI.getDesc().isCall(); 337*0b57cec5SDimitry Andric } 338*0b57cec5SDimitry Andric 339*0b57cec5SDimitry Andric /// Returns true if the instruction modifies a callee-saved register. 
340*0b57cec5SDimitry Andric static bool doesModifyCalleeSavedReg(const MachineInstr &MI, 341*0b57cec5SDimitry Andric const TargetRegisterInfo *TRI) { 342*0b57cec5SDimitry Andric const MachineFunction &MF = *MI.getParent()->getParent(); 343*0b57cec5SDimitry Andric for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) 344*0b57cec5SDimitry Andric if (MI.modifiesRegister(*CSR, TRI)) 345*0b57cec5SDimitry Andric return true; 346*0b57cec5SDimitry Andric return false; 347*0b57cec5SDimitry Andric } 348*0b57cec5SDimitry Andric 349*0b57cec5SDimitry Andric // Returns true if an instruction can be promoted to .new predicate or 350*0b57cec5SDimitry Andric // new-value store. 351*0b57cec5SDimitry Andric bool HexagonPacketizerList::isNewifiable(const MachineInstr &MI, 352*0b57cec5SDimitry Andric const TargetRegisterClass *NewRC) { 353*0b57cec5SDimitry Andric // Vector stores can be predicated, and can be new-value stores, but 354*0b57cec5SDimitry Andric // they cannot be predicated on a .new predicate value. 355*0b57cec5SDimitry Andric if (NewRC == &Hexagon::PredRegsRegClass) { 356*0b57cec5SDimitry Andric if (HII->isHVXVec(MI) && MI.mayStore()) 357*0b57cec5SDimitry Andric return false; 358*0b57cec5SDimitry Andric return HII->isPredicated(MI) && HII->getDotNewPredOp(MI, nullptr) > 0; 359*0b57cec5SDimitry Andric } 360*0b57cec5SDimitry Andric // If the class is not PredRegs, it could only apply to new-value stores. 361*0b57cec5SDimitry Andric return HII->mayBeNewStore(MI); 362*0b57cec5SDimitry Andric } 363*0b57cec5SDimitry Andric 364*0b57cec5SDimitry Andric // Promote an instructiont to its .cur form. 365*0b57cec5SDimitry Andric // At this time, we have already made a call to canPromoteToDotCur and made 366*0b57cec5SDimitry Andric // sure that it can *indeed* be promoted. 
367*0b57cec5SDimitry Andric bool HexagonPacketizerList::promoteToDotCur(MachineInstr &MI, 368*0b57cec5SDimitry Andric SDep::Kind DepType, MachineBasicBlock::iterator &MII, 369*0b57cec5SDimitry Andric const TargetRegisterClass* RC) { 370*0b57cec5SDimitry Andric assert(DepType == SDep::Data); 371*0b57cec5SDimitry Andric int CurOpcode = HII->getDotCurOp(MI); 372*0b57cec5SDimitry Andric MI.setDesc(HII->get(CurOpcode)); 373*0b57cec5SDimitry Andric return true; 374*0b57cec5SDimitry Andric } 375*0b57cec5SDimitry Andric 376*0b57cec5SDimitry Andric void HexagonPacketizerList::cleanUpDotCur() { 377*0b57cec5SDimitry Andric MachineInstr *MI = nullptr; 378*0b57cec5SDimitry Andric for (auto BI : CurrentPacketMIs) { 379*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Cleanup packet has "; BI->dump();); 380*0b57cec5SDimitry Andric if (HII->isDotCurInst(*BI)) { 381*0b57cec5SDimitry Andric MI = BI; 382*0b57cec5SDimitry Andric continue; 383*0b57cec5SDimitry Andric } 384*0b57cec5SDimitry Andric if (MI) { 385*0b57cec5SDimitry Andric for (auto &MO : BI->operands()) 386*0b57cec5SDimitry Andric if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg()) 387*0b57cec5SDimitry Andric return; 388*0b57cec5SDimitry Andric } 389*0b57cec5SDimitry Andric } 390*0b57cec5SDimitry Andric if (!MI) 391*0b57cec5SDimitry Andric return; 392*0b57cec5SDimitry Andric // We did not find a use of the CUR, so de-cur it. 393*0b57cec5SDimitry Andric MI->setDesc(HII->get(HII->getNonDotCurOp(*MI))); 394*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Demoted CUR "; MI->dump();); 395*0b57cec5SDimitry Andric } 396*0b57cec5SDimitry Andric 397*0b57cec5SDimitry Andric // Check to see if an instruction can be dot cur. 
398*0b57cec5SDimitry Andric bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI, 399*0b57cec5SDimitry Andric const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, 400*0b57cec5SDimitry Andric const TargetRegisterClass *RC) { 401*0b57cec5SDimitry Andric if (!HII->isHVXVec(MI)) 402*0b57cec5SDimitry Andric return false; 403*0b57cec5SDimitry Andric if (!HII->isHVXVec(*MII)) 404*0b57cec5SDimitry Andric return false; 405*0b57cec5SDimitry Andric 406*0b57cec5SDimitry Andric // Already a dot new instruction. 407*0b57cec5SDimitry Andric if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI)) 408*0b57cec5SDimitry Andric return false; 409*0b57cec5SDimitry Andric 410*0b57cec5SDimitry Andric if (!HII->mayBeCurLoad(MI)) 411*0b57cec5SDimitry Andric return false; 412*0b57cec5SDimitry Andric 413*0b57cec5SDimitry Andric // The "cur value" cannot come from inline asm. 414*0b57cec5SDimitry Andric if (PacketSU->getInstr()->isInlineAsm()) 415*0b57cec5SDimitry Andric return false; 416*0b57cec5SDimitry Andric 417*0b57cec5SDimitry Andric // Make sure candidate instruction uses cur. 
418*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; MI.dump(); 419*0b57cec5SDimitry Andric dbgs() << "in packet\n";); 420*0b57cec5SDimitry Andric MachineInstr &MJ = *MII; 421*0b57cec5SDimitry Andric LLVM_DEBUG({ 422*0b57cec5SDimitry Andric dbgs() << "Checking CUR against "; 423*0b57cec5SDimitry Andric MJ.dump(); 424*0b57cec5SDimitry Andric }); 425*0b57cec5SDimitry Andric unsigned DestReg = MI.getOperand(0).getReg(); 426*0b57cec5SDimitry Andric bool FoundMatch = false; 427*0b57cec5SDimitry Andric for (auto &MO : MJ.operands()) 428*0b57cec5SDimitry Andric if (MO.isReg() && MO.getReg() == DestReg) 429*0b57cec5SDimitry Andric FoundMatch = true; 430*0b57cec5SDimitry Andric if (!FoundMatch) 431*0b57cec5SDimitry Andric return false; 432*0b57cec5SDimitry Andric 433*0b57cec5SDimitry Andric // Check for existing uses of a vector register within the packet which 434*0b57cec5SDimitry Andric // would be affected by converting a vector load into .cur formt. 435*0b57cec5SDimitry Andric for (auto BI : CurrentPacketMIs) { 436*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "packet has "; BI->dump();); 437*0b57cec5SDimitry Andric if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo())) 438*0b57cec5SDimitry Andric return false; 439*0b57cec5SDimitry Andric } 440*0b57cec5SDimitry Andric 441*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Can Dot CUR MI\n"; MI.dump();); 442*0b57cec5SDimitry Andric // We can convert the opcode into a .cur. 443*0b57cec5SDimitry Andric return true; 444*0b57cec5SDimitry Andric } 445*0b57cec5SDimitry Andric 446*0b57cec5SDimitry Andric // Promote an instruction to its .new form. At this time, we have already 447*0b57cec5SDimitry Andric // made a call to canPromoteToDotNew and made sure that it can *indeed* be 448*0b57cec5SDimitry Andric // promoted. 
449*0b57cec5SDimitry Andric bool HexagonPacketizerList::promoteToDotNew(MachineInstr &MI, 450*0b57cec5SDimitry Andric SDep::Kind DepType, MachineBasicBlock::iterator &MII, 451*0b57cec5SDimitry Andric const TargetRegisterClass* RC) { 452*0b57cec5SDimitry Andric assert(DepType == SDep::Data); 453*0b57cec5SDimitry Andric int NewOpcode; 454*0b57cec5SDimitry Andric if (RC == &Hexagon::PredRegsRegClass) 455*0b57cec5SDimitry Andric NewOpcode = HII->getDotNewPredOp(MI, MBPI); 456*0b57cec5SDimitry Andric else 457*0b57cec5SDimitry Andric NewOpcode = HII->getDotNewOp(MI); 458*0b57cec5SDimitry Andric MI.setDesc(HII->get(NewOpcode)); 459*0b57cec5SDimitry Andric return true; 460*0b57cec5SDimitry Andric } 461*0b57cec5SDimitry Andric 462*0b57cec5SDimitry Andric bool HexagonPacketizerList::demoteToDotOld(MachineInstr &MI) { 463*0b57cec5SDimitry Andric int NewOpcode = HII->getDotOldOp(MI); 464*0b57cec5SDimitry Andric MI.setDesc(HII->get(NewOpcode)); 465*0b57cec5SDimitry Andric return true; 466*0b57cec5SDimitry Andric } 467*0b57cec5SDimitry Andric 468*0b57cec5SDimitry Andric bool HexagonPacketizerList::useCallersSP(MachineInstr &MI) { 469*0b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 470*0b57cec5SDimitry Andric switch (Opc) { 471*0b57cec5SDimitry Andric case Hexagon::S2_storerd_io: 472*0b57cec5SDimitry Andric case Hexagon::S2_storeri_io: 473*0b57cec5SDimitry Andric case Hexagon::S2_storerh_io: 474*0b57cec5SDimitry Andric case Hexagon::S2_storerb_io: 475*0b57cec5SDimitry Andric break; 476*0b57cec5SDimitry Andric default: 477*0b57cec5SDimitry Andric llvm_unreachable("Unexpected instruction"); 478*0b57cec5SDimitry Andric } 479*0b57cec5SDimitry Andric unsigned FrameSize = MF.getFrameInfo().getStackSize(); 480*0b57cec5SDimitry Andric MachineOperand &Off = MI.getOperand(1); 481*0b57cec5SDimitry Andric int64_t NewOff = Off.getImm() - (FrameSize + HEXAGON_LRFP_SIZE); 482*0b57cec5SDimitry Andric if (HII->isValidOffset(Opc, NewOff, HRI)) { 483*0b57cec5SDimitry Andric 
Off.setImm(NewOff); 484*0b57cec5SDimitry Andric return true; 485*0b57cec5SDimitry Andric } 486*0b57cec5SDimitry Andric return false; 487*0b57cec5SDimitry Andric } 488*0b57cec5SDimitry Andric 489*0b57cec5SDimitry Andric void HexagonPacketizerList::useCalleesSP(MachineInstr &MI) { 490*0b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 491*0b57cec5SDimitry Andric switch (Opc) { 492*0b57cec5SDimitry Andric case Hexagon::S2_storerd_io: 493*0b57cec5SDimitry Andric case Hexagon::S2_storeri_io: 494*0b57cec5SDimitry Andric case Hexagon::S2_storerh_io: 495*0b57cec5SDimitry Andric case Hexagon::S2_storerb_io: 496*0b57cec5SDimitry Andric break; 497*0b57cec5SDimitry Andric default: 498*0b57cec5SDimitry Andric llvm_unreachable("Unexpected instruction"); 499*0b57cec5SDimitry Andric } 500*0b57cec5SDimitry Andric unsigned FrameSize = MF.getFrameInfo().getStackSize(); 501*0b57cec5SDimitry Andric MachineOperand &Off = MI.getOperand(1); 502*0b57cec5SDimitry Andric Off.setImm(Off.getImm() + FrameSize + HEXAGON_LRFP_SIZE); 503*0b57cec5SDimitry Andric } 504*0b57cec5SDimitry Andric 505*0b57cec5SDimitry Andric /// Return true if we can update the offset in MI so that MI and MJ 506*0b57cec5SDimitry Andric /// can be packetized together. 
507*0b57cec5SDimitry Andric bool HexagonPacketizerList::updateOffset(SUnit *SUI, SUnit *SUJ) { 508*0b57cec5SDimitry Andric assert(SUI->getInstr() && SUJ->getInstr()); 509*0b57cec5SDimitry Andric MachineInstr &MI = *SUI->getInstr(); 510*0b57cec5SDimitry Andric MachineInstr &MJ = *SUJ->getInstr(); 511*0b57cec5SDimitry Andric 512*0b57cec5SDimitry Andric unsigned BPI, OPI; 513*0b57cec5SDimitry Andric if (!HII->getBaseAndOffsetPosition(MI, BPI, OPI)) 514*0b57cec5SDimitry Andric return false; 515*0b57cec5SDimitry Andric unsigned BPJ, OPJ; 516*0b57cec5SDimitry Andric if (!HII->getBaseAndOffsetPosition(MJ, BPJ, OPJ)) 517*0b57cec5SDimitry Andric return false; 518*0b57cec5SDimitry Andric unsigned Reg = MI.getOperand(BPI).getReg(); 519*0b57cec5SDimitry Andric if (Reg != MJ.getOperand(BPJ).getReg()) 520*0b57cec5SDimitry Andric return false; 521*0b57cec5SDimitry Andric // Make sure that the dependences do not restrict adding MI to the packet. 522*0b57cec5SDimitry Andric // That is, ignore anti dependences, and make sure the only data dependence 523*0b57cec5SDimitry Andric // involves the specific register. 
524*0b57cec5SDimitry Andric for (const auto &PI : SUI->Preds) 525*0b57cec5SDimitry Andric if (PI.getKind() != SDep::Anti && 526*0b57cec5SDimitry Andric (PI.getKind() != SDep::Data || PI.getReg() != Reg)) 527*0b57cec5SDimitry Andric return false; 528*0b57cec5SDimitry Andric int Incr; 529*0b57cec5SDimitry Andric if (!HII->getIncrementValue(MJ, Incr)) 530*0b57cec5SDimitry Andric return false; 531*0b57cec5SDimitry Andric 532*0b57cec5SDimitry Andric int64_t Offset = MI.getOperand(OPI).getImm(); 533*0b57cec5SDimitry Andric if (!HII->isValidOffset(MI.getOpcode(), Offset+Incr, HRI)) 534*0b57cec5SDimitry Andric return false; 535*0b57cec5SDimitry Andric 536*0b57cec5SDimitry Andric MI.getOperand(OPI).setImm(Offset + Incr); 537*0b57cec5SDimitry Andric ChangedOffset = Offset; 538*0b57cec5SDimitry Andric return true; 539*0b57cec5SDimitry Andric } 540*0b57cec5SDimitry Andric 541*0b57cec5SDimitry Andric /// Undo the changed offset. This is needed if the instruction cannot be 542*0b57cec5SDimitry Andric /// added to the current packet due to a different instruction. 543*0b57cec5SDimitry Andric void HexagonPacketizerList::undoChangedOffset(MachineInstr &MI) { 544*0b57cec5SDimitry Andric unsigned BP, OP; 545*0b57cec5SDimitry Andric if (!HII->getBaseAndOffsetPosition(MI, BP, OP)) 546*0b57cec5SDimitry Andric llvm_unreachable("Unable to find base and offset operands."); 547*0b57cec5SDimitry Andric MI.getOperand(OP).setImm(ChangedOffset); 548*0b57cec5SDimitry Andric } 549*0b57cec5SDimitry Andric 550*0b57cec5SDimitry Andric enum PredicateKind { 551*0b57cec5SDimitry Andric PK_False, 552*0b57cec5SDimitry Andric PK_True, 553*0b57cec5SDimitry Andric PK_Unknown 554*0b57cec5SDimitry Andric }; 555*0b57cec5SDimitry Andric 556*0b57cec5SDimitry Andric /// Returns true if an instruction is predicated on p0 and false if it's 557*0b57cec5SDimitry Andric /// predicated on !p0. 
558*0b57cec5SDimitry Andric static PredicateKind getPredicateSense(const MachineInstr &MI, 559*0b57cec5SDimitry Andric const HexagonInstrInfo *HII) { 560*0b57cec5SDimitry Andric if (!HII->isPredicated(MI)) 561*0b57cec5SDimitry Andric return PK_Unknown; 562*0b57cec5SDimitry Andric if (HII->isPredicatedTrue(MI)) 563*0b57cec5SDimitry Andric return PK_True; 564*0b57cec5SDimitry Andric return PK_False; 565*0b57cec5SDimitry Andric } 566*0b57cec5SDimitry Andric 567*0b57cec5SDimitry Andric static const MachineOperand &getPostIncrementOperand(const MachineInstr &MI, 568*0b57cec5SDimitry Andric const HexagonInstrInfo *HII) { 569*0b57cec5SDimitry Andric assert(HII->isPostIncrement(MI) && "Not a post increment operation."); 570*0b57cec5SDimitry Andric #ifndef NDEBUG 571*0b57cec5SDimitry Andric // Post Increment means duplicates. Use dense map to find duplicates in the 572*0b57cec5SDimitry Andric // list. Caution: Densemap initializes with the minimum of 64 buckets, 573*0b57cec5SDimitry Andric // whereas there are at most 5 operands in the post increment. 574*0b57cec5SDimitry Andric DenseSet<unsigned> DefRegsSet; 575*0b57cec5SDimitry Andric for (auto &MO : MI.operands()) 576*0b57cec5SDimitry Andric if (MO.isReg() && MO.isDef()) 577*0b57cec5SDimitry Andric DefRegsSet.insert(MO.getReg()); 578*0b57cec5SDimitry Andric 579*0b57cec5SDimitry Andric for (auto &MO : MI.operands()) 580*0b57cec5SDimitry Andric if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg())) 581*0b57cec5SDimitry Andric return MO; 582*0b57cec5SDimitry Andric #else 583*0b57cec5SDimitry Andric if (MI.mayLoad()) { 584*0b57cec5SDimitry Andric const MachineOperand &Op1 = MI.getOperand(1); 585*0b57cec5SDimitry Andric // The 2nd operand is always the post increment operand in load. 
586*0b57cec5SDimitry Andric assert(Op1.isReg() && "Post increment operand has be to a register."); 587*0b57cec5SDimitry Andric return Op1; 588*0b57cec5SDimitry Andric } 589*0b57cec5SDimitry Andric if (MI.getDesc().mayStore()) { 590*0b57cec5SDimitry Andric const MachineOperand &Op0 = MI.getOperand(0); 591*0b57cec5SDimitry Andric // The 1st operand is always the post increment operand in store. 592*0b57cec5SDimitry Andric assert(Op0.isReg() && "Post increment operand has be to a register."); 593*0b57cec5SDimitry Andric return Op0; 594*0b57cec5SDimitry Andric } 595*0b57cec5SDimitry Andric #endif 596*0b57cec5SDimitry Andric // we should never come here. 597*0b57cec5SDimitry Andric llvm_unreachable("mayLoad or mayStore not set for Post Increment operation"); 598*0b57cec5SDimitry Andric } 599*0b57cec5SDimitry Andric 600*0b57cec5SDimitry Andric // Get the value being stored. 601*0b57cec5SDimitry Andric static const MachineOperand& getStoreValueOperand(const MachineInstr &MI) { 602*0b57cec5SDimitry Andric // value being stored is always the last operand. 
603*0b57cec5SDimitry Andric return MI.getOperand(MI.getNumOperands()-1); 604*0b57cec5SDimitry Andric } 605*0b57cec5SDimitry Andric 606*0b57cec5SDimitry Andric static bool isLoadAbsSet(const MachineInstr &MI) { 607*0b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 608*0b57cec5SDimitry Andric switch (Opc) { 609*0b57cec5SDimitry Andric case Hexagon::L4_loadrd_ap: 610*0b57cec5SDimitry Andric case Hexagon::L4_loadrb_ap: 611*0b57cec5SDimitry Andric case Hexagon::L4_loadrh_ap: 612*0b57cec5SDimitry Andric case Hexagon::L4_loadrub_ap: 613*0b57cec5SDimitry Andric case Hexagon::L4_loadruh_ap: 614*0b57cec5SDimitry Andric case Hexagon::L4_loadri_ap: 615*0b57cec5SDimitry Andric return true; 616*0b57cec5SDimitry Andric } 617*0b57cec5SDimitry Andric return false; 618*0b57cec5SDimitry Andric } 619*0b57cec5SDimitry Andric 620*0b57cec5SDimitry Andric static const MachineOperand &getAbsSetOperand(const MachineInstr &MI) { 621*0b57cec5SDimitry Andric assert(isLoadAbsSet(MI)); 622*0b57cec5SDimitry Andric return MI.getOperand(1); 623*0b57cec5SDimitry Andric } 624*0b57cec5SDimitry Andric 625*0b57cec5SDimitry Andric // Can be new value store? 626*0b57cec5SDimitry Andric // Following restrictions are to be respected in convert a store into 627*0b57cec5SDimitry Andric // a new value store. 628*0b57cec5SDimitry Andric // 1. If an instruction uses auto-increment, its address register cannot 629*0b57cec5SDimitry Andric // be a new-value register. Arch Spec 5.4.2.1 630*0b57cec5SDimitry Andric // 2. If an instruction uses absolute-set addressing mode, its address 631*0b57cec5SDimitry Andric // register cannot be a new-value register. Arch Spec 5.4.2.1. 632*0b57cec5SDimitry Andric // 3. If an instruction produces a 64-bit result, its registers cannot be used 633*0b57cec5SDimitry Andric // as new-value registers. Arch Spec 5.4.2.2. 634*0b57cec5SDimitry Andric // 4. 
If the instruction that sets the new-value register is conditional, then 635*0b57cec5SDimitry Andric // the instruction that uses the new-value register must also be conditional, 636*0b57cec5SDimitry Andric // and both must always have their predicates evaluate identically. 637*0b57cec5SDimitry Andric // Arch Spec 5.4.2.3. 638*0b57cec5SDimitry Andric // 5. There is an implied restriction that a packet cannot have another store, 639*0b57cec5SDimitry Andric // if there is a new value store in the packet. Corollary: if there is 640*0b57cec5SDimitry Andric // already a store in a packet, there can not be a new value store. 641*0b57cec5SDimitry Andric // Arch Spec: 3.4.4.2 642*0b57cec5SDimitry Andric bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI, 643*0b57cec5SDimitry Andric const MachineInstr &PacketMI, unsigned DepReg) { 644*0b57cec5SDimitry Andric // Make sure we are looking at the store, that can be promoted. 645*0b57cec5SDimitry Andric if (!HII->mayBeNewStore(MI)) 646*0b57cec5SDimitry Andric return false; 647*0b57cec5SDimitry Andric 648*0b57cec5SDimitry Andric // Make sure there is dependency and can be new value'd. 649*0b57cec5SDimitry Andric const MachineOperand &Val = getStoreValueOperand(MI); 650*0b57cec5SDimitry Andric if (Val.isReg() && Val.getReg() != DepReg) 651*0b57cec5SDimitry Andric return false; 652*0b57cec5SDimitry Andric 653*0b57cec5SDimitry Andric const MCInstrDesc& MCID = PacketMI.getDesc(); 654*0b57cec5SDimitry Andric 655*0b57cec5SDimitry Andric // First operand is always the result. 656*0b57cec5SDimitry Andric const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF); 657*0b57cec5SDimitry Andric // Double regs can not feed into new value store: PRM section: 5.4.2.2. 
658*0b57cec5SDimitry Andric if (PacketRC == &Hexagon::DoubleRegsRegClass) 659*0b57cec5SDimitry Andric return false; 660*0b57cec5SDimitry Andric 661*0b57cec5SDimitry Andric // New-value stores are of class NV (slot 0), dual stores require class ST 662*0b57cec5SDimitry Andric // in slot 0 (PRM 5.5). 663*0b57cec5SDimitry Andric for (auto I : CurrentPacketMIs) { 664*0b57cec5SDimitry Andric SUnit *PacketSU = MIToSUnit.find(I)->second; 665*0b57cec5SDimitry Andric if (PacketSU->getInstr()->mayStore()) 666*0b57cec5SDimitry Andric return false; 667*0b57cec5SDimitry Andric } 668*0b57cec5SDimitry Andric 669*0b57cec5SDimitry Andric // Make sure it's NOT the post increment register that we are going to 670*0b57cec5SDimitry Andric // new value. 671*0b57cec5SDimitry Andric if (HII->isPostIncrement(MI) && 672*0b57cec5SDimitry Andric getPostIncrementOperand(MI, HII).getReg() == DepReg) { 673*0b57cec5SDimitry Andric return false; 674*0b57cec5SDimitry Andric } 675*0b57cec5SDimitry Andric 676*0b57cec5SDimitry Andric if (HII->isPostIncrement(PacketMI) && PacketMI.mayLoad() && 677*0b57cec5SDimitry Andric getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) { 678*0b57cec5SDimitry Andric // If source is post_inc, or absolute-set addressing, it can not feed 679*0b57cec5SDimitry Andric // into new value store 680*0b57cec5SDimitry Andric // r3 = memw(r2++#4) 681*0b57cec5SDimitry Andric // memw(r30 + #-1404) = r2.new -> can not be new value store 682*0b57cec5SDimitry Andric // arch spec section: 5.4.2.1. 683*0b57cec5SDimitry Andric return false; 684*0b57cec5SDimitry Andric } 685*0b57cec5SDimitry Andric 686*0b57cec5SDimitry Andric if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg) 687*0b57cec5SDimitry Andric return false; 688*0b57cec5SDimitry Andric 689*0b57cec5SDimitry Andric // If the source that feeds the store is predicated, new value store must 690*0b57cec5SDimitry Andric // also be predicated. 
691*0b57cec5SDimitry Andric if (HII->isPredicated(PacketMI)) { 692*0b57cec5SDimitry Andric if (!HII->isPredicated(MI)) 693*0b57cec5SDimitry Andric return false; 694*0b57cec5SDimitry Andric 695*0b57cec5SDimitry Andric // Check to make sure that they both will have their predicates 696*0b57cec5SDimitry Andric // evaluate identically. 697*0b57cec5SDimitry Andric unsigned predRegNumSrc = 0; 698*0b57cec5SDimitry Andric unsigned predRegNumDst = 0; 699*0b57cec5SDimitry Andric const TargetRegisterClass* predRegClass = nullptr; 700*0b57cec5SDimitry Andric 701*0b57cec5SDimitry Andric // Get predicate register used in the source instruction. 702*0b57cec5SDimitry Andric for (auto &MO : PacketMI.operands()) { 703*0b57cec5SDimitry Andric if (!MO.isReg()) 704*0b57cec5SDimitry Andric continue; 705*0b57cec5SDimitry Andric predRegNumSrc = MO.getReg(); 706*0b57cec5SDimitry Andric predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc); 707*0b57cec5SDimitry Andric if (predRegClass == &Hexagon::PredRegsRegClass) 708*0b57cec5SDimitry Andric break; 709*0b57cec5SDimitry Andric } 710*0b57cec5SDimitry Andric assert((predRegClass == &Hexagon::PredRegsRegClass) && 711*0b57cec5SDimitry Andric "predicate register not found in a predicated PacketMI instruction"); 712*0b57cec5SDimitry Andric 713*0b57cec5SDimitry Andric // Get predicate register used in new-value store instruction. 
714*0b57cec5SDimitry Andric for (auto &MO : MI.operands()) { 715*0b57cec5SDimitry Andric if (!MO.isReg()) 716*0b57cec5SDimitry Andric continue; 717*0b57cec5SDimitry Andric predRegNumDst = MO.getReg(); 718*0b57cec5SDimitry Andric predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst); 719*0b57cec5SDimitry Andric if (predRegClass == &Hexagon::PredRegsRegClass) 720*0b57cec5SDimitry Andric break; 721*0b57cec5SDimitry Andric } 722*0b57cec5SDimitry Andric assert((predRegClass == &Hexagon::PredRegsRegClass) && 723*0b57cec5SDimitry Andric "predicate register not found in a predicated MI instruction"); 724*0b57cec5SDimitry Andric 725*0b57cec5SDimitry Andric // New-value register producer and user (store) need to satisfy these 726*0b57cec5SDimitry Andric // constraints: 727*0b57cec5SDimitry Andric // 1) Both instructions should be predicated on the same register. 728*0b57cec5SDimitry Andric // 2) If producer of the new-value register is .new predicated then store 729*0b57cec5SDimitry Andric // should also be .new predicated and if producer is not .new predicated 730*0b57cec5SDimitry Andric // then store should not be .new predicated. 731*0b57cec5SDimitry Andric // 3) Both new-value register producer and user should have same predicate 732*0b57cec5SDimitry Andric // sense, i.e, either both should be negated or both should be non-negated. 733*0b57cec5SDimitry Andric if (predRegNumDst != predRegNumSrc || 734*0b57cec5SDimitry Andric HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) || 735*0b57cec5SDimitry Andric getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII)) 736*0b57cec5SDimitry Andric return false; 737*0b57cec5SDimitry Andric } 738*0b57cec5SDimitry Andric 739*0b57cec5SDimitry Andric // Make sure that other than the new-value register no other store instruction 740*0b57cec5SDimitry Andric // register has been modified in the same packet. 
Predicate registers can be 741*0b57cec5SDimitry Andric // modified by they should not be modified between the producer and the store 742*0b57cec5SDimitry Andric // instruction as it will make them both conditional on different values. 743*0b57cec5SDimitry Andric // We already know this to be true for all the instructions before and 744*0b57cec5SDimitry Andric // including PacketMI. Howerver, we need to perform the check for the 745*0b57cec5SDimitry Andric // remaining instructions in the packet. 746*0b57cec5SDimitry Andric 747*0b57cec5SDimitry Andric unsigned StartCheck = 0; 748*0b57cec5SDimitry Andric 749*0b57cec5SDimitry Andric for (auto I : CurrentPacketMIs) { 750*0b57cec5SDimitry Andric SUnit *TempSU = MIToSUnit.find(I)->second; 751*0b57cec5SDimitry Andric MachineInstr &TempMI = *TempSU->getInstr(); 752*0b57cec5SDimitry Andric 753*0b57cec5SDimitry Andric // Following condition is true for all the instructions until PacketMI is 754*0b57cec5SDimitry Andric // reached (StartCheck is set to 0 before the for loop). 755*0b57cec5SDimitry Andric // StartCheck flag is 1 for all the instructions after PacketMI. 756*0b57cec5SDimitry Andric if (&TempMI != &PacketMI && !StartCheck) // Start processing only after 757*0b57cec5SDimitry Andric continue; // encountering PacketMI. 758*0b57cec5SDimitry Andric 759*0b57cec5SDimitry Andric StartCheck = 1; 760*0b57cec5SDimitry Andric if (&TempMI == &PacketMI) // We don't want to check PacketMI for dependence. 761*0b57cec5SDimitry Andric continue; 762*0b57cec5SDimitry Andric 763*0b57cec5SDimitry Andric for (auto &MO : MI.operands()) 764*0b57cec5SDimitry Andric if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI)) 765*0b57cec5SDimitry Andric return false; 766*0b57cec5SDimitry Andric } 767*0b57cec5SDimitry Andric 768*0b57cec5SDimitry Andric // Make sure that for non-POST_INC stores: 769*0b57cec5SDimitry Andric // 1. The only use of reg is DepReg and no other registers. 
770*0b57cec5SDimitry Andric // This handles base+index registers. 771*0b57cec5SDimitry Andric // The following store can not be dot new. 772*0b57cec5SDimitry Andric // Eg. r0 = add(r0, #3) 773*0b57cec5SDimitry Andric // memw(r1+r0<<#2) = r0 774*0b57cec5SDimitry Andric if (!HII->isPostIncrement(MI)) { 775*0b57cec5SDimitry Andric for (unsigned opNum = 0; opNum < MI.getNumOperands()-1; opNum++) { 776*0b57cec5SDimitry Andric const MachineOperand &MO = MI.getOperand(opNum); 777*0b57cec5SDimitry Andric if (MO.isReg() && MO.getReg() == DepReg) 778*0b57cec5SDimitry Andric return false; 779*0b57cec5SDimitry Andric } 780*0b57cec5SDimitry Andric } 781*0b57cec5SDimitry Andric 782*0b57cec5SDimitry Andric // If data definition is because of implicit definition of the register, 783*0b57cec5SDimitry Andric // do not newify the store. Eg. 784*0b57cec5SDimitry Andric // %r9 = ZXTH %r12, implicit %d6, implicit-def %r12 785*0b57cec5SDimitry Andric // S2_storerh_io %r8, 2, killed %r12; mem:ST2[%scevgep343] 786*0b57cec5SDimitry Andric for (auto &MO : PacketMI.operands()) { 787*0b57cec5SDimitry Andric if (MO.isRegMask() && MO.clobbersPhysReg(DepReg)) 788*0b57cec5SDimitry Andric return false; 789*0b57cec5SDimitry Andric if (!MO.isReg() || !MO.isDef() || !MO.isImplicit()) 790*0b57cec5SDimitry Andric continue; 791*0b57cec5SDimitry Andric unsigned R = MO.getReg(); 792*0b57cec5SDimitry Andric if (R == DepReg || HRI->isSuperRegister(DepReg, R)) 793*0b57cec5SDimitry Andric return false; 794*0b57cec5SDimitry Andric } 795*0b57cec5SDimitry Andric 796*0b57cec5SDimitry Andric // Handle imp-use of super reg case. There is a target independent side 797*0b57cec5SDimitry Andric // change that should prevent this situation but I am handling it for 798*0b57cec5SDimitry Andric // just-in-case. 
For example, we cannot newify R2 in the following case: 799*0b57cec5SDimitry Andric // %r3 = A2_tfrsi 0; 800*0b57cec5SDimitry Andric // S2_storeri_io killed %r0, 0, killed %r2, implicit killed %d1; 801*0b57cec5SDimitry Andric for (auto &MO : MI.operands()) { 802*0b57cec5SDimitry Andric if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg) 803*0b57cec5SDimitry Andric return false; 804*0b57cec5SDimitry Andric } 805*0b57cec5SDimitry Andric 806*0b57cec5SDimitry Andric // Can be dot new store. 807*0b57cec5SDimitry Andric return true; 808*0b57cec5SDimitry Andric } 809*0b57cec5SDimitry Andric 810*0b57cec5SDimitry Andric // Can this MI to promoted to either new value store or new value jump. 811*0b57cec5SDimitry Andric bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr &MI, 812*0b57cec5SDimitry Andric const SUnit *PacketSU, unsigned DepReg, 813*0b57cec5SDimitry Andric MachineBasicBlock::iterator &MII) { 814*0b57cec5SDimitry Andric if (!HII->mayBeNewStore(MI)) 815*0b57cec5SDimitry Andric return false; 816*0b57cec5SDimitry Andric 817*0b57cec5SDimitry Andric // Check to see the store can be new value'ed. 818*0b57cec5SDimitry Andric MachineInstr &PacketMI = *PacketSU->getInstr(); 819*0b57cec5SDimitry Andric if (canPromoteToNewValueStore(MI, PacketMI, DepReg)) 820*0b57cec5SDimitry Andric return true; 821*0b57cec5SDimitry Andric 822*0b57cec5SDimitry Andric // Check to see the compare/jump can be new value'ed. 823*0b57cec5SDimitry Andric // This is done as a pass on its own. Don't need to check it here. 
824*0b57cec5SDimitry Andric return false; 825*0b57cec5SDimitry Andric } 826*0b57cec5SDimitry Andric 827*0b57cec5SDimitry Andric static bool isImplicitDependency(const MachineInstr &I, bool CheckDef, 828*0b57cec5SDimitry Andric unsigned DepReg) { 829*0b57cec5SDimitry Andric for (auto &MO : I.operands()) { 830*0b57cec5SDimitry Andric if (CheckDef && MO.isRegMask() && MO.clobbersPhysReg(DepReg)) 831*0b57cec5SDimitry Andric return true; 832*0b57cec5SDimitry Andric if (!MO.isReg() || MO.getReg() != DepReg || !MO.isImplicit()) 833*0b57cec5SDimitry Andric continue; 834*0b57cec5SDimitry Andric if (CheckDef == MO.isDef()) 835*0b57cec5SDimitry Andric return true; 836*0b57cec5SDimitry Andric } 837*0b57cec5SDimitry Andric return false; 838*0b57cec5SDimitry Andric } 839*0b57cec5SDimitry Andric 840*0b57cec5SDimitry Andric // Check to see if an instruction can be dot new. 841*0b57cec5SDimitry Andric bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI, 842*0b57cec5SDimitry Andric const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, 843*0b57cec5SDimitry Andric const TargetRegisterClass* RC) { 844*0b57cec5SDimitry Andric // Already a dot new instruction. 845*0b57cec5SDimitry Andric if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI)) 846*0b57cec5SDimitry Andric return false; 847*0b57cec5SDimitry Andric 848*0b57cec5SDimitry Andric if (!isNewifiable(MI, RC)) 849*0b57cec5SDimitry Andric return false; 850*0b57cec5SDimitry Andric 851*0b57cec5SDimitry Andric const MachineInstr &PI = *PacketSU->getInstr(); 852*0b57cec5SDimitry Andric 853*0b57cec5SDimitry Andric // The "new value" cannot come from inline asm. 854*0b57cec5SDimitry Andric if (PI.isInlineAsm()) 855*0b57cec5SDimitry Andric return false; 856*0b57cec5SDimitry Andric 857*0b57cec5SDimitry Andric // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no 858*0b57cec5SDimitry Andric // sense. 
859*0b57cec5SDimitry Andric if (PI.isImplicitDef()) 860*0b57cec5SDimitry Andric return false; 861*0b57cec5SDimitry Andric 862*0b57cec5SDimitry Andric // If dependency is trough an implicitly defined register, we should not 863*0b57cec5SDimitry Andric // newify the use. 864*0b57cec5SDimitry Andric if (isImplicitDependency(PI, true, DepReg) || 865*0b57cec5SDimitry Andric isImplicitDependency(MI, false, DepReg)) 866*0b57cec5SDimitry Andric return false; 867*0b57cec5SDimitry Andric 868*0b57cec5SDimitry Andric const MCInstrDesc& MCID = PI.getDesc(); 869*0b57cec5SDimitry Andric const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF); 870*0b57cec5SDimitry Andric if (DisableVecDblNVStores && VecRC == &Hexagon::HvxWRRegClass) 871*0b57cec5SDimitry Andric return false; 872*0b57cec5SDimitry Andric 873*0b57cec5SDimitry Andric // predicate .new 874*0b57cec5SDimitry Andric if (RC == &Hexagon::PredRegsRegClass) 875*0b57cec5SDimitry Andric return HII->predCanBeUsedAsDotNew(PI, DepReg); 876*0b57cec5SDimitry Andric 877*0b57cec5SDimitry Andric if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI)) 878*0b57cec5SDimitry Andric return false; 879*0b57cec5SDimitry Andric 880*0b57cec5SDimitry Andric // Create a dot new machine instruction to see if resources can be 881*0b57cec5SDimitry Andric // allocated. If not, bail out now. 882*0b57cec5SDimitry Andric int NewOpcode = HII->getDotNewOp(MI); 883*0b57cec5SDimitry Andric const MCInstrDesc &D = HII->get(NewOpcode); 884*0b57cec5SDimitry Andric MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc()); 885*0b57cec5SDimitry Andric bool ResourcesAvailable = ResourceTracker->canReserveResources(*NewMI); 886*0b57cec5SDimitry Andric MF.DeleteMachineInstr(NewMI); 887*0b57cec5SDimitry Andric if (!ResourcesAvailable) 888*0b57cec5SDimitry Andric return false; 889*0b57cec5SDimitry Andric 890*0b57cec5SDimitry Andric // New Value Store only. New Value Jump generated as a separate pass. 
891*0b57cec5SDimitry Andric if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII)) 892*0b57cec5SDimitry Andric return false; 893*0b57cec5SDimitry Andric 894*0b57cec5SDimitry Andric return true; 895*0b57cec5SDimitry Andric } 896*0b57cec5SDimitry Andric 897*0b57cec5SDimitry Andric // Go through the packet instructions and search for an anti dependency between 898*0b57cec5SDimitry Andric // them and DepReg from MI. Consider this case: 899*0b57cec5SDimitry Andric // Trying to add 900*0b57cec5SDimitry Andric // a) %r1 = TFRI_cdNotPt %p3, 2 901*0b57cec5SDimitry Andric // to this packet: 902*0b57cec5SDimitry Andric // { 903*0b57cec5SDimitry Andric // b) %p0 = C2_or killed %p3, killed %p0 904*0b57cec5SDimitry Andric // c) %p3 = C2_tfrrp %r23 905*0b57cec5SDimitry Andric // d) %r1 = C2_cmovenewit %p3, 4 906*0b57cec5SDimitry Andric // } 907*0b57cec5SDimitry Andric // The P3 from a) and d) will be complements after 908*0b57cec5SDimitry Andric // a)'s P3 is converted to .new form 909*0b57cec5SDimitry Andric // Anti-dep between c) and b) is irrelevant for this case 910*0b57cec5SDimitry Andric bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr &MI, 911*0b57cec5SDimitry Andric unsigned DepReg) { 912*0b57cec5SDimitry Andric SUnit *PacketSUDep = MIToSUnit.find(&MI)->second; 913*0b57cec5SDimitry Andric 914*0b57cec5SDimitry Andric for (auto I : CurrentPacketMIs) { 915*0b57cec5SDimitry Andric // We only care for dependencies to predicated instructions 916*0b57cec5SDimitry Andric if (!HII->isPredicated(*I)) 917*0b57cec5SDimitry Andric continue; 918*0b57cec5SDimitry Andric 919*0b57cec5SDimitry Andric // Scheduling Unit for current insn in the packet 920*0b57cec5SDimitry Andric SUnit *PacketSU = MIToSUnit.find(I)->second; 921*0b57cec5SDimitry Andric 922*0b57cec5SDimitry Andric // Look at dependencies between current members of the packet and 923*0b57cec5SDimitry Andric // predicate defining instruction MI. 
Make sure that dependency is 924*0b57cec5SDimitry Andric // on the exact register we care about. 925*0b57cec5SDimitry Andric if (PacketSU->isSucc(PacketSUDep)) { 926*0b57cec5SDimitry Andric for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { 927*0b57cec5SDimitry Andric auto &Dep = PacketSU->Succs[i]; 928*0b57cec5SDimitry Andric if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti && 929*0b57cec5SDimitry Andric Dep.getReg() == DepReg) 930*0b57cec5SDimitry Andric return true; 931*0b57cec5SDimitry Andric } 932*0b57cec5SDimitry Andric } 933*0b57cec5SDimitry Andric } 934*0b57cec5SDimitry Andric 935*0b57cec5SDimitry Andric return false; 936*0b57cec5SDimitry Andric } 937*0b57cec5SDimitry Andric 938*0b57cec5SDimitry Andric /// Gets the predicate register of a predicated instruction. 939*0b57cec5SDimitry Andric static unsigned getPredicatedRegister(MachineInstr &MI, 940*0b57cec5SDimitry Andric const HexagonInstrInfo *QII) { 941*0b57cec5SDimitry Andric /// We use the following rule: The first predicate register that is a use is 942*0b57cec5SDimitry Andric /// the predicate register of a predicated instruction. 943*0b57cec5SDimitry Andric assert(QII->isPredicated(MI) && "Must be predicated instruction"); 944*0b57cec5SDimitry Andric 945*0b57cec5SDimitry Andric for (auto &Op : MI.operands()) { 946*0b57cec5SDimitry Andric if (Op.isReg() && Op.getReg() && Op.isUse() && 947*0b57cec5SDimitry Andric Hexagon::PredRegsRegClass.contains(Op.getReg())) 948*0b57cec5SDimitry Andric return Op.getReg(); 949*0b57cec5SDimitry Andric } 950*0b57cec5SDimitry Andric 951*0b57cec5SDimitry Andric llvm_unreachable("Unknown instruction operand layout"); 952*0b57cec5SDimitry Andric return 0; 953*0b57cec5SDimitry Andric } 954*0b57cec5SDimitry Andric 955*0b57cec5SDimitry Andric // Given two predicated instructions, this function detects whether 956*0b57cec5SDimitry Andric // the predicates are complements. 
957*0b57cec5SDimitry Andric bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1, 958*0b57cec5SDimitry Andric MachineInstr &MI2) { 959*0b57cec5SDimitry Andric // If we don't know the predicate sense of the instructions bail out early, we 960*0b57cec5SDimitry Andric // need it later. 961*0b57cec5SDimitry Andric if (getPredicateSense(MI1, HII) == PK_Unknown || 962*0b57cec5SDimitry Andric getPredicateSense(MI2, HII) == PK_Unknown) 963*0b57cec5SDimitry Andric return false; 964*0b57cec5SDimitry Andric 965*0b57cec5SDimitry Andric // Scheduling unit for candidate. 966*0b57cec5SDimitry Andric SUnit *SU = MIToSUnit[&MI1]; 967*0b57cec5SDimitry Andric 968*0b57cec5SDimitry Andric // One corner case deals with the following scenario: 969*0b57cec5SDimitry Andric // Trying to add 970*0b57cec5SDimitry Andric // a) %r24 = A2_tfrt %p0, %r25 971*0b57cec5SDimitry Andric // to this packet: 972*0b57cec5SDimitry Andric // { 973*0b57cec5SDimitry Andric // b) %r25 = A2_tfrf %p0, %r24 974*0b57cec5SDimitry Andric // c) %p0 = C2_cmpeqi %r26, 1 975*0b57cec5SDimitry Andric // } 976*0b57cec5SDimitry Andric // 977*0b57cec5SDimitry Andric // On general check a) and b) are complements, but presence of c) will 978*0b57cec5SDimitry Andric // convert a) to .new form, and then it is not a complement. 979*0b57cec5SDimitry Andric // We attempt to detect it by analyzing existing dependencies in the packet. 980*0b57cec5SDimitry Andric 981*0b57cec5SDimitry Andric // Analyze relationships between all existing members of the packet. 982*0b57cec5SDimitry Andric // Look for Anti dependecy on the same predicate reg as used in the 983*0b57cec5SDimitry Andric // candidate. 984*0b57cec5SDimitry Andric for (auto I : CurrentPacketMIs) { 985*0b57cec5SDimitry Andric // Scheduling Unit for current insn in the packet. 
986*0b57cec5SDimitry Andric SUnit *PacketSU = MIToSUnit.find(I)->second; 987*0b57cec5SDimitry Andric 988*0b57cec5SDimitry Andric // If this instruction in the packet is succeeded by the candidate... 989*0b57cec5SDimitry Andric if (PacketSU->isSucc(SU)) { 990*0b57cec5SDimitry Andric for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { 991*0b57cec5SDimitry Andric auto Dep = PacketSU->Succs[i]; 992*0b57cec5SDimitry Andric // The corner case exist when there is true data dependency between 993*0b57cec5SDimitry Andric // candidate and one of current packet members, this dep is on 994*0b57cec5SDimitry Andric // predicate reg, and there already exist anti dep on the same pred in 995*0b57cec5SDimitry Andric // the packet. 996*0b57cec5SDimitry Andric if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data && 997*0b57cec5SDimitry Andric Hexagon::PredRegsRegClass.contains(Dep.getReg())) { 998*0b57cec5SDimitry Andric // Here I know that I is predicate setting instruction with true 999*0b57cec5SDimitry Andric // data dep to candidate on the register we care about - c) in the 1000*0b57cec5SDimitry Andric // above example. Now I need to see if there is an anti dependency 1001*0b57cec5SDimitry Andric // from c) to any other instruction in the same packet on the pred 1002*0b57cec5SDimitry Andric // reg of interest. 1003*0b57cec5SDimitry Andric if (restrictingDepExistInPacket(*I, Dep.getReg())) 1004*0b57cec5SDimitry Andric return false; 1005*0b57cec5SDimitry Andric } 1006*0b57cec5SDimitry Andric } 1007*0b57cec5SDimitry Andric } 1008*0b57cec5SDimitry Andric } 1009*0b57cec5SDimitry Andric 1010*0b57cec5SDimitry Andric // If the above case does not apply, check regular complement condition. 1011*0b57cec5SDimitry Andric // Check that the predicate register is the same and that the predicate 1012*0b57cec5SDimitry Andric // sense is different We also need to differentiate .old vs. .new: !p0 1013*0b57cec5SDimitry Andric // is not complementary to p0.new. 
1014*0b57cec5SDimitry Andric unsigned PReg1 = getPredicatedRegister(MI1, HII); 1015*0b57cec5SDimitry Andric unsigned PReg2 = getPredicatedRegister(MI2, HII); 1016*0b57cec5SDimitry Andric return PReg1 == PReg2 && 1017*0b57cec5SDimitry Andric Hexagon::PredRegsRegClass.contains(PReg1) && 1018*0b57cec5SDimitry Andric Hexagon::PredRegsRegClass.contains(PReg2) && 1019*0b57cec5SDimitry Andric getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) && 1020*0b57cec5SDimitry Andric HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2); 1021*0b57cec5SDimitry Andric } 1022*0b57cec5SDimitry Andric 1023*0b57cec5SDimitry Andric // Initialize packetizer flags. 1024*0b57cec5SDimitry Andric void HexagonPacketizerList::initPacketizerState() { 1025*0b57cec5SDimitry Andric Dependence = false; 1026*0b57cec5SDimitry Andric PromotedToDotNew = false; 1027*0b57cec5SDimitry Andric GlueToNewValueJump = false; 1028*0b57cec5SDimitry Andric GlueAllocframeStore = false; 1029*0b57cec5SDimitry Andric FoundSequentialDependence = false; 1030*0b57cec5SDimitry Andric ChangedOffset = INT64_MAX; 1031*0b57cec5SDimitry Andric } 1032*0b57cec5SDimitry Andric 1033*0b57cec5SDimitry Andric // Ignore bundling of pseudo instructions. 1034*0b57cec5SDimitry Andric bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr &MI, 1035*0b57cec5SDimitry Andric const MachineBasicBlock *) { 1036*0b57cec5SDimitry Andric if (MI.isDebugInstr()) 1037*0b57cec5SDimitry Andric return true; 1038*0b57cec5SDimitry Andric 1039*0b57cec5SDimitry Andric if (MI.isCFIInstruction()) 1040*0b57cec5SDimitry Andric return false; 1041*0b57cec5SDimitry Andric 1042*0b57cec5SDimitry Andric // We must print out inline assembly. 
1043*0b57cec5SDimitry Andric if (MI.isInlineAsm()) 1044*0b57cec5SDimitry Andric return false; 1045*0b57cec5SDimitry Andric 1046*0b57cec5SDimitry Andric if (MI.isImplicitDef()) 1047*0b57cec5SDimitry Andric return false; 1048*0b57cec5SDimitry Andric 1049*0b57cec5SDimitry Andric // We check if MI has any functional units mapped to it. If it doesn't, 1050*0b57cec5SDimitry Andric // we ignore the instruction. 1051*0b57cec5SDimitry Andric const MCInstrDesc& TID = MI.getDesc(); 1052*0b57cec5SDimitry Andric auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass()); 1053*0b57cec5SDimitry Andric unsigned FuncUnits = IS->getUnits(); 1054*0b57cec5SDimitry Andric return !FuncUnits; 1055*0b57cec5SDimitry Andric } 1056*0b57cec5SDimitry Andric 1057*0b57cec5SDimitry Andric bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) { 1058*0b57cec5SDimitry Andric // Ensure any bundles created by gather packetize remain seperate. 1059*0b57cec5SDimitry Andric if (MI.isBundle()) 1060*0b57cec5SDimitry Andric return true; 1061*0b57cec5SDimitry Andric 1062*0b57cec5SDimitry Andric if (MI.isEHLabel() || MI.isCFIInstruction()) 1063*0b57cec5SDimitry Andric return true; 1064*0b57cec5SDimitry Andric 1065*0b57cec5SDimitry Andric // Consider inline asm to not be a solo instruction by default. 1066*0b57cec5SDimitry Andric // Inline asm will be put in a packet temporarily, but then it will be 1067*0b57cec5SDimitry Andric // removed, and placed outside of the packet (before or after, depending 1068*0b57cec5SDimitry Andric // on dependencies). This is to reduce the impact of inline asm as a 1069*0b57cec5SDimitry Andric // "packet splitting" instruction. 
1070*0b57cec5SDimitry Andric if (MI.isInlineAsm() && !ScheduleInlineAsm) 1071*0b57cec5SDimitry Andric return true; 1072*0b57cec5SDimitry Andric 1073*0b57cec5SDimitry Andric if (isSchedBarrier(MI)) 1074*0b57cec5SDimitry Andric return true; 1075*0b57cec5SDimitry Andric 1076*0b57cec5SDimitry Andric if (HII->isSolo(MI)) 1077*0b57cec5SDimitry Andric return true; 1078*0b57cec5SDimitry Andric 1079*0b57cec5SDimitry Andric if (MI.getOpcode() == Hexagon::A2_nop) 1080*0b57cec5SDimitry Andric return true; 1081*0b57cec5SDimitry Andric 1082*0b57cec5SDimitry Andric return false; 1083*0b57cec5SDimitry Andric } 1084*0b57cec5SDimitry Andric 1085*0b57cec5SDimitry Andric // Quick check if instructions MI and MJ cannot coexist in the same packet. 1086*0b57cec5SDimitry Andric // Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm", 1087*0b57cec5SDimitry Andric // but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm". 1088*0b57cec5SDimitry Andric // For full test call this function twice: 1089*0b57cec5SDimitry Andric // cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI) 1090*0b57cec5SDimitry Andric // Doing the test only one way saves the amount of code in this function, 1091*0b57cec5SDimitry Andric // since every test would need to be repeated with the MI and MJ reversed. 1092*0b57cec5SDimitry Andric static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ, 1093*0b57cec5SDimitry Andric const HexagonInstrInfo &HII) { 1094*0b57cec5SDimitry Andric const MachineFunction *MF = MI.getParent()->getParent(); 1095*0b57cec5SDimitry Andric if (MF->getSubtarget<HexagonSubtarget>().hasV60OpsOnly() && 1096*0b57cec5SDimitry Andric HII.isHVXMemWithAIndirect(MI, MJ)) 1097*0b57cec5SDimitry Andric return true; 1098*0b57cec5SDimitry Andric 1099*0b57cec5SDimitry Andric // An inline asm cannot be together with a branch, because we may not be 1100*0b57cec5SDimitry Andric // able to remove the asm out after packetizing (i.e. 
if the asm must be 1101*0b57cec5SDimitry Andric // moved past the bundle). Similarly, two asms cannot be together to avoid 1102*0b57cec5SDimitry Andric // complications when determining their relative order outside of a bundle. 1103*0b57cec5SDimitry Andric if (MI.isInlineAsm()) 1104*0b57cec5SDimitry Andric return MJ.isInlineAsm() || MJ.isBranch() || MJ.isBarrier() || 1105*0b57cec5SDimitry Andric MJ.isCall() || MJ.isTerminator(); 1106*0b57cec5SDimitry Andric 1107*0b57cec5SDimitry Andric // New-value stores cannot coexist with any other stores. 1108*0b57cec5SDimitry Andric if (HII.isNewValueStore(MI) && MJ.mayStore()) 1109*0b57cec5SDimitry Andric return true; 1110*0b57cec5SDimitry Andric 1111*0b57cec5SDimitry Andric switch (MI.getOpcode()) { 1112*0b57cec5SDimitry Andric case Hexagon::S2_storew_locked: 1113*0b57cec5SDimitry Andric case Hexagon::S4_stored_locked: 1114*0b57cec5SDimitry Andric case Hexagon::L2_loadw_locked: 1115*0b57cec5SDimitry Andric case Hexagon::L4_loadd_locked: 1116*0b57cec5SDimitry Andric case Hexagon::Y2_dccleana: 1117*0b57cec5SDimitry Andric case Hexagon::Y2_dccleaninva: 1118*0b57cec5SDimitry Andric case Hexagon::Y2_dcinva: 1119*0b57cec5SDimitry Andric case Hexagon::Y2_dczeroa: 1120*0b57cec5SDimitry Andric case Hexagon::Y4_l2fetch: 1121*0b57cec5SDimitry Andric case Hexagon::Y5_l2fetch: { 1122*0b57cec5SDimitry Andric // These instructions can only be grouped with ALU32 or non-floating-point 1123*0b57cec5SDimitry Andric // XTYPE instructions. Since there is no convenient way of identifying fp 1124*0b57cec5SDimitry Andric // XTYPE instructions, only allow grouping with ALU32 for now. 
1125*0b57cec5SDimitry Andric unsigned TJ = HII.getType(MJ); 1126*0b57cec5SDimitry Andric if (TJ != HexagonII::TypeALU32_2op && 1127*0b57cec5SDimitry Andric TJ != HexagonII::TypeALU32_3op && 1128*0b57cec5SDimitry Andric TJ != HexagonII::TypeALU32_ADDI) 1129*0b57cec5SDimitry Andric return true; 1130*0b57cec5SDimitry Andric break; 1131*0b57cec5SDimitry Andric } 1132*0b57cec5SDimitry Andric default: 1133*0b57cec5SDimitry Andric break; 1134*0b57cec5SDimitry Andric } 1135*0b57cec5SDimitry Andric 1136*0b57cec5SDimitry Andric // "False" really means that the quick check failed to determine if 1137*0b57cec5SDimitry Andric // I and J cannot coexist. 1138*0b57cec5SDimitry Andric return false; 1139*0b57cec5SDimitry Andric } 1140*0b57cec5SDimitry Andric 1141*0b57cec5SDimitry Andric // Full, symmetric check. 1142*0b57cec5SDimitry Andric bool HexagonPacketizerList::cannotCoexist(const MachineInstr &MI, 1143*0b57cec5SDimitry Andric const MachineInstr &MJ) { 1144*0b57cec5SDimitry Andric return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII); 1145*0b57cec5SDimitry Andric } 1146*0b57cec5SDimitry Andric 1147*0b57cec5SDimitry Andric void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { 1148*0b57cec5SDimitry Andric for (auto &B : MF) { 1149*0b57cec5SDimitry Andric MachineBasicBlock::iterator BundleIt; 1150*0b57cec5SDimitry Andric MachineBasicBlock::instr_iterator NextI; 1151*0b57cec5SDimitry Andric for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) { 1152*0b57cec5SDimitry Andric NextI = std::next(I); 1153*0b57cec5SDimitry Andric MachineInstr &MI = *I; 1154*0b57cec5SDimitry Andric if (MI.isBundle()) 1155*0b57cec5SDimitry Andric BundleIt = I; 1156*0b57cec5SDimitry Andric if (!MI.isInsideBundle()) 1157*0b57cec5SDimitry Andric continue; 1158*0b57cec5SDimitry Andric 1159*0b57cec5SDimitry Andric // Decide on where to insert the instruction that we are pulling out. 
1160*0b57cec5SDimitry Andric // Debug instructions always go before the bundle, but the placement of 1161*0b57cec5SDimitry Andric // INLINE_ASM depends on potential dependencies. By default, try to 1162*0b57cec5SDimitry Andric // put it before the bundle, but if the asm writes to a register that 1163*0b57cec5SDimitry Andric // other instructions in the bundle read, then we need to place it 1164*0b57cec5SDimitry Andric // after the bundle (to preserve the bundle semantics). 1165*0b57cec5SDimitry Andric bool InsertBeforeBundle; 1166*0b57cec5SDimitry Andric if (MI.isInlineAsm()) 1167*0b57cec5SDimitry Andric InsertBeforeBundle = !hasWriteToReadDep(MI, *BundleIt, HRI); 1168*0b57cec5SDimitry Andric else if (MI.isDebugValue()) 1169*0b57cec5SDimitry Andric InsertBeforeBundle = true; 1170*0b57cec5SDimitry Andric else 1171*0b57cec5SDimitry Andric continue; 1172*0b57cec5SDimitry Andric 1173*0b57cec5SDimitry Andric BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle); 1174*0b57cec5SDimitry Andric } 1175*0b57cec5SDimitry Andric } 1176*0b57cec5SDimitry Andric } 1177*0b57cec5SDimitry Andric 1178*0b57cec5SDimitry Andric // Check if a given instruction is of class "system". 
1179*0b57cec5SDimitry Andric static bool isSystemInstr(const MachineInstr &MI) { 1180*0b57cec5SDimitry Andric unsigned Opc = MI.getOpcode(); 1181*0b57cec5SDimitry Andric switch (Opc) { 1182*0b57cec5SDimitry Andric case Hexagon::Y2_barrier: 1183*0b57cec5SDimitry Andric case Hexagon::Y2_dcfetchbo: 1184*0b57cec5SDimitry Andric case Hexagon::Y4_l2fetch: 1185*0b57cec5SDimitry Andric case Hexagon::Y5_l2fetch: 1186*0b57cec5SDimitry Andric return true; 1187*0b57cec5SDimitry Andric } 1188*0b57cec5SDimitry Andric return false; 1189*0b57cec5SDimitry Andric } 1190*0b57cec5SDimitry Andric 1191*0b57cec5SDimitry Andric bool HexagonPacketizerList::hasDeadDependence(const MachineInstr &I, 1192*0b57cec5SDimitry Andric const MachineInstr &J) { 1193*0b57cec5SDimitry Andric // The dependence graph may not include edges between dead definitions, 1194*0b57cec5SDimitry Andric // so without extra checks, we could end up packetizing two instruction 1195*0b57cec5SDimitry Andric // defining the same (dead) register. 
1196*0b57cec5SDimitry Andric if (I.isCall() || J.isCall()) 1197*0b57cec5SDimitry Andric return false; 1198*0b57cec5SDimitry Andric if (HII->isPredicated(I) || HII->isPredicated(J)) 1199*0b57cec5SDimitry Andric return false; 1200*0b57cec5SDimitry Andric 1201*0b57cec5SDimitry Andric BitVector DeadDefs(Hexagon::NUM_TARGET_REGS); 1202*0b57cec5SDimitry Andric for (auto &MO : I.operands()) { 1203*0b57cec5SDimitry Andric if (!MO.isReg() || !MO.isDef() || !MO.isDead()) 1204*0b57cec5SDimitry Andric continue; 1205*0b57cec5SDimitry Andric DeadDefs[MO.getReg()] = true; 1206*0b57cec5SDimitry Andric } 1207*0b57cec5SDimitry Andric 1208*0b57cec5SDimitry Andric for (auto &MO : J.operands()) { 1209*0b57cec5SDimitry Andric if (!MO.isReg() || !MO.isDef() || !MO.isDead()) 1210*0b57cec5SDimitry Andric continue; 1211*0b57cec5SDimitry Andric unsigned R = MO.getReg(); 1212*0b57cec5SDimitry Andric if (R != Hexagon::USR_OVF && DeadDefs[R]) 1213*0b57cec5SDimitry Andric return true; 1214*0b57cec5SDimitry Andric } 1215*0b57cec5SDimitry Andric return false; 1216*0b57cec5SDimitry Andric } 1217*0b57cec5SDimitry Andric 1218*0b57cec5SDimitry Andric bool HexagonPacketizerList::hasControlDependence(const MachineInstr &I, 1219*0b57cec5SDimitry Andric const MachineInstr &J) { 1220*0b57cec5SDimitry Andric // A save callee-save register function call can only be in a packet 1221*0b57cec5SDimitry Andric // with instructions that don't write to the callee-save registers. 1222*0b57cec5SDimitry Andric if ((HII->isSaveCalleeSavedRegsCall(I) && 1223*0b57cec5SDimitry Andric doesModifyCalleeSavedReg(J, HRI)) || 1224*0b57cec5SDimitry Andric (HII->isSaveCalleeSavedRegsCall(J) && 1225*0b57cec5SDimitry Andric doesModifyCalleeSavedReg(I, HRI))) 1226*0b57cec5SDimitry Andric return true; 1227*0b57cec5SDimitry Andric 1228*0b57cec5SDimitry Andric // Two control flow instructions cannot go in the same packet. 
1229*0b57cec5SDimitry Andric if (isControlFlow(I) && isControlFlow(J)) 1230*0b57cec5SDimitry Andric return true; 1231*0b57cec5SDimitry Andric 1232*0b57cec5SDimitry Andric // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot 1233*0b57cec5SDimitry Andric // contain a speculative indirect jump, 1234*0b57cec5SDimitry Andric // a new-value compare jump or a dealloc_return. 1235*0b57cec5SDimitry Andric auto isBadForLoopN = [this] (const MachineInstr &MI) -> bool { 1236*0b57cec5SDimitry Andric if (MI.isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI)) 1237*0b57cec5SDimitry Andric return true; 1238*0b57cec5SDimitry Andric if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI)) 1239*0b57cec5SDimitry Andric return true; 1240*0b57cec5SDimitry Andric return false; 1241*0b57cec5SDimitry Andric }; 1242*0b57cec5SDimitry Andric 1243*0b57cec5SDimitry Andric if (HII->isLoopN(I) && isBadForLoopN(J)) 1244*0b57cec5SDimitry Andric return true; 1245*0b57cec5SDimitry Andric if (HII->isLoopN(J) && isBadForLoopN(I)) 1246*0b57cec5SDimitry Andric return true; 1247*0b57cec5SDimitry Andric 1248*0b57cec5SDimitry Andric // dealloc_return cannot appear in the same packet as a conditional or 1249*0b57cec5SDimitry Andric // unconditional jump. 1250*0b57cec5SDimitry Andric return HII->isDeallocRet(I) && 1251*0b57cec5SDimitry Andric (J.isBranch() || J.isCall() || J.isBarrier()); 1252*0b57cec5SDimitry Andric } 1253*0b57cec5SDimitry Andric 1254*0b57cec5SDimitry Andric bool HexagonPacketizerList::hasRegMaskDependence(const MachineInstr &I, 1255*0b57cec5SDimitry Andric const MachineInstr &J) { 1256*0b57cec5SDimitry Andric // Adding I to a packet that has J. 1257*0b57cec5SDimitry Andric 1258*0b57cec5SDimitry Andric // Regmasks are not reflected in the scheduling dependency graph, so 1259*0b57cec5SDimitry Andric // we need to check them manually. 
This code assumes that regmasks only 1260*0b57cec5SDimitry Andric // occur on calls, and the problematic case is when we add an instruction 1261*0b57cec5SDimitry Andric // defining a register R to a packet that has a call that clobbers R via 1262*0b57cec5SDimitry Andric // a regmask. Those cannot be packetized together, because the call will 1263*0b57cec5SDimitry Andric // be executed last. That's also a reson why it is ok to add a call 1264*0b57cec5SDimitry Andric // clobbering R to a packet that defines R. 1265*0b57cec5SDimitry Andric 1266*0b57cec5SDimitry Andric // Look for regmasks in J. 1267*0b57cec5SDimitry Andric for (const MachineOperand &OpJ : J.operands()) { 1268*0b57cec5SDimitry Andric if (!OpJ.isRegMask()) 1269*0b57cec5SDimitry Andric continue; 1270*0b57cec5SDimitry Andric assert((J.isCall() || HII->isTailCall(J)) && "Regmask on a non-call"); 1271*0b57cec5SDimitry Andric for (const MachineOperand &OpI : I.operands()) { 1272*0b57cec5SDimitry Andric if (OpI.isReg()) { 1273*0b57cec5SDimitry Andric if (OpJ.clobbersPhysReg(OpI.getReg())) 1274*0b57cec5SDimitry Andric return true; 1275*0b57cec5SDimitry Andric } else if (OpI.isRegMask()) { 1276*0b57cec5SDimitry Andric // Both are regmasks. Assume that they intersect. 
1277*0b57cec5SDimitry Andric return true; 1278*0b57cec5SDimitry Andric } 1279*0b57cec5SDimitry Andric } 1280*0b57cec5SDimitry Andric } 1281*0b57cec5SDimitry Andric return false; 1282*0b57cec5SDimitry Andric } 1283*0b57cec5SDimitry Andric 1284*0b57cec5SDimitry Andric bool HexagonPacketizerList::hasDualStoreDependence(const MachineInstr &I, 1285*0b57cec5SDimitry Andric const MachineInstr &J) { 1286*0b57cec5SDimitry Andric bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J); 1287*0b57cec5SDimitry Andric bool StoreI = I.mayStore(), StoreJ = J.mayStore(); 1288*0b57cec5SDimitry Andric if ((SysI && StoreJ) || (SysJ && StoreI)) 1289*0b57cec5SDimitry Andric return true; 1290*0b57cec5SDimitry Andric 1291*0b57cec5SDimitry Andric if (StoreI && StoreJ) { 1292*0b57cec5SDimitry Andric if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I)) 1293*0b57cec5SDimitry Andric return true; 1294*0b57cec5SDimitry Andric } else { 1295*0b57cec5SDimitry Andric // A memop cannot be in the same packet with another memop or a store. 1296*0b57cec5SDimitry Andric // Two stores can be together, but here I and J cannot both be stores. 1297*0b57cec5SDimitry Andric bool MopStI = HII->isMemOp(I) || StoreI; 1298*0b57cec5SDimitry Andric bool MopStJ = HII->isMemOp(J) || StoreJ; 1299*0b57cec5SDimitry Andric if (MopStI && MopStJ) 1300*0b57cec5SDimitry Andric return true; 1301*0b57cec5SDimitry Andric } 1302*0b57cec5SDimitry Andric 1303*0b57cec5SDimitry Andric return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J)); 1304*0b57cec5SDimitry Andric } 1305*0b57cec5SDimitry Andric 1306*0b57cec5SDimitry Andric // SUI is the current instruction that is out side of the current packet. 1307*0b57cec5SDimitry Andric // SUJ is the current instruction inside the current packet against which that 1308*0b57cec5SDimitry Andric // SUI will be packetized. 
1309*0b57cec5SDimitry Andric bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { 1310*0b57cec5SDimitry Andric assert(SUI->getInstr() && SUJ->getInstr()); 1311*0b57cec5SDimitry Andric MachineInstr &I = *SUI->getInstr(); 1312*0b57cec5SDimitry Andric MachineInstr &J = *SUJ->getInstr(); 1313*0b57cec5SDimitry Andric 1314*0b57cec5SDimitry Andric // Clear IgnoreDepMIs when Packet starts. 1315*0b57cec5SDimitry Andric if (CurrentPacketMIs.size() == 1) 1316*0b57cec5SDimitry Andric IgnoreDepMIs.clear(); 1317*0b57cec5SDimitry Andric 1318*0b57cec5SDimitry Andric MachineBasicBlock::iterator II = I.getIterator(); 1319*0b57cec5SDimitry Andric 1320*0b57cec5SDimitry Andric // Solo instructions cannot go in the packet. 1321*0b57cec5SDimitry Andric assert(!isSoloInstruction(I) && "Unexpected solo instr!"); 1322*0b57cec5SDimitry Andric 1323*0b57cec5SDimitry Andric if (cannotCoexist(I, J)) 1324*0b57cec5SDimitry Andric return false; 1325*0b57cec5SDimitry Andric 1326*0b57cec5SDimitry Andric Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J); 1327*0b57cec5SDimitry Andric if (Dependence) 1328*0b57cec5SDimitry Andric return false; 1329*0b57cec5SDimitry Andric 1330*0b57cec5SDimitry Andric // Regmasks are not accounted for in the scheduling graph, so we need 1331*0b57cec5SDimitry Andric // to explicitly check for dependencies caused by them. They should only 1332*0b57cec5SDimitry Andric // appear on calls, so it's not too pessimistic to reject all regmask 1333*0b57cec5SDimitry Andric // dependencies. 1334*0b57cec5SDimitry Andric Dependence = hasRegMaskDependence(I, J); 1335*0b57cec5SDimitry Andric if (Dependence) 1336*0b57cec5SDimitry Andric return false; 1337*0b57cec5SDimitry Andric 1338*0b57cec5SDimitry Andric // Dual-store does not allow second store, if the first store is not 1339*0b57cec5SDimitry Andric // in SLOT0. New value store, new value jump, dealloc_return and memop 1340*0b57cec5SDimitry Andric // always take SLOT0. Arch spec 3.4.4.2. 
1341*0b57cec5SDimitry Andric Dependence = hasDualStoreDependence(I, J); 1342*0b57cec5SDimitry Andric if (Dependence) 1343*0b57cec5SDimitry Andric return false; 1344*0b57cec5SDimitry Andric 1345*0b57cec5SDimitry Andric // If an instruction feeds new value jump, glue it. 1346*0b57cec5SDimitry Andric MachineBasicBlock::iterator NextMII = I.getIterator(); 1347*0b57cec5SDimitry Andric ++NextMII; 1348*0b57cec5SDimitry Andric if (NextMII != I.getParent()->end() && HII->isNewValueJump(*NextMII)) { 1349*0b57cec5SDimitry Andric MachineInstr &NextMI = *NextMII; 1350*0b57cec5SDimitry Andric 1351*0b57cec5SDimitry Andric bool secondRegMatch = false; 1352*0b57cec5SDimitry Andric const MachineOperand &NOp0 = NextMI.getOperand(0); 1353*0b57cec5SDimitry Andric const MachineOperand &NOp1 = NextMI.getOperand(1); 1354*0b57cec5SDimitry Andric 1355*0b57cec5SDimitry Andric if (NOp1.isReg() && I.getOperand(0).getReg() == NOp1.getReg()) 1356*0b57cec5SDimitry Andric secondRegMatch = true; 1357*0b57cec5SDimitry Andric 1358*0b57cec5SDimitry Andric for (MachineInstr *PI : CurrentPacketMIs) { 1359*0b57cec5SDimitry Andric // NVJ can not be part of the dual jump - Arch Spec: section 7.8. 1360*0b57cec5SDimitry Andric if (PI->isCall()) { 1361*0b57cec5SDimitry Andric Dependence = true; 1362*0b57cec5SDimitry Andric break; 1363*0b57cec5SDimitry Andric } 1364*0b57cec5SDimitry Andric // Validate: 1365*0b57cec5SDimitry Andric // 1. Packet does not have a store in it. 1366*0b57cec5SDimitry Andric // 2. If the first operand of the nvj is newified, and the second 1367*0b57cec5SDimitry Andric // operand is also a reg, it (second reg) is not defined in 1368*0b57cec5SDimitry Andric // the same packet. 1369*0b57cec5SDimitry Andric // 3. If the second operand of the nvj is newified, (which means 1370*0b57cec5SDimitry Andric // first operand is also a reg), first reg is not defined in 1371*0b57cec5SDimitry Andric // the same packet. 
1372*0b57cec5SDimitry Andric if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() || 1373*0b57cec5SDimitry Andric HII->isLoopN(*PI)) { 1374*0b57cec5SDimitry Andric Dependence = true; 1375*0b57cec5SDimitry Andric break; 1376*0b57cec5SDimitry Andric } 1377*0b57cec5SDimitry Andric // Check #2/#3. 1378*0b57cec5SDimitry Andric const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1; 1379*0b57cec5SDimitry Andric if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) { 1380*0b57cec5SDimitry Andric Dependence = true; 1381*0b57cec5SDimitry Andric break; 1382*0b57cec5SDimitry Andric } 1383*0b57cec5SDimitry Andric } 1384*0b57cec5SDimitry Andric 1385*0b57cec5SDimitry Andric GlueToNewValueJump = true; 1386*0b57cec5SDimitry Andric if (Dependence) 1387*0b57cec5SDimitry Andric return false; 1388*0b57cec5SDimitry Andric } 1389*0b57cec5SDimitry Andric 1390*0b57cec5SDimitry Andric // There no dependency between a prolog instruction and its successor. 1391*0b57cec5SDimitry Andric if (!SUJ->isSucc(SUI)) 1392*0b57cec5SDimitry Andric return true; 1393*0b57cec5SDimitry Andric 1394*0b57cec5SDimitry Andric for (unsigned i = 0; i < SUJ->Succs.size(); ++i) { 1395*0b57cec5SDimitry Andric if (FoundSequentialDependence) 1396*0b57cec5SDimitry Andric break; 1397*0b57cec5SDimitry Andric 1398*0b57cec5SDimitry Andric if (SUJ->Succs[i].getSUnit() != SUI) 1399*0b57cec5SDimitry Andric continue; 1400*0b57cec5SDimitry Andric 1401*0b57cec5SDimitry Andric SDep::Kind DepType = SUJ->Succs[i].getKind(); 1402*0b57cec5SDimitry Andric // For direct calls: 1403*0b57cec5SDimitry Andric // Ignore register dependences for call instructions for packetization 1404*0b57cec5SDimitry Andric // purposes except for those due to r31 and predicate registers. 1405*0b57cec5SDimitry Andric // 1406*0b57cec5SDimitry Andric // For indirect calls: 1407*0b57cec5SDimitry Andric // Same as direct calls + check for true dependences to the register 1408*0b57cec5SDimitry Andric // used in the indirect call. 
1409*0b57cec5SDimitry Andric // 1410*0b57cec5SDimitry Andric // We completely ignore Order dependences for call instructions. 1411*0b57cec5SDimitry Andric // 1412*0b57cec5SDimitry Andric // For returns: 1413*0b57cec5SDimitry Andric // Ignore register dependences for return instructions like jumpr, 1414*0b57cec5SDimitry Andric // dealloc return unless we have dependencies on the explicit uses 1415*0b57cec5SDimitry Andric // of the registers used by jumpr (like r31) or dealloc return 1416*0b57cec5SDimitry Andric // (like r29 or r30). 1417*0b57cec5SDimitry Andric unsigned DepReg = 0; 1418*0b57cec5SDimitry Andric const TargetRegisterClass *RC = nullptr; 1419*0b57cec5SDimitry Andric if (DepType == SDep::Data) { 1420*0b57cec5SDimitry Andric DepReg = SUJ->Succs[i].getReg(); 1421*0b57cec5SDimitry Andric RC = HRI->getMinimalPhysRegClass(DepReg); 1422*0b57cec5SDimitry Andric } 1423*0b57cec5SDimitry Andric 1424*0b57cec5SDimitry Andric if (I.isCall() || HII->isJumpR(I) || I.isReturn() || HII->isTailCall(I)) { 1425*0b57cec5SDimitry Andric if (!isRegDependence(DepType)) 1426*0b57cec5SDimitry Andric continue; 1427*0b57cec5SDimitry Andric if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg())) 1428*0b57cec5SDimitry Andric continue; 1429*0b57cec5SDimitry Andric } 1430*0b57cec5SDimitry Andric 1431*0b57cec5SDimitry Andric if (DepType == SDep::Data) { 1432*0b57cec5SDimitry Andric if (canPromoteToDotCur(J, SUJ, DepReg, II, RC)) 1433*0b57cec5SDimitry Andric if (promoteToDotCur(J, DepType, II, RC)) 1434*0b57cec5SDimitry Andric continue; 1435*0b57cec5SDimitry Andric } 1436*0b57cec5SDimitry Andric 1437*0b57cec5SDimitry Andric // Data dpendence ok if we have load.cur. 
1438*0b57cec5SDimitry Andric if (DepType == SDep::Data && HII->isDotCurInst(J)) { 1439*0b57cec5SDimitry Andric if (HII->isHVXVec(I)) 1440*0b57cec5SDimitry Andric continue; 1441*0b57cec5SDimitry Andric } 1442*0b57cec5SDimitry Andric 1443*0b57cec5SDimitry Andric // For instructions that can be promoted to dot-new, try to promote. 1444*0b57cec5SDimitry Andric if (DepType == SDep::Data) { 1445*0b57cec5SDimitry Andric if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) { 1446*0b57cec5SDimitry Andric if (promoteToDotNew(I, DepType, II, RC)) { 1447*0b57cec5SDimitry Andric PromotedToDotNew = true; 1448*0b57cec5SDimitry Andric if (cannotCoexist(I, J)) 1449*0b57cec5SDimitry Andric FoundSequentialDependence = true; 1450*0b57cec5SDimitry Andric continue; 1451*0b57cec5SDimitry Andric } 1452*0b57cec5SDimitry Andric } 1453*0b57cec5SDimitry Andric if (HII->isNewValueJump(I)) 1454*0b57cec5SDimitry Andric continue; 1455*0b57cec5SDimitry Andric } 1456*0b57cec5SDimitry Andric 1457*0b57cec5SDimitry Andric // For predicated instructions, if the predicates are complements then 1458*0b57cec5SDimitry Andric // there can be no dependence. 1459*0b57cec5SDimitry Andric if (HII->isPredicated(I) && HII->isPredicated(J) && 1460*0b57cec5SDimitry Andric arePredicatesComplements(I, J)) { 1461*0b57cec5SDimitry Andric // Not always safe to do this translation. 1462*0b57cec5SDimitry Andric // DAG Builder attempts to reduce dependence edges using transitive 1463*0b57cec5SDimitry Andric // nature of dependencies. Here is an example: 1464*0b57cec5SDimitry Andric // 1465*0b57cec5SDimitry Andric // r0 = tfr_pt ... (1) 1466*0b57cec5SDimitry Andric // r0 = tfr_pf ... (2) 1467*0b57cec5SDimitry Andric // r0 = tfr_pt ... (3) 1468*0b57cec5SDimitry Andric // 1469*0b57cec5SDimitry Andric // There will be an output dependence between (1)->(2) and (2)->(3). 1470*0b57cec5SDimitry Andric // However, there is no dependence edge between (1)->(3). 
This results 1471*0b57cec5SDimitry Andric // in all 3 instructions going in the same packet. We ignore dependce 1472*0b57cec5SDimitry Andric // only once to avoid this situation. 1473*0b57cec5SDimitry Andric auto Itr = find(IgnoreDepMIs, &J); 1474*0b57cec5SDimitry Andric if (Itr != IgnoreDepMIs.end()) { 1475*0b57cec5SDimitry Andric Dependence = true; 1476*0b57cec5SDimitry Andric return false; 1477*0b57cec5SDimitry Andric } 1478*0b57cec5SDimitry Andric IgnoreDepMIs.push_back(&I); 1479*0b57cec5SDimitry Andric continue; 1480*0b57cec5SDimitry Andric } 1481*0b57cec5SDimitry Andric 1482*0b57cec5SDimitry Andric // Ignore Order dependences between unconditional direct branches 1483*0b57cec5SDimitry Andric // and non-control-flow instructions. 1484*0b57cec5SDimitry Andric if (isDirectJump(I) && !J.isBranch() && !J.isCall() && 1485*0b57cec5SDimitry Andric DepType == SDep::Order) 1486*0b57cec5SDimitry Andric continue; 1487*0b57cec5SDimitry Andric 1488*0b57cec5SDimitry Andric // Ignore all dependences for jumps except for true and output 1489*0b57cec5SDimitry Andric // dependences. 1490*0b57cec5SDimitry Andric if (I.isConditionalBranch() && DepType != SDep::Data && 1491*0b57cec5SDimitry Andric DepType != SDep::Output) 1492*0b57cec5SDimitry Andric continue; 1493*0b57cec5SDimitry Andric 1494*0b57cec5SDimitry Andric if (DepType == SDep::Output) { 1495*0b57cec5SDimitry Andric FoundSequentialDependence = true; 1496*0b57cec5SDimitry Andric break; 1497*0b57cec5SDimitry Andric } 1498*0b57cec5SDimitry Andric 1499*0b57cec5SDimitry Andric // For Order dependences: 1500*0b57cec5SDimitry Andric // 1. Volatile loads/stores can be packetized together, unless other 1501*0b57cec5SDimitry Andric // rules prevent is. 1502*0b57cec5SDimitry Andric // 2. Store followed by a load is not allowed. 1503*0b57cec5SDimitry Andric // 3. Store followed by a store is valid. 1504*0b57cec5SDimitry Andric // 4. Load followed by any memory operation is allowed. 
1505*0b57cec5SDimitry Andric if (DepType == SDep::Order) { 1506*0b57cec5SDimitry Andric if (!PacketizeVolatiles) { 1507*0b57cec5SDimitry Andric bool OrdRefs = I.hasOrderedMemoryRef() || J.hasOrderedMemoryRef(); 1508*0b57cec5SDimitry Andric if (OrdRefs) { 1509*0b57cec5SDimitry Andric FoundSequentialDependence = true; 1510*0b57cec5SDimitry Andric break; 1511*0b57cec5SDimitry Andric } 1512*0b57cec5SDimitry Andric } 1513*0b57cec5SDimitry Andric // J is first, I is second. 1514*0b57cec5SDimitry Andric bool LoadJ = J.mayLoad(), StoreJ = J.mayStore(); 1515*0b57cec5SDimitry Andric bool LoadI = I.mayLoad(), StoreI = I.mayStore(); 1516*0b57cec5SDimitry Andric bool NVStoreJ = HII->isNewValueStore(J); 1517*0b57cec5SDimitry Andric bool NVStoreI = HII->isNewValueStore(I); 1518*0b57cec5SDimitry Andric bool IsVecJ = HII->isHVXVec(J); 1519*0b57cec5SDimitry Andric bool IsVecI = HII->isHVXVec(I); 1520*0b57cec5SDimitry Andric 1521*0b57cec5SDimitry Andric if (Slot1Store && MF.getSubtarget<HexagonSubtarget>().hasV65Ops() && 1522*0b57cec5SDimitry Andric ((LoadJ && StoreI && !NVStoreI) || 1523*0b57cec5SDimitry Andric (StoreJ && LoadI && !NVStoreJ)) && 1524*0b57cec5SDimitry Andric (J.getOpcode() != Hexagon::S2_allocframe && 1525*0b57cec5SDimitry Andric I.getOpcode() != Hexagon::S2_allocframe) && 1526*0b57cec5SDimitry Andric (J.getOpcode() != Hexagon::L2_deallocframe && 1527*0b57cec5SDimitry Andric I.getOpcode() != Hexagon::L2_deallocframe) && 1528*0b57cec5SDimitry Andric (!HII->isMemOp(J) && !HII->isMemOp(I)) && (!IsVecJ && !IsVecI)) 1529*0b57cec5SDimitry Andric setmemShufDisabled(true); 1530*0b57cec5SDimitry Andric else 1531*0b57cec5SDimitry Andric if (StoreJ && LoadI && alias(J, I)) { 1532*0b57cec5SDimitry Andric FoundSequentialDependence = true; 1533*0b57cec5SDimitry Andric break; 1534*0b57cec5SDimitry Andric } 1535*0b57cec5SDimitry Andric 1536*0b57cec5SDimitry Andric if (!StoreJ) 1537*0b57cec5SDimitry Andric if (!LoadJ || (!LoadI && !StoreI)) { 1538*0b57cec5SDimitry Andric // If J is 
neither load nor store, assume a dependency. 1539*0b57cec5SDimitry Andric // If J is a load, but I is neither, also assume a dependency. 1540*0b57cec5SDimitry Andric FoundSequentialDependence = true; 1541*0b57cec5SDimitry Andric break; 1542*0b57cec5SDimitry Andric } 1543*0b57cec5SDimitry Andric // Store followed by store: not OK on V2. 1544*0b57cec5SDimitry Andric // Store followed by load: not OK on all. 1545*0b57cec5SDimitry Andric // Load followed by store: OK on all. 1546*0b57cec5SDimitry Andric // Load followed by load: OK on all. 1547*0b57cec5SDimitry Andric continue; 1548*0b57cec5SDimitry Andric } 1549*0b57cec5SDimitry Andric 1550*0b57cec5SDimitry Andric // Special case for ALLOCFRAME: even though there is dependency 1551*0b57cec5SDimitry Andric // between ALLOCFRAME and subsequent store, allow it to be packetized 1552*0b57cec5SDimitry Andric // in a same packet. This implies that the store is using the caller's 1553*0b57cec5SDimitry Andric // SP. Hence, offset needs to be updated accordingly. 1554*0b57cec5SDimitry Andric if (DepType == SDep::Data && J.getOpcode() == Hexagon::S2_allocframe) { 1555*0b57cec5SDimitry Andric unsigned Opc = I.getOpcode(); 1556*0b57cec5SDimitry Andric switch (Opc) { 1557*0b57cec5SDimitry Andric case Hexagon::S2_storerd_io: 1558*0b57cec5SDimitry Andric case Hexagon::S2_storeri_io: 1559*0b57cec5SDimitry Andric case Hexagon::S2_storerh_io: 1560*0b57cec5SDimitry Andric case Hexagon::S2_storerb_io: 1561*0b57cec5SDimitry Andric if (I.getOperand(0).getReg() == HRI->getStackRegister()) { 1562*0b57cec5SDimitry Andric // Since this store is to be glued with allocframe in the same 1563*0b57cec5SDimitry Andric // packet, it will use SP of the previous stack frame, i.e. 1564*0b57cec5SDimitry Andric // caller's SP. Therefore, we need to recalculate offset 1565*0b57cec5SDimitry Andric // according to this change. 
1566*0b57cec5SDimitry Andric GlueAllocframeStore = useCallersSP(I); 1567*0b57cec5SDimitry Andric if (GlueAllocframeStore) 1568*0b57cec5SDimitry Andric continue; 1569*0b57cec5SDimitry Andric } 1570*0b57cec5SDimitry Andric break; 1571*0b57cec5SDimitry Andric default: 1572*0b57cec5SDimitry Andric break; 1573*0b57cec5SDimitry Andric } 1574*0b57cec5SDimitry Andric } 1575*0b57cec5SDimitry Andric 1576*0b57cec5SDimitry Andric // There are certain anti-dependencies that cannot be ignored. 1577*0b57cec5SDimitry Andric // Specifically: 1578*0b57cec5SDimitry Andric // J2_call ... implicit-def %r0 ; SUJ 1579*0b57cec5SDimitry Andric // R0 = ... ; SUI 1580*0b57cec5SDimitry Andric // Those cannot be packetized together, since the call will observe 1581*0b57cec5SDimitry Andric // the effect of the assignment to R0. 1582*0b57cec5SDimitry Andric if ((DepType == SDep::Anti || DepType == SDep::Output) && J.isCall()) { 1583*0b57cec5SDimitry Andric // Check if I defines any volatile register. We should also check 1584*0b57cec5SDimitry Andric // registers that the call may read, but these happen to be a 1585*0b57cec5SDimitry Andric // subset of the volatile register set. 1586*0b57cec5SDimitry Andric for (const MachineOperand &Op : I.operands()) { 1587*0b57cec5SDimitry Andric if (Op.isReg() && Op.isDef()) { 1588*0b57cec5SDimitry Andric unsigned R = Op.getReg(); 1589*0b57cec5SDimitry Andric if (!J.readsRegister(R, HRI) && !J.modifiesRegister(R, HRI)) 1590*0b57cec5SDimitry Andric continue; 1591*0b57cec5SDimitry Andric } else if (!Op.isRegMask()) { 1592*0b57cec5SDimitry Andric // If I has a regmask assume dependency. 1593*0b57cec5SDimitry Andric continue; 1594*0b57cec5SDimitry Andric } 1595*0b57cec5SDimitry Andric FoundSequentialDependence = true; 1596*0b57cec5SDimitry Andric break; 1597*0b57cec5SDimitry Andric } 1598*0b57cec5SDimitry Andric } 1599*0b57cec5SDimitry Andric 1600*0b57cec5SDimitry Andric // Skip over remaining anti-dependences. 
Two instructions that are 1601*0b57cec5SDimitry Andric // anti-dependent can share a packet, since in most such cases all 1602*0b57cec5SDimitry Andric // operands are read before any modifications take place. 1603*0b57cec5SDimitry Andric // The exceptions are branch and call instructions, since they are 1604*0b57cec5SDimitry Andric // executed after all other instructions have completed (at least 1605*0b57cec5SDimitry Andric // conceptually). 1606*0b57cec5SDimitry Andric if (DepType != SDep::Anti) { 1607*0b57cec5SDimitry Andric FoundSequentialDependence = true; 1608*0b57cec5SDimitry Andric break; 1609*0b57cec5SDimitry Andric } 1610*0b57cec5SDimitry Andric } 1611*0b57cec5SDimitry Andric 1612*0b57cec5SDimitry Andric if (FoundSequentialDependence) { 1613*0b57cec5SDimitry Andric Dependence = true; 1614*0b57cec5SDimitry Andric return false; 1615*0b57cec5SDimitry Andric } 1616*0b57cec5SDimitry Andric 1617*0b57cec5SDimitry Andric return true; 1618*0b57cec5SDimitry Andric } 1619*0b57cec5SDimitry Andric 1620*0b57cec5SDimitry Andric bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { 1621*0b57cec5SDimitry Andric assert(SUI->getInstr() && SUJ->getInstr()); 1622*0b57cec5SDimitry Andric MachineInstr &I = *SUI->getInstr(); 1623*0b57cec5SDimitry Andric MachineInstr &J = *SUJ->getInstr(); 1624*0b57cec5SDimitry Andric 1625*0b57cec5SDimitry Andric bool Coexist = !cannotCoexist(I, J); 1626*0b57cec5SDimitry Andric 1627*0b57cec5SDimitry Andric if (Coexist && !Dependence) 1628*0b57cec5SDimitry Andric return true; 1629*0b57cec5SDimitry Andric 1630*0b57cec5SDimitry Andric // Check if the instruction was promoted to a dot-new. If so, demote it 1631*0b57cec5SDimitry Andric // back into a dot-old. 
1632*0b57cec5SDimitry Andric if (PromotedToDotNew) 1633*0b57cec5SDimitry Andric demoteToDotOld(I); 1634*0b57cec5SDimitry Andric 1635*0b57cec5SDimitry Andric cleanUpDotCur(); 1636*0b57cec5SDimitry Andric // Check if the instruction (must be a store) was glued with an allocframe 1637*0b57cec5SDimitry Andric // instruction. If so, restore its offset to its original value, i.e. use 1638*0b57cec5SDimitry Andric // current SP instead of caller's SP. 1639*0b57cec5SDimitry Andric if (GlueAllocframeStore) { 1640*0b57cec5SDimitry Andric useCalleesSP(I); 1641*0b57cec5SDimitry Andric GlueAllocframeStore = false; 1642*0b57cec5SDimitry Andric } 1643*0b57cec5SDimitry Andric 1644*0b57cec5SDimitry Andric if (ChangedOffset != INT64_MAX) 1645*0b57cec5SDimitry Andric undoChangedOffset(I); 1646*0b57cec5SDimitry Andric 1647*0b57cec5SDimitry Andric if (GlueToNewValueJump) { 1648*0b57cec5SDimitry Andric // Putting I and J together would prevent the new-value jump from being 1649*0b57cec5SDimitry Andric // packetized with the producer. In that case I and J must be separated. 
1650*0b57cec5SDimitry Andric GlueToNewValueJump = false; 1651*0b57cec5SDimitry Andric return false; 1652*0b57cec5SDimitry Andric } 1653*0b57cec5SDimitry Andric 1654*0b57cec5SDimitry Andric if (!Coexist) 1655*0b57cec5SDimitry Andric return false; 1656*0b57cec5SDimitry Andric 1657*0b57cec5SDimitry Andric if (ChangedOffset == INT64_MAX && updateOffset(SUI, SUJ)) { 1658*0b57cec5SDimitry Andric FoundSequentialDependence = false; 1659*0b57cec5SDimitry Andric Dependence = false; 1660*0b57cec5SDimitry Andric return true; 1661*0b57cec5SDimitry Andric } 1662*0b57cec5SDimitry Andric 1663*0b57cec5SDimitry Andric return false; 1664*0b57cec5SDimitry Andric } 1665*0b57cec5SDimitry Andric 1666*0b57cec5SDimitry Andric 1667*0b57cec5SDimitry Andric bool HexagonPacketizerList::foundLSInPacket() { 1668*0b57cec5SDimitry Andric bool FoundLoad = false; 1669*0b57cec5SDimitry Andric bool FoundStore = false; 1670*0b57cec5SDimitry Andric 1671*0b57cec5SDimitry Andric for (auto MJ : CurrentPacketMIs) { 1672*0b57cec5SDimitry Andric unsigned Opc = MJ->getOpcode(); 1673*0b57cec5SDimitry Andric if (Opc == Hexagon::S2_allocframe || Opc == Hexagon::L2_deallocframe) 1674*0b57cec5SDimitry Andric continue; 1675*0b57cec5SDimitry Andric if (HII->isMemOp(*MJ)) 1676*0b57cec5SDimitry Andric continue; 1677*0b57cec5SDimitry Andric if (MJ->mayLoad()) 1678*0b57cec5SDimitry Andric FoundLoad = true; 1679*0b57cec5SDimitry Andric if (MJ->mayStore() && !HII->isNewValueStore(*MJ)) 1680*0b57cec5SDimitry Andric FoundStore = true; 1681*0b57cec5SDimitry Andric } 1682*0b57cec5SDimitry Andric return FoundLoad && FoundStore; 1683*0b57cec5SDimitry Andric } 1684*0b57cec5SDimitry Andric 1685*0b57cec5SDimitry Andric 1686*0b57cec5SDimitry Andric MachineBasicBlock::iterator 1687*0b57cec5SDimitry Andric HexagonPacketizerList::addToPacket(MachineInstr &MI) { 1688*0b57cec5SDimitry Andric MachineBasicBlock::iterator MII = MI.getIterator(); 1689*0b57cec5SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 1690*0b57cec5SDimitry 
Andric 1691*0b57cec5SDimitry Andric if (CurrentPacketMIs.empty()) 1692*0b57cec5SDimitry Andric PacketStalls = false; 1693*0b57cec5SDimitry Andric PacketStalls |= producesStall(MI); 1694*0b57cec5SDimitry Andric 1695*0b57cec5SDimitry Andric if (MI.isImplicitDef()) { 1696*0b57cec5SDimitry Andric // Add to the packet to allow subsequent instructions to be checked 1697*0b57cec5SDimitry Andric // properly. 1698*0b57cec5SDimitry Andric CurrentPacketMIs.push_back(&MI); 1699*0b57cec5SDimitry Andric return MII; 1700*0b57cec5SDimitry Andric } 1701*0b57cec5SDimitry Andric assert(ResourceTracker->canReserveResources(MI)); 1702*0b57cec5SDimitry Andric 1703*0b57cec5SDimitry Andric bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); 1704*0b57cec5SDimitry Andric bool Good = true; 1705*0b57cec5SDimitry Andric 1706*0b57cec5SDimitry Andric if (GlueToNewValueJump) { 1707*0b57cec5SDimitry Andric MachineInstr &NvjMI = *++MII; 1708*0b57cec5SDimitry Andric // We need to put both instructions in the same packet: MI and NvjMI. 1709*0b57cec5SDimitry Andric // Either of them can require a constant extender. Try to add both to 1710*0b57cec5SDimitry Andric // the current packet, and if that fails, end the packet and start a 1711*0b57cec5SDimitry Andric // new one. 
1712*0b57cec5SDimitry Andric ResourceTracker->reserveResources(MI); 1713*0b57cec5SDimitry Andric if (ExtMI) 1714*0b57cec5SDimitry Andric Good = tryAllocateResourcesForConstExt(true); 1715*0b57cec5SDimitry Andric 1716*0b57cec5SDimitry Andric bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI); 1717*0b57cec5SDimitry Andric if (Good) { 1718*0b57cec5SDimitry Andric if (ResourceTracker->canReserveResources(NvjMI)) 1719*0b57cec5SDimitry Andric ResourceTracker->reserveResources(NvjMI); 1720*0b57cec5SDimitry Andric else 1721*0b57cec5SDimitry Andric Good = false; 1722*0b57cec5SDimitry Andric } 1723*0b57cec5SDimitry Andric if (Good && ExtNvjMI) 1724*0b57cec5SDimitry Andric Good = tryAllocateResourcesForConstExt(true); 1725*0b57cec5SDimitry Andric 1726*0b57cec5SDimitry Andric if (!Good) { 1727*0b57cec5SDimitry Andric endPacket(MBB, MI); 1728*0b57cec5SDimitry Andric assert(ResourceTracker->canReserveResources(MI)); 1729*0b57cec5SDimitry Andric ResourceTracker->reserveResources(MI); 1730*0b57cec5SDimitry Andric if (ExtMI) { 1731*0b57cec5SDimitry Andric assert(canReserveResourcesForConstExt()); 1732*0b57cec5SDimitry Andric tryAllocateResourcesForConstExt(true); 1733*0b57cec5SDimitry Andric } 1734*0b57cec5SDimitry Andric assert(ResourceTracker->canReserveResources(NvjMI)); 1735*0b57cec5SDimitry Andric ResourceTracker->reserveResources(NvjMI); 1736*0b57cec5SDimitry Andric if (ExtNvjMI) { 1737*0b57cec5SDimitry Andric assert(canReserveResourcesForConstExt()); 1738*0b57cec5SDimitry Andric reserveResourcesForConstExt(); 1739*0b57cec5SDimitry Andric } 1740*0b57cec5SDimitry Andric } 1741*0b57cec5SDimitry Andric CurrentPacketMIs.push_back(&MI); 1742*0b57cec5SDimitry Andric CurrentPacketMIs.push_back(&NvjMI); 1743*0b57cec5SDimitry Andric return MII; 1744*0b57cec5SDimitry Andric } 1745*0b57cec5SDimitry Andric 1746*0b57cec5SDimitry Andric ResourceTracker->reserveResources(MI); 1747*0b57cec5SDimitry Andric if (ExtMI && !tryAllocateResourcesForConstExt(true)) { 
1748*0b57cec5SDimitry Andric endPacket(MBB, MI); 1749*0b57cec5SDimitry Andric if (PromotedToDotNew) 1750*0b57cec5SDimitry Andric demoteToDotOld(MI); 1751*0b57cec5SDimitry Andric if (GlueAllocframeStore) { 1752*0b57cec5SDimitry Andric useCalleesSP(MI); 1753*0b57cec5SDimitry Andric GlueAllocframeStore = false; 1754*0b57cec5SDimitry Andric } 1755*0b57cec5SDimitry Andric ResourceTracker->reserveResources(MI); 1756*0b57cec5SDimitry Andric reserveResourcesForConstExt(); 1757*0b57cec5SDimitry Andric } 1758*0b57cec5SDimitry Andric 1759*0b57cec5SDimitry Andric CurrentPacketMIs.push_back(&MI); 1760*0b57cec5SDimitry Andric return MII; 1761*0b57cec5SDimitry Andric } 1762*0b57cec5SDimitry Andric 1763*0b57cec5SDimitry Andric void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, 1764*0b57cec5SDimitry Andric MachineBasicBlock::iterator EndMI) { 1765*0b57cec5SDimitry Andric // Replace VLIWPacketizerList::endPacket(MBB, EndMI). 1766*0b57cec5SDimitry Andric 1767*0b57cec5SDimitry Andric bool memShufDisabled = getmemShufDisabled(); 1768*0b57cec5SDimitry Andric if (memShufDisabled && !foundLSInPacket()) { 1769*0b57cec5SDimitry Andric setmemShufDisabled(false); 1770*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Not added to NoShufPacket\n"); 1771*0b57cec5SDimitry Andric } 1772*0b57cec5SDimitry Andric memShufDisabled = getmemShufDisabled(); 1773*0b57cec5SDimitry Andric 1774*0b57cec5SDimitry Andric OldPacketMIs.clear(); 1775*0b57cec5SDimitry Andric for (MachineInstr *MI : CurrentPacketMIs) { 1776*0b57cec5SDimitry Andric MachineBasicBlock::instr_iterator NextMI = std::next(MI->getIterator()); 1777*0b57cec5SDimitry Andric for (auto &I : make_range(HII->expandVGatherPseudo(*MI), NextMI)) 1778*0b57cec5SDimitry Andric OldPacketMIs.push_back(&I); 1779*0b57cec5SDimitry Andric } 1780*0b57cec5SDimitry Andric CurrentPacketMIs.clear(); 1781*0b57cec5SDimitry Andric 1782*0b57cec5SDimitry Andric if (OldPacketMIs.size() > 1) { 1783*0b57cec5SDimitry Andric MachineBasicBlock::instr_iterator 
FirstMI(OldPacketMIs.front()); 1784*0b57cec5SDimitry Andric MachineBasicBlock::instr_iterator LastMI(EndMI.getInstrIterator()); 1785*0b57cec5SDimitry Andric finalizeBundle(*MBB, FirstMI, LastMI); 1786*0b57cec5SDimitry Andric auto BundleMII = std::prev(FirstMI); 1787*0b57cec5SDimitry Andric if (memShufDisabled) 1788*0b57cec5SDimitry Andric HII->setBundleNoShuf(BundleMII); 1789*0b57cec5SDimitry Andric 1790*0b57cec5SDimitry Andric setmemShufDisabled(false); 1791*0b57cec5SDimitry Andric } 1792*0b57cec5SDimitry Andric 1793*0b57cec5SDimitry Andric ResourceTracker->clearResources(); 1794*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "End packet\n"); 1795*0b57cec5SDimitry Andric } 1796*0b57cec5SDimitry Andric 1797*0b57cec5SDimitry Andric bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) { 1798*0b57cec5SDimitry Andric if (Minimal) 1799*0b57cec5SDimitry Andric return false; 1800*0b57cec5SDimitry Andric return !producesStall(MI); 1801*0b57cec5SDimitry Andric } 1802*0b57cec5SDimitry Andric 1803*0b57cec5SDimitry Andric // V60 forward scheduling. 1804*0b57cec5SDimitry Andric bool HexagonPacketizerList::producesStall(const MachineInstr &I) { 1805*0b57cec5SDimitry Andric // If the packet already stalls, then ignore the stall from a subsequent 1806*0b57cec5SDimitry Andric // instruction in the same packet. 1807*0b57cec5SDimitry Andric if (PacketStalls) 1808*0b57cec5SDimitry Andric return false; 1809*0b57cec5SDimitry Andric 1810*0b57cec5SDimitry Andric // Check whether the previous packet is in a different loop. If this is the 1811*0b57cec5SDimitry Andric // case, there is little point in trying to avoid a stall because that would 1812*0b57cec5SDimitry Andric // favor the rare case (loop entry) over the common case (loop iteration). 
1813*0b57cec5SDimitry Andric // 1814*0b57cec5SDimitry Andric // TODO: We should really be able to check all the incoming edges if this is 1815*0b57cec5SDimitry Andric // the first packet in a basic block, so we can avoid stalls from the loop 1816*0b57cec5SDimitry Andric // backedge. 1817*0b57cec5SDimitry Andric if (!OldPacketMIs.empty()) { 1818*0b57cec5SDimitry Andric auto *OldBB = OldPacketMIs.front()->getParent(); 1819*0b57cec5SDimitry Andric auto *ThisBB = I.getParent(); 1820*0b57cec5SDimitry Andric if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB)) 1821*0b57cec5SDimitry Andric return false; 1822*0b57cec5SDimitry Andric } 1823*0b57cec5SDimitry Andric 1824*0b57cec5SDimitry Andric SUnit *SUI = MIToSUnit[const_cast<MachineInstr *>(&I)]; 1825*0b57cec5SDimitry Andric 1826*0b57cec5SDimitry Andric // If the latency is 0 and there is a data dependence between this 1827*0b57cec5SDimitry Andric // instruction and any instruction in the current packet, we disregard any 1828*0b57cec5SDimitry Andric // potential stalls due to the instructions in the previous packet. Most of 1829*0b57cec5SDimitry Andric // the instruction pairs that can go together in the same packet have 0 1830*0b57cec5SDimitry Andric // latency between them. The exceptions are 1831*0b57cec5SDimitry Andric // 1. NewValueJumps as they're generated much later and the latencies can't 1832*0b57cec5SDimitry Andric // be changed at that point. 1833*0b57cec5SDimitry Andric // 2. .cur instructions, if its consumer has a 0 latency successor (such as 1834*0b57cec5SDimitry Andric // .new). In this case, the latency between .cur and the consumer stays 1835*0b57cec5SDimitry Andric // non-zero even though we can have both .cur and .new in the same packet. 1836*0b57cec5SDimitry Andric // Changing the latency to 0 is not an option as it causes software pipeliner 1837*0b57cec5SDimitry Andric // to not pipeline in some cases. 
1838*0b57cec5SDimitry Andric 1839*0b57cec5SDimitry Andric // For Example: 1840*0b57cec5SDimitry Andric // { 1841*0b57cec5SDimitry Andric // I1: v6.cur = vmem(r0++#1) 1842*0b57cec5SDimitry Andric // I2: v7 = valign(v6,v4,r2) 1843*0b57cec5SDimitry Andric // I3: vmem(r5++#1) = v7.new 1844*0b57cec5SDimitry Andric // } 1845*0b57cec5SDimitry Andric // Here I2 and I3 has 0 cycle latency, but I1 and I2 has 2. 1846*0b57cec5SDimitry Andric 1847*0b57cec5SDimitry Andric for (auto J : CurrentPacketMIs) { 1848*0b57cec5SDimitry Andric SUnit *SUJ = MIToSUnit[J]; 1849*0b57cec5SDimitry Andric for (auto &Pred : SUI->Preds) 1850*0b57cec5SDimitry Andric if (Pred.getSUnit() == SUJ) 1851*0b57cec5SDimitry Andric if ((Pred.getLatency() == 0 && Pred.isAssignedRegDep()) || 1852*0b57cec5SDimitry Andric HII->isNewValueJump(I) || HII->isToBeScheduledASAP(*J, I)) 1853*0b57cec5SDimitry Andric return false; 1854*0b57cec5SDimitry Andric } 1855*0b57cec5SDimitry Andric 1856*0b57cec5SDimitry Andric // Check if the latency is greater than one between this instruction and any 1857*0b57cec5SDimitry Andric // instruction in the previous packet. 
1858*0b57cec5SDimitry Andric for (auto J : OldPacketMIs) { 1859*0b57cec5SDimitry Andric SUnit *SUJ = MIToSUnit[J]; 1860*0b57cec5SDimitry Andric for (auto &Pred : SUI->Preds) 1861*0b57cec5SDimitry Andric if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1) 1862*0b57cec5SDimitry Andric return true; 1863*0b57cec5SDimitry Andric } 1864*0b57cec5SDimitry Andric 1865*0b57cec5SDimitry Andric return false; 1866*0b57cec5SDimitry Andric } 1867*0b57cec5SDimitry Andric 1868*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 1869*0b57cec5SDimitry Andric // Public Constructor Functions 1870*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 1871*0b57cec5SDimitry Andric 1872*0b57cec5SDimitry Andric FunctionPass *llvm::createHexagonPacketizer(bool Minimal) { 1873*0b57cec5SDimitry Andric return new HexagonPacketizer(Minimal); 1874*0b57cec5SDimitry Andric } 1875