Lines Matching +full:mi +full:- +full:v
1 //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// re-scheduling of MachineInstrs.
12 //===----------------------------------------------------------------------===//
40 #include "llvm/Config/llvm-config.h"
62 #define DEBUG_TYPE "machine-scheduler"
65 EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
66 cl::desc("Enable use of AA during MI DAG construction"));
68 static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
69 cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
73 // reached means best-effort, but may be slow.
77 static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
79 "prior to scheduling, at which point a trade-off "
83 "dag-maps-reduction-size", cl::Hidden,
89 "sched-print-cycles", cl::Hidden, cl::init(false),
105 dbgs() << "SU(" << SU->NodeNum << ")"; in dumpSUList()
130 static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, in getUnderlyingObjectsForInstr()
135 for (const MachineMemOperand *MMO : MI->memoperands()) { in getUnderlyingObjectsForInstr()
137 if (MMO->isVolatile() || MMO->isAtomic()) in getUnderlyingObjectsForInstr()
140 if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) { in getUnderlyingObjectsForInstr()
152 if (PSV->isAliased(&MFI)) in getUnderlyingObjectsForInstr()
155 bool MayAlias = PSV->mayAlias(&MFI); in getUnderlyingObjectsForInstr()
157 } else if (const Value *V = MMO->getValue()) { in getUnderlyingObjectsForInstr()
159 if (!getUnderlyingObjectsForCodeGen(V, Objs)) in getUnderlyingObjectsForInstr()
162 for (Value *V : Objs) { in getUnderlyingObjectsForInstr()
163 assert(isIdentifiedObject(V)); in getUnderlyingObjectsForInstr()
164 Objects.emplace_back(V, true); in getUnderlyingObjectsForInstr()
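The getUnderlyingObjectsForInstr() lines above collect one (object, mayAlias) pair per memory operand and give up as soon as an operand defeats the analysis (volatile/atomic access, aliased pseudo source value, or an unidentifiable IR object). A minimal self-contained sketch of that classification, where MemOp and its fields are hypothetical stand-ins for MachineMemOperand rather than LLVM's API:

```cpp
#include <optional>
#include <utility>
#include <vector>

struct MemOp {                       // hypothetical stand-in for MachineMemOperand
  bool Volatile = false;
  bool Atomic = false;
  bool HasIdentifiedObject = false;  // e.g. backed by an alloca or global
  bool ObjectMayAlias = true;        // conservative default
  int ObjectId = -1;
};

// Returns (object, mayAlias) pairs, or std::nullopt when the caller must fall
// back to fully conservative chaining, mirroring the early-return paths above.
std::optional<std::vector<std::pair<int, bool>>>
classifyMemOps(const std::vector<MemOp> &Ops) {
  std::vector<std::pair<int, bool>> Objects;
  for (const MemOp &Op : Ops) {
    if (Op.Volatile || Op.Atomic)
      return std::nullopt;           // ordering must stay fully conservative
    if (!Op.HasIdentifiedObject)
      return std::nullopt;           // unknown memory object
    Objects.emplace_back(Op.ObjectId, Op.ObjectMayAlias);
  }
  return Objects;
}
```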
205 RegionEnd != BB->end() in addSchedBarrierDeps()
211 for (const MachineOperand &MO : ExitMI->all_uses()) { in addSchedBarrierDeps()
214 for (MCRegUnit Unit : TRI->regunits(Reg)) in addSchedBarrierDeps()
215 Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit)); in addSchedBarrierDeps()
221 if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) { in addSchedBarrierDeps()
224 for (const MachineBasicBlock *Succ : BB->successors()) { in addSchedBarrierDeps()
225 for (const auto &LI : Succ->liveins()) { in addSchedBarrierDeps()
229 Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit)); in addSchedBarrierDeps()
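addSchedBarrierDeps() models registers that are live into any successor block (or used by ExitMI) as uses at the synthetic ExitSU, so in-region defs of those registers get a latency-carrying edge to the region boundary. A tiny sketch of just the live-in collection, with Block and LiveIns as simplified stand-ins for MachineBasicBlock live-in lists:

```cpp
#include <set>
#include <vector>

struct Block { std::vector<unsigned> LiveIns; };

// Every register live into a successor becomes a use operand of ExitSU.
std::set<unsigned> collectExitUses(const std::vector<Block> &Successors) {
  std::set<unsigned> ExitUses;
  for (const Block &Succ : Successors)
    for (unsigned Reg : Succ.LiveIns)
      ExitUses.insert(Reg);
  return ExitUses;
}
```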
239 const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx); in addPhysRegDataDeps()
243 // Ask the target if address-backscheduling is desirable, and if so how much. in addPhysRegDataDeps()
246 // Only use any non-zero latency for real defs/uses, in contrast to in addPhysRegDataDeps()
248 const MCInstrDesc &DefMIDesc = SU->getInstr()->getDesc(); in addPhysRegDataDeps()
251 for (MCRegUnit Unit : TRI->regunits(Reg)) { in addPhysRegDataDeps()
254 SUnit *UseSU = I->SU; in addPhysRegDataDeps()
261 int UseOpIdx = I->OpIdx; in addPhysRegDataDeps()
269 SU->hasPhysRegDefs = true; in addPhysRegDataDeps()
271 UseInstr = UseSU->getInstr(); in addPhysRegDataDeps()
272 Register UseReg = UseInstr->getOperand(UseOpIdx).getReg(); in addPhysRegDataDeps()
273 const MCInstrDesc &UseMIDesc = UseInstr->getDesc(); in addPhysRegDataDeps()
280 Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, in addPhysRegDataDeps()
286 UseSU->addPred(Dep); in addPhysRegDataDeps()
295 MachineInstr *MI = SU->getInstr(); in addPhysRegDeps()
296 MachineOperand &MO = MI->getOperand(OperIdx); in addPhysRegDeps()
305 // dependencies we use a latency of 0 because for a multi-issue in addPhysRegDeps()
311 for (MCRegUnit Unit : TRI->regunits(Reg)) { in addPhysRegDeps()
314 SUnit *DefSU = I->SU; in addPhysRegDeps()
317 MachineInstr *DefInstr = DefSU->getInstr(); in addPhysRegDeps()
318 MachineOperand &DefMO = DefInstr->getOperand(I->OpIdx); in addPhysRegDeps()
324 SchedModel.computeOutputLatency(MI, OperIdx, DefInstr)); in addPhysRegDeps()
326 ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep, in addPhysRegDeps()
328 DefSU->addPred(Dep); in addPhysRegDeps()
334 SU->hasPhysRegUses = true; in addPhysRegDeps()
338 for (MCRegUnit Unit : TRI->regunits(Reg)) in addPhysRegDeps()
346 for (MCRegUnit Unit : TRI->regunits(Reg)) { in addPhysRegDeps()
352 if (MO.isDead() && SU->isCall) { in addPhysRegDeps()
358 for (MCRegUnit Unit : TRI->regunits(Reg)) { in addPhysRegDeps()
363 isBegin = (--I) == B; in addPhysRegDeps()
364 if (!I->SU->isCall) in addPhysRegDeps()
372 for (MCRegUnit Unit : TRI->regunits(Reg)) in addPhysRegDeps()
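The addPhysRegDeps() loops above all iterate over TRI->regunits(Reg) because readers and writers are tracked per register unit, so overlapping physical registers (sub- and super-registers sharing a unit) collide automatically. A simplified sketch of that bookkeeping under the usual dependence rules (anti: a read stays before a later write; output: an earlier write stays before a later write). regUnits(), Access and RegUnitState are hypothetical stand-ins; the real code walks the region bottom-up and also handles kill/dead flags and the call-clobber pruning, all omitted here:

```cpp
#include <unordered_map>
#include <utility>
#include <vector>

using Unit = unsigned;
struct Access { unsigned NodeNum; unsigned OpIdx; };

// Identity mapping for the sketch; real targets expand a register into units.
std::vector<Unit> regUnits(unsigned Reg) { return {Reg}; }

struct RegUnitState {
  std::unordered_map<Unit, std::vector<Access>> Uses, Defs;

  void addUse(unsigned Reg, Access Use) {
    for (Unit U : regUnits(Reg))
      Uses[U].push_back(Use);
  }

  void addDef(unsigned Reg, Access Def,
              std::vector<std::pair<Access, Access>> &AntiDeps,
              std::vector<std::pair<Access, Access>> &OutputDeps) {
    for (Unit U : regUnits(Reg)) {
      for (const Access &Use : Uses[U])
        AntiDeps.push_back({Use, Def});     // read must stay before this write
      for (const Access &Prev : Defs[U])
        OutputDeps.push_back({Prev, Def});  // earlier write before this write
      Defs[U].push_back(Def);
    }
  }
};
```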
388 return TRI->getSubRegIndexLaneMask(SubReg); in getLaneMaskForMO()
395 return (RegUse->LaneMask & getLaneMaskForMO(MO)).none(); in deadDefHasNoUse()
405 MachineInstr *MI = SU->getInstr(); in addVRegDefDeps()
406 MachineOperand &MO = MI->getOperand(OperIdx); in addVRegDefDeps()
414 // If we have a <read-undef> flag, none of the lane values comes from an in addVRegDefDeps()
424 llvm::drop_begin(MI->operands(), OperIdx + 1)) in addVRegDefDeps()
429 // Clear undef flag, we'll re-add it later once we know which subregister in addVRegDefDeps()
444 LaneBitmask LaneMask = I->LaneMask; in addVRegDefDeps()
452 SUnit *UseSU = I->SU; in addVRegDefDeps()
453 MachineInstr *Use = UseSU->getInstr(); in addVRegDefDeps()
455 Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, in addVRegDefDeps()
456 I->OperandIndex)); in addVRegDefDeps()
457 ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep, in addVRegDefDeps()
459 UseSU->addPred(Dep); in addVRegDefDeps()
465 I->LaneMask = LaneMask; in addVRegDefDeps()
482 // is also useful if output latency exceeds def-use latency. in addVRegDefDeps()
494 // add super-register defs/uses, to imply that although we only access parts in addVRegDefDeps()
500 SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); in addVRegDefDeps()
501 DefSU->addPred(Dep); in addVRegDefDeps()
505 // VReg2SUnit for the non-overlapping part. in addVRegDefDeps()
525 const MachineInstr *MI = SU->getInstr(); in addVRegUseDeps()
526 assert(!MI->isDebugOrPseudoInstr()); in addVRegUseDeps()
528 const MachineOperand &MO = MI->getOperand(OperIdx); in addVRegUseDeps()
546 V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg)); in addVRegUseDeps()
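The virtual-register code above is lane-mask driven: a recorded use only receives an edge from a new (sub-register) def when their lane masks overlap, and deadDefHasNoUse() is exactly the "no overlap with any use" test. A small sketch of that check, with LaneMask as a plain integer standing in for llvm::LaneBitmask and usesReachedByDef() as a hypothetical helper:

```cpp
#include <cstdint>
#include <vector>

using LaneMask = std::uint64_t;

struct VRegUse { unsigned NodeNum; LaneMask Lanes; };

// Collect the uses reached by a def covering DefLanes.
std::vector<unsigned> usesReachedByDef(const std::vector<VRegUse> &Uses,
                                       LaneMask DefLanes) {
  std::vector<unsigned> Reached;
  for (const VRegUse &U : Uses)
    if (U.Lanes & DefLanes)   // cf. (RegUse->LaneMask & getLaneMaskForMO(MO)).any()
      Reached.push_back(U.NodeNum);
  return Reached;             // an empty result is what deadDefHasNoUse() detects
}
```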
550 /// Returns true if MI is an instruction we are unable to reason about
552 static inline bool isGlobalMemoryObject(MachineInstr *MI) { in isGlobalMemoryObject()
553 return MI->isCall() || MI->hasUnmodeledSideEffects() || in isGlobalMemoryObject()
554 (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad()); in isGlobalMemoryObject()
559 if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) { in addChainDependency()
562 SUb->addPred(Dep); in addChainDependency()
566 /// Creates an SUnit for each real instruction, numbered in top-down
583 for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) { in initSUnits()
584 if (MI.isDebugOrPseudoInstr()) in initSUnits()
587 SUnit *SU = newSUnit(&MI); in initSUnits()
588 MISUnitMap[&MI] = SU; in initSUnits()
590 SU->isCall = MI.isCall(); in initSUnits()
591 SU->isCommutable = MI.isCommutable(); in initSUnits()
593 // Assign the Latency field of SU using target-provided information. in initSUnits()
594 SU->Latency = SchedModel.computeInstrLatency(SU->getInstr()); in initSUnits()
602 // require the same resources. This is used for in-order execution pipelines in initSUnits()
603 // within an out-of-order core. These are identified by BufferSize=1. in initSUnits()
609 switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) { in initSUnits()
611 SU->hasReservedResource = true; in initSUnits()
614 SU->isUnbuffered = true; in initSUnits()
639 /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
641 void inline insert(SUnit *SU, ValueType V) { in insert()
642 MapVector::operator[](V).push_back(SU); in insert()
646 /// Clears the list of SUs mapped to V.
647 void inline clearList(ValueType V) { in clearList()
648 iterator Itr = find(V); in clearList()
650 assert(NumNodes >= Itr->second.size()); in clearList()
651 NumNodes -= Itr->second.size(); in clearList()
653 Itr->second.clear(); in clearList()
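The insert()/clearList() lines above belong to Value2SUsMap, which maps an underlying object to the list of SUnits currently chained to it and keeps a running node count; that count is what buildSchedGraph() compares against the dag-maps-huge-region threshold, and the lists are looked up again by the addChainDependencies() lines just below. A miniature stand-in (the real class derives from MapVector and stores SUnit*; this sketch uses plain ints):

```cpp
#include <cassert>
#include <map>
#include <vector>

class ObjectToNodes {
  std::map<int, std::vector<unsigned>> Map;
  unsigned NumNodes = 0;

public:
  void insert(unsigned SU, int V) {     // cf. insert(SUnit *, ValueType) above
    Map[V].push_back(SU);
    ++NumNodes;
  }

  void clearList(int V) {               // cf. clearList(ValueType) above
    auto It = Map.find(V);
    if (It == Map.end())
      return;
    assert(NumNodes >= It->second.size());
    NumNodes -= It->second.size();
    It->second.clear();
  }

  unsigned size() const { return NumNodes; }     // compared against HugeRegion

  const std::vector<unsigned> *lookup(int V) const {
    auto It = Map.find(V);
    return It == Map.end() ? nullptr : &It->second;
  }
};
```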
688 ValueType V) { in addChainDependencies()
689 Value2SUsMap::iterator Itr = Val2SUsMap.find(V); in addChainDependencies()
691 addChainDependencies(SU, Itr->second, in addChainDependencies()
698 for (auto &[V, SUs] : map) { in addBarrierChain()
699 (void)V; in addBarrierChain()
701 SU->addPredBarrier(BarrierChain); in addBarrierChain()
712 SUList &sus = CurrItr->second; in insertBarrierChain()
716 if ((*SUItr)->NodeNum <= BarrierChain->NodeNum) in insertBarrierChain()
719 (*SUItr)->addPredBarrier(BarrierChain); in insertBarrierChain()
751 this->TrackLaneMasks = TrackLaneMasks; in buildSchedGraph()
759 PDiffs->init(SUnits.size()); in buildSchedGraph()
768 // non-aliasing if they both depend on only identified Values and do in buildSchedGraph()
781 // Track all instructions that may raise floating-point exceptions. in buildSchedGraph()
796 Defs.setUniverse(TRI->getNumRegs()); in buildSchedGraph()
797 Uses.setUniverse(TRI->getNumRegs()); in buildSchedGraph()
812 MII != MIE; --MII) { in buildSchedGraph()
813 MachineInstr &MI = *std::prev(MII); in buildSchedGraph()
815 DbgValues.emplace_back(DbgMI, &MI); in buildSchedGraph()
819 if (MI.isDebugValue() || MI.isDebugPHI()) { in buildSchedGraph()
820 DbgMI = &MI; in buildSchedGraph()
824 if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe()) in buildSchedGraph()
827 SUnit *SU = MISUnitMap[&MI]; in buildSchedGraph()
828 assert(SU && "No SUnit mapped to this MI"); in buildSchedGraph()
832 RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false); in buildSchedGraph()
834 SlotIndex SlotIdx = LIS->getInstructionIndex(MI); in buildSchedGraph()
838 PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI); in buildSchedGraph()
840 if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI) in buildSchedGraph()
841 RPTracker->recedeSkipDebugValues(); in buildSchedGraph()
842 assert(&*RPTracker->getPos() == &MI && "RPTracker in sync"); in buildSchedGraph()
843 RPTracker->recede(RegOpers); in buildSchedGraph()
847 (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) && in buildSchedGraph()
850 // Add register-based dependencies (data, anti, and output). in buildSchedGraph()
851 // For some instructions (calls, returns, inline-asm, etc.) there can in buildSchedGraph()
856 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) { in buildSchedGraph()
857 const MachineOperand &MO = MI.getOperand(j); in buildSchedGraph()
869 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) { in buildSchedGraph()
870 const MachineOperand &MO = MI.getOperand(j); in buildSchedGraph()
886 // dependence edge to ExitSU to model the live-out latency. This is required in buildSchedGraph()
887 // for vreg defs with no in-region use, and prefetches with no vreg def. in buildSchedGraph()
891 if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) { in buildSchedGraph()
893 Dep.setLatency(SU->Latency - 1); in buildSchedGraph()
902 if (isGlobalMemoryObject(&MI)) { in buildSchedGraph()
906 BarrierChain->addPredBarrier(SU); in buildSchedGraph()
910 << BarrierChain->NodeNum << ").\n";); in buildSchedGraph()
924 if (MI.mayRaiseFPException()) { in buildSchedGraph()
926 BarrierChain->addPredBarrier(SU); in buildSchedGraph()
938 if (!MI.mayStore() && in buildSchedGraph()
939 !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad())) in buildSchedGraph()
944 BarrierChain->addPredBarrier(SU); in buildSchedGraph()
946 // Find the underlying objects for MI. The Objs vector is either in buildSchedGraph()
950 bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs, in buildSchedGraph()
953 if (MI.mayStore()) { in buildSchedGraph()
967 ValueType V = UnderlObj.getValue(); in buildSchedGraph()
970 // Add dependencies to previous stores and loads mapped to V. in buildSchedGraph()
971 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V); in buildSchedGraph()
972 addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V); in buildSchedGraph()
975 // self-loop edge if multiple underlying objects are present. in buildSchedGraph()
977 ValueType V = UnderlObj.getValue(); in buildSchedGraph()
980 // Map this store to V. in buildSchedGraph()
981 (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V); in buildSchedGraph()
997 ValueType V = UnderlObj.getValue(); in buildSchedGraph()
1002 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V); in buildSchedGraph()
1004 // Map this load to V. in buildSchedGraph()
1005 (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V); in buildSchedGraph()
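The store/load handling above keeps four per-object maps (Stores, Loads, NonAliasStores, NonAliasLoads): each new access is chained against the accesses already mapped to the same underlying object, then added to the appropriate map. A simplified sketch of that bookkeeping; Node, Object and MemChains are hypothetical, the walk here is in program order for clarity (buildSchedGraph() actually walks the region bottom-up), and the unknown-object fallback and BarrierChain handling are omitted:

```cpp
#include <map>
#include <utility>
#include <vector>

using Node = unsigned;
using Object = int;
using ObjMap = std::map<Object, std::vector<Node>>;

struct MemChains {
  ObjMap Stores, Loads;                  // accesses that may alias other memory
  ObjMap NonAliasStores, NonAliasLoads;  // accesses to provably distinct objects

  // A store is ordered after every earlier store and load of the same object.
  void addStore(Node SU, Object V, bool MayAlias,
                std::vector<std::pair<Node, Node>> &Edges) {
    ObjMap &S = MayAlias ? Stores : NonAliasStores;
    ObjMap &L = MayAlias ? Loads : NonAliasLoads;
    for (Node Prev : S[V]) Edges.push_back({Prev, SU});
    for (Node Prev : L[V]) Edges.push_back({Prev, SU});
    S[V].push_back(SU);
  }

  // A load only needs ordering against earlier stores of the same object.
  void addLoad(Node SU, Object V, bool MayAlias,
               std::vector<std::pair<Node, Node>> &Edges) {
    ObjMap &S = MayAlias ? Stores : NonAliasStores;
    ObjMap &L = MayAlias ? Loads : NonAliasLoads;
    for (Node Prev : S[V]) Edges.push_back({Prev, SU});
    L[V].push_back(SU);
  }
};
```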
1036 PSV->printCustom(OS); in operator <<()
1043 const Value *V = cast<const Value *>(ValType); in dump()
1044 if (isa<UndefValue>(V)) in dump()
1047 V->printAsOperand(dbgs()); in dump()
1066 for (const auto &[V, SUs] : stores) { in reduceHugeMemNodeMaps()
1067 (void)V; in reduceHugeMemNodeMaps()
1069 NodeNums.push_back(SU->NodeNum); in reduceHugeMemNodeMaps()
1071 for (const auto &[V, SUs] : loads) { in reduceHugeMemNodeMaps()
1072 (void)V; in reduceHugeMemNodeMaps()
1074 NodeNums.push_back(SU->NodeNum); in reduceHugeMemNodeMaps()
1082 SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)]; in reduceHugeMemNodeMaps()
1084 // The aliasing and non-aliasing maps reduce independently of each in reduceHugeMemNodeMaps()
1088 if (newBarrierChain->NodeNum < BarrierChain->NodeNum) { in reduceHugeMemNodeMaps()
1089 BarrierChain->addPredBarrier(newBarrierChain); in reduceHugeMemNodeMaps()
1092 << BarrierChain->NodeNum << ").\n";); in reduceHugeMemNodeMaps()
1096 << BarrierChain->NodeNum << ").\n";); in reduceHugeMemNodeMaps()
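reduceHugeMemNodeMaps() picks the new barrier by gathering the node numbers of every SU held in the stores and loads maps and taking the N-th largest, which is what the *(NodeNums.end() - N) indexing above selects; everything at or below that node can then hang off the single barrier instead of being tracked per object. A sketch of just that selection (pickNewBarrier() is hypothetical; N corresponds to the dag-maps-reduction-size option):

```cpp
#include <algorithm>
#include <vector>

unsigned pickNewBarrier(std::vector<unsigned> NodeNums, unsigned N) {
  std::sort(NodeNums.begin(), NodeNums.end());
  // The huge-region threshold guarantees NodeNums.size() >= N here.
  return *(NodeNums.end() - N);
}
```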
1109 MachineInstr &MI, bool addToLiveRegs) { in toggleKills()
1110 for (MachineOperand &MO : MI.operands()) { in toggleKills()
1134 for (MachineInstr &MI : llvm::reverse(MBB)) { in fixupKills()
1135 if (MI.isDebugOrPseudoInstr()) in fixupKills()
1141 for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { in fixupKills()
1156 if (!MI.isBundled()) { in fixupKills()
1157 toggleKills(MRI, LiveRegs, MI, true); in fixupKills()
1159 MachineBasicBlock::instr_iterator Bundle = MI.getIterator(); in fixupKills()
1160 if (MI.isBundle()) in fixupKills()
1161 toggleKills(MRI, LiveRegs, MI, false); in fixupKills()
1167 while (I->isBundledWithSucc()) in fixupKills()
1170 if (!I->isDebugOrPseudoInstr()) in fixupKills()
1172 --I; in fixupKills()
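fixupKills() walks the block bottom-up with a set of registers live below the current point: a use whose register is not yet in the set is the last use and gets a kill flag, defs remove their register, uses add it. A miniature version of that idea; Op and Inst are hypothetical flattened operands, and sub-registers, bundles and dead defs handled by the real code are ignored:

```cpp
#include <unordered_set>
#include <vector>

struct Op { unsigned Reg; bool IsDef; bool Kill = false; };
struct Inst { std::vector<Op> Ops; };

void fixupKills(std::vector<Inst> &Block) {
  std::unordered_set<unsigned> LiveRegs;  // live *after* the current instruction
  for (auto It = Block.rbegin(); It != Block.rend(); ++It) {
    for (Op &O : It->Ops)                 // kills are decided against live-after
      if (!O.IsDef)
        O.Kill = !LiveRegs.count(O.Reg);
    for (Op &O : It->Ops)                 // then update liveness: defs end a range,
      if (O.IsDef)
        LiveRegs.erase(O.Reg);
    for (Op &O : It->Ops)                 // ...and uses (seen bottom-up) start one
      if (!O.IsDef)
        LiveRegs.insert(O.Reg);
  }
}
```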
1185 SU.getInstr()->dump(); in dumpNode()
1208 SU->getInstr()->print(oss, /*IsStandalone=*/true); in getGraphNodeLabel()
1215 return "dag." + BB->getFullName(); in getDAGName()
1230 SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial()); in addEdge()
1235 //===----------------------------------------------------------------------===//
1237 //===----------------------------------------------------------------------===//
1274 return R.DFSNodeData[SU->NodeNum].SubtreeID in isVisited()
1281 R.DFSNodeData[SU->NodeNum].InstrCount = in visitPreorder()
1282 SU->getInstr()->isTransient() ? 0 : 1; in visitPreorder()
1291 R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum; in visitPostorderNode()
1292 RootData RData(SU->NodeNum); in visitPostorderNode()
1293 RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1; in visitPostorderNode()
1299 // only useful if multiple high-pressure paths are possible. in visitPostorderNode()
1300 unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount; in visitPostorderNode()
1301 for (const SDep &PredDep : SU->Preds) { in visitPostorderNode()
1304 unsigned PredNum = PredDep.getSUnit()->NodeNum; in visitPostorderNode()
1305 if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit) in visitPostorderNode()
1313 RootSet[PredNum].ParentNodeID = SU->NodeNum; in visitPostorderNode()
1324 RootSet[SU->NodeNum] = RData; in visitPostorderNode()
1331 R.DFSNodeData[Succ->NodeNum].InstrCount in visitPostorderEdge()
1332 += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount; in visitPostorderEdge()
1367 unsigned PredTree = SubtreeClasses[Pred->NodeNum]; in finalize()
1368 unsigned SuccTree = SubtreeClasses[Succ->NodeNum]; in finalize()
1371 unsigned Depth = Pred->getDepth(); in finalize()
1386 unsigned PredNum = PredSU->NodeNum; in joinPredSubtree()
1393 for (const SDep &SuccDep : PredSU->Succs) { in joinPredSubtree()
1401 R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum; in joinPredSubtree()
1402 SubtreeClasses.join(Succ->NodeNum, PredNum); in joinPredSubtree()
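joinPredSubtree() folds a predecessor's provisional subtree into the successor's equivalence class once it passes the size and single-data-successor checks. The union-find below is a hypothetical miniature of the structure LLVM uses (IntEqClasses), shown only to illustrate what SubtreeClasses.join(Succ->NodeNum, PredNum) does:

```cpp
#include <numeric>
#include <vector>

struct UnionFind {
  std::vector<unsigned> Parent;
  explicit UnionFind(unsigned N) : Parent(N) {
    std::iota(Parent.begin(), Parent.end(), 0u);
  }
  unsigned find(unsigned X) {
    while (Parent[X] != X)
      X = Parent[X] = Parent[Parent[X]];  // path halving
    return X;
  }
  void join(unsigned A, unsigned B) { Parent[find(B)] = find(A); }
};
```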
1430 /// Manage the stack used by a reverse depth-first search over the DAG.
1438 DFSStack.emplace_back(SU, SU->Preds.begin()); in follow()
1452 return getCurr()->Preds.end(); in getPredEnd()
1459 for (const SDep &SuccDep : SU->Succs) { in hasDataSucc()
1461 !SuccDep.getSUnit()->isBoundaryNode()) in hasDataSucc()
1467 /// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
1471 llvm_unreachable("Top-down ILP metric is unimplemented"); in compute()
1486 // Ignore non-data edges. in compute()
1488 || PredDep.getSUnit()->isBoundaryNode()) { in compute()