10b57cec5SDimitry Andric //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric /// \file 90b57cec5SDimitry Andric /// 100b57cec5SDimitry Andric /// This file implements the functionalities used by the BottleneckAnalysis 110b57cec5SDimitry Andric /// to report bottleneck info. 120b57cec5SDimitry Andric /// 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "Views/BottleneckAnalysis.h" 160b57cec5SDimitry Andric #include "llvm/MC/MCInst.h" 170b57cec5SDimitry Andric #include "llvm/MCA/Support.h" 180b57cec5SDimitry Andric #include "llvm/Support/Format.h" 190b57cec5SDimitry Andric #include "llvm/Support/FormattedStream.h" 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric namespace llvm { 220b57cec5SDimitry Andric namespace mca { 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric #define DEBUG_TYPE "llvm-mca" 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric PressureTracker::PressureTracker(const MCSchedModel &Model) 270b57cec5SDimitry Andric : SM(Model), 280b57cec5SDimitry Andric ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), 290b57cec5SDimitry Andric ProcResID2Mask(Model.getNumProcResourceKinds(), 0), 300b57cec5SDimitry Andric ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), 310b57cec5SDimitry Andric ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) { 320b57cec5SDimitry Andric computeProcResourceMasks(SM, ProcResID2Mask); 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric // Ignore the invalid resource at index zero. 350b57cec5SDimitry Andric unsigned NextResourceUsersIdx = 0; 360b57cec5SDimitry Andric for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) { 370b57cec5SDimitry Andric const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); 380b57cec5SDimitry Andric ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx; 390b57cec5SDimitry Andric NextResourceUsersIdx += ProcResource.NumUnits; 400b57cec5SDimitry Andric uint64_t ResourceMask = ProcResID2Mask[I]; 410b57cec5SDimitry Andric ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I; 420b57cec5SDimitry Andric } 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric ResourceUsers.resize(NextResourceUsersIdx); 450b57cec5SDimitry Andric std::fill(ResourceUsers.begin(), ResourceUsers.end(), 460b57cec5SDimitry Andric std::make_pair<unsigned, unsigned>(~0U, 0U)); 470b57cec5SDimitry Andric } 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric void PressureTracker::getResourceUsers(uint64_t ResourceMask, 500b57cec5SDimitry Andric SmallVectorImpl<User> &Users) const { 510b57cec5SDimitry Andric unsigned Index = getResourceStateIndex(ResourceMask); 520b57cec5SDimitry Andric unsigned ProcResID = ResIdx2ProcResID[Index]; 530b57cec5SDimitry Andric const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); 540b57cec5SDimitry Andric for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { 550b57cec5SDimitry Andric const User U = getResourceUser(ProcResID, I); 560b57cec5SDimitry Andric if (U.second && IPI.find(U.first) != IPI.end()) 570b57cec5SDimitry Andric Users.emplace_back(U); 580b57cec5SDimitry Andric } 590b57cec5SDimitry Andric } 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric void PressureTracker::onInstructionDispatched(unsigned IID) { 620b57cec5SDimitry Andric IPI.insert(std::make_pair(IID, InstructionPressureInfo())); 630b57cec5SDimitry Andric } 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); } 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric void PressureTracker::handleInstructionIssuedEvent( 680b57cec5SDimitry Andric const HWInstructionIssuedEvent &Event) { 690b57cec5SDimitry Andric unsigned IID = Event.IR.getSourceIndex(); 700b57cec5SDimitry Andric using ResourceRef = HWInstructionIssuedEvent::ResourceRef; 710b57cec5SDimitry Andric using ResourceUse = std::pair<ResourceRef, ResourceCycles>; 720b57cec5SDimitry Andric for (const ResourceUse &Use : Event.UsedResources) { 730b57cec5SDimitry Andric const ResourceRef &RR = Use.first; 740b57cec5SDimitry Andric unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; 750b57cec5SDimitry Andric Index += countTrailingZeros(RR.second); 760b57cec5SDimitry Andric ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator()); 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric } 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric void PressureTracker::updateResourcePressureDistribution( 810b57cec5SDimitry Andric uint64_t CumulativeMask) { 820b57cec5SDimitry Andric while (CumulativeMask) { 830b57cec5SDimitry Andric uint64_t Current = CumulativeMask & (-CumulativeMask); 840b57cec5SDimitry Andric unsigned ResIdx = getResourceStateIndex(Current); 850b57cec5SDimitry Andric unsigned ProcResID = ResIdx2ProcResID[ResIdx]; 860b57cec5SDimitry Andric uint64_t Mask = ProcResID2Mask[ProcResID]; 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric if (Mask == Current) { 890b57cec5SDimitry Andric ResourcePressureDistribution[ProcResID]++; 900b57cec5SDimitry Andric CumulativeMask ^= Current; 910b57cec5SDimitry Andric continue; 920b57cec5SDimitry Andric } 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric Mask ^= Current; 950b57cec5SDimitry Andric while (Mask) { 960b57cec5SDimitry Andric uint64_t SubUnit = Mask & (-Mask); 970b57cec5SDimitry Andric ResIdx = getResourceStateIndex(SubUnit); 980b57cec5SDimitry Andric ProcResID = ResIdx2ProcResID[ResIdx]; 990b57cec5SDimitry Andric ResourcePressureDistribution[ProcResID]++; 1000b57cec5SDimitry Andric Mask ^= SubUnit; 1010b57cec5SDimitry Andric } 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric CumulativeMask ^= Current; 1040b57cec5SDimitry Andric } 1050b57cec5SDimitry Andric } 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { 1080b57cec5SDimitry Andric assert(Event.Reason != HWPressureEvent::INVALID && 1090b57cec5SDimitry Andric "Unexpected invalid event!"); 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric switch (Event.Reason) { 1120b57cec5SDimitry Andric default: 1130b57cec5SDimitry Andric break; 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric case HWPressureEvent::RESOURCES: { 1160b57cec5SDimitry Andric const uint64_t ResourceMask = Event.ResourceMask; 1170b57cec5SDimitry Andric updateResourcePressureDistribution(Event.ResourceMask); 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric for (const InstRef &IR : Event.AffectedInstructions) { 1200b57cec5SDimitry Andric const Instruction &IS = *IR.getInstruction(); 1210b57cec5SDimitry Andric unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask; 1220b57cec5SDimitry Andric if (!BusyResources) 1230b57cec5SDimitry Andric continue; 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric unsigned IID = IR.getSourceIndex(); 1260b57cec5SDimitry Andric IPI[IID].ResourcePressureCycles++; 1270b57cec5SDimitry Andric } 1280b57cec5SDimitry Andric break; 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric 1310b57cec5SDimitry Andric case HWPressureEvent::REGISTER_DEPS: 1320b57cec5SDimitry Andric for (const InstRef &IR : Event.AffectedInstructions) { 1330b57cec5SDimitry Andric unsigned IID = IR.getSourceIndex(); 1340b57cec5SDimitry Andric IPI[IID].RegisterPressureCycles++; 1350b57cec5SDimitry Andric } 1360b57cec5SDimitry Andric break; 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric case HWPressureEvent::MEMORY_DEPS: 1390b57cec5SDimitry Andric for (const InstRef &IR : Event.AffectedInstructions) { 1400b57cec5SDimitry Andric unsigned IID = IR.getSourceIndex(); 1410b57cec5SDimitry Andric IPI[IID].MemoryPressureCycles++; 1420b57cec5SDimitry Andric } 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric } 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric #ifndef NDEBUG 1470b57cec5SDimitry Andric void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, 1480b57cec5SDimitry Andric const DependencyEdge &DepEdge, 1490b57cec5SDimitry Andric MCInstPrinter &MCIP) const { 1500b57cec5SDimitry Andric unsigned FromIID = DepEdge.FromIID; 1510b57cec5SDimitry Andric unsigned ToIID = DepEdge.ToIID; 1520b57cec5SDimitry Andric assert(FromIID < ToIID && "Graph should be acyclic!"); 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric const DependencyEdge::Dependency &DE = DepEdge.Dep; 1550b57cec5SDimitry Andric assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!"); 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric OS << " FROM: " << FromIID << " TO: " << ToIID << " "; 1580b57cec5SDimitry Andric if (DE.Type == DependencyEdge::DT_REGISTER) { 1590b57cec5SDimitry Andric OS << " - REGISTER: "; 1600b57cec5SDimitry Andric MCIP.printRegName(OS, DE.ResourceOrRegID); 1610b57cec5SDimitry Andric } else if (DE.Type == DependencyEdge::DT_MEMORY) { 1620b57cec5SDimitry Andric OS << " - MEMORY"; 1630b57cec5SDimitry Andric } else { 1640b57cec5SDimitry Andric assert(DE.Type == DependencyEdge::DT_RESOURCE && 1650b57cec5SDimitry Andric "Unsupported dependency type!"); 1660b57cec5SDimitry Andric OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; 1670b57cec5SDimitry Andric } 168*8bcb0991SDimitry Andric OS << " - COST: " << DE.Cost << '\n'; 1690b57cec5SDimitry Andric } 1700b57cec5SDimitry Andric #endif // NDEBUG 1710b57cec5SDimitry Andric 172*8bcb0991SDimitry Andric void DependencyGraph::pruneEdges(unsigned Iterations) { 173*8bcb0991SDimitry Andric for (DGNode &N : Nodes) { 174*8bcb0991SDimitry Andric unsigned NumPruned = 0; 175*8bcb0991SDimitry Andric const unsigned Size = N.OutgoingEdges.size(); 176*8bcb0991SDimitry Andric // Use a cut-off threshold to prune edges with a low frequency. 177*8bcb0991SDimitry Andric for (unsigned I = 0, E = Size; I < E; ++I) { 178*8bcb0991SDimitry Andric DependencyEdge &Edge = N.OutgoingEdges[I]; 179*8bcb0991SDimitry Andric if (Edge.Frequency == Iterations) 180*8bcb0991SDimitry Andric continue; 181*8bcb0991SDimitry Andric double Factor = (double)Edge.Frequency / Iterations; 182*8bcb0991SDimitry Andric if (0.10 < Factor) 183*8bcb0991SDimitry Andric continue; 184*8bcb0991SDimitry Andric Nodes[Edge.ToIID].NumPredecessors--; 185*8bcb0991SDimitry Andric std::swap(Edge, N.OutgoingEdges[E - 1]); 186*8bcb0991SDimitry Andric --E; 187*8bcb0991SDimitry Andric ++NumPruned; 188*8bcb0991SDimitry Andric } 189*8bcb0991SDimitry Andric 190*8bcb0991SDimitry Andric if (NumPruned) 191*8bcb0991SDimitry Andric N.OutgoingEdges.resize(Size - NumPruned); 192*8bcb0991SDimitry Andric } 193*8bcb0991SDimitry Andric } 194*8bcb0991SDimitry Andric 1950b57cec5SDimitry Andric void DependencyGraph::initializeRootSet( 1960b57cec5SDimitry Andric SmallVectorImpl<unsigned> &RootSet) const { 1970b57cec5SDimitry Andric for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { 1980b57cec5SDimitry Andric const DGNode &N = Nodes[I]; 1990b57cec5SDimitry Andric if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty()) 2000b57cec5SDimitry Andric RootSet.emplace_back(I); 2010b57cec5SDimitry Andric } 2020b57cec5SDimitry Andric } 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric void DependencyGraph::propagateThroughEdges( 205*8bcb0991SDimitry Andric SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) { 2060b57cec5SDimitry Andric SmallVector<unsigned, 8> ToVisit; 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric // A critical sequence is computed as the longest path from a node of the 2090b57cec5SDimitry Andric // RootSet to a leaf node (i.e. a node with no successors). The RootSet is 2100b57cec5SDimitry Andric // composed of nodes with at least one successor, and no predecessors. 2110b57cec5SDimitry Andric // 2120b57cec5SDimitry Andric // Each node of the graph starts with an initial default cost of zero. The 2130b57cec5SDimitry Andric // cost of a node is a measure of criticality: the higher the cost, the bigger 2140b57cec5SDimitry Andric // is the performance impact. 215*8bcb0991SDimitry Andric // For register and memory dependencies, the cost is a function of the write 216*8bcb0991SDimitry Andric // latency as well as the actual delay (in cycles) caused to users. 217*8bcb0991SDimitry Andric // For processor resource dependencies, the cost is a function of the resource 218*8bcb0991SDimitry Andric // pressure. Resource interferences with low frequency values are ignored. 2190b57cec5SDimitry Andric // 2200b57cec5SDimitry Andric // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of 2210b57cec5SDimitry Andric // the inner loop selects (i.e. visits) a node N from a set of `unvisited 2220b57cec5SDimitry Andric // nodes`, and then propagates the cost of N to all its neighbors. 2230b57cec5SDimitry Andric // 2240b57cec5SDimitry Andric // The `unvisited nodes` set initially contains all the nodes from the 2250b57cec5SDimitry Andric // RootSet. A node N is added to the `unvisited nodes` if all its 2260b57cec5SDimitry Andric // predecessors have been visited already. 2270b57cec5SDimitry Andric // 2280b57cec5SDimitry Andric // For simplicity, every node tracks the number of unvisited incoming edges in 2290b57cec5SDimitry Andric // field `NumVisitedPredecessors`. When the value of that field drops to 2300b57cec5SDimitry Andric // zero, then the corresponding node is added to a `ToVisit` set. 2310b57cec5SDimitry Andric // 2320b57cec5SDimitry Andric // At the end of every iteration of the outer loop, set `ToVisit` becomes our 2330b57cec5SDimitry Andric // new `unvisited nodes` set. 2340b57cec5SDimitry Andric // 2350b57cec5SDimitry Andric // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet) 2360b57cec5SDimitry Andric // is empty. This algorithm works under the assumption that the graph is 2370b57cec5SDimitry Andric // acyclic. 2380b57cec5SDimitry Andric do { 2390b57cec5SDimitry Andric for (unsigned IID : RootSet) { 2400b57cec5SDimitry Andric const DGNode &N = Nodes[IID]; 2410b57cec5SDimitry Andric for (const DependencyEdge &DepEdge : N.OutgoingEdges) { 2420b57cec5SDimitry Andric unsigned ToIID = DepEdge.ToIID; 2430b57cec5SDimitry Andric DGNode &To = Nodes[ToIID]; 2440b57cec5SDimitry Andric uint64_t Cost = N.Cost + DepEdge.Dep.Cost; 2450b57cec5SDimitry Andric // Check if this is the most expensive incoming edge seen so far. In 2460b57cec5SDimitry Andric // case, update the total cost of the destination node (ToIID), as well 2470b57cec5SDimitry Andric // its field `CriticalPredecessor`. 2480b57cec5SDimitry Andric if (Cost > To.Cost) { 2490b57cec5SDimitry Andric To.CriticalPredecessor = DepEdge; 2500b57cec5SDimitry Andric To.Cost = Cost; 2510b57cec5SDimitry Andric To.Depth = N.Depth + 1; 2520b57cec5SDimitry Andric } 2530b57cec5SDimitry Andric To.NumVisitedPredecessors++; 2540b57cec5SDimitry Andric if (To.NumVisitedPredecessors == To.NumPredecessors) 2550b57cec5SDimitry Andric ToVisit.emplace_back(ToIID); 2560b57cec5SDimitry Andric } 2570b57cec5SDimitry Andric } 2580b57cec5SDimitry Andric 2590b57cec5SDimitry Andric std::swap(RootSet, ToVisit); 2600b57cec5SDimitry Andric ToVisit.clear(); 2610b57cec5SDimitry Andric } while (!RootSet.empty()); 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric void DependencyGraph::getCriticalSequence( 2650b57cec5SDimitry Andric SmallVectorImpl<const DependencyEdge *> &Seq) const { 2660b57cec5SDimitry Andric // At this stage, nodes of the graph have been already visited, and costs have 2670b57cec5SDimitry Andric // been propagated through the edges (see method `propagateThroughEdges()`). 2680b57cec5SDimitry Andric 2690b57cec5SDimitry Andric // Identify the node N with the highest cost in the graph. By construction, 2700b57cec5SDimitry Andric // that node is the last instruction of our critical sequence. 2710b57cec5SDimitry Andric // Field N.Depth would tell us the total length of the sequence. 2720b57cec5SDimitry Andric // 2730b57cec5SDimitry Andric // To obtain the sequence of critical edges, we simply follow the chain of critical 2740b57cec5SDimitry Andric // predecessors starting from node N (field DGNode::CriticalPredecessor). 2750b57cec5SDimitry Andric const auto It = std::max_element( 2760b57cec5SDimitry Andric Nodes.begin(), Nodes.end(), 2770b57cec5SDimitry Andric [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; }); 2780b57cec5SDimitry Andric unsigned IID = std::distance(Nodes.begin(), It); 2790b57cec5SDimitry Andric Seq.resize(Nodes[IID].Depth); 2800b57cec5SDimitry Andric for (unsigned I = Seq.size(), E = 0; I > E; --I) { 2810b57cec5SDimitry Andric const DGNode &N = Nodes[IID]; 2820b57cec5SDimitry Andric Seq[I - 1] = &N.CriticalPredecessor; 2830b57cec5SDimitry Andric IID = N.CriticalPredecessor.FromIID; 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric } 2860b57cec5SDimitry Andric 2870b57cec5SDimitry Andric static void printInstruction(formatted_raw_ostream &FOS, 2880b57cec5SDimitry Andric const MCSubtargetInfo &STI, MCInstPrinter &MCIP, 2890b57cec5SDimitry Andric const MCInst &MCI, 2900b57cec5SDimitry Andric bool UseDifferentColor = false) { 2910b57cec5SDimitry Andric std::string Instruction; 2920b57cec5SDimitry Andric raw_string_ostream InstrStream(Instruction); 2930b57cec5SDimitry Andric 2940b57cec5SDimitry Andric FOS.PadToColumn(14); 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric MCIP.printInst(&MCI, InstrStream, "", STI); 2970b57cec5SDimitry Andric InstrStream.flush(); 2980b57cec5SDimitry Andric 2990b57cec5SDimitry Andric if (UseDifferentColor) 3000b57cec5SDimitry Andric FOS.changeColor(raw_ostream::CYAN, true, false); 3010b57cec5SDimitry Andric FOS << StringRef(Instruction).ltrim(); 3020b57cec5SDimitry Andric if (UseDifferentColor) 3030b57cec5SDimitry Andric FOS.resetColor(); 3040b57cec5SDimitry Andric } 3050b57cec5SDimitry Andric 3060b57cec5SDimitry Andric void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { 307*8bcb0991SDimitry Andric // Early exit if no bottlenecks were found during the simulation. 308*8bcb0991SDimitry Andric if (!SeenStallCycles || !BPI.PressureIncreaseCycles) 309*8bcb0991SDimitry Andric return; 310*8bcb0991SDimitry Andric 3110b57cec5SDimitry Andric SmallVector<const DependencyEdge *, 16> Seq; 3120b57cec5SDimitry Andric DG.getCriticalSequence(Seq); 3130b57cec5SDimitry Andric if (Seq.empty()) 3140b57cec5SDimitry Andric return; 3150b57cec5SDimitry Andric 3160b57cec5SDimitry Andric OS << "\nCritical sequence based on the simulation:\n\n"; 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric const DependencyEdge &FirstEdge = *Seq[0]; 3190b57cec5SDimitry Andric unsigned FromIID = FirstEdge.FromIID % Source.size(); 3200b57cec5SDimitry Andric unsigned ToIID = FirstEdge.ToIID % Source.size(); 3210b57cec5SDimitry Andric bool IsLoopCarried = FromIID >= ToIID; 3220b57cec5SDimitry Andric 3230b57cec5SDimitry Andric formatted_raw_ostream FOS(OS); 3240b57cec5SDimitry Andric FOS.PadToColumn(14); 3250b57cec5SDimitry Andric FOS << "Instruction"; 3260b57cec5SDimitry Andric FOS.PadToColumn(58); 3270b57cec5SDimitry Andric FOS << "Dependency Information"; 3280b57cec5SDimitry Andric 3290b57cec5SDimitry Andric bool HasColors = FOS.has_colors(); 3300b57cec5SDimitry Andric 3310b57cec5SDimitry Andric unsigned CurrentIID = 0; 3320b57cec5SDimitry Andric if (IsLoopCarried) { 3330b57cec5SDimitry Andric FOS << "\n +----< " << FromIID << "."; 3340b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors); 3350b57cec5SDimitry Andric FOS << "\n |\n | < loop carried > \n |"; 3360b57cec5SDimitry Andric } else { 3370b57cec5SDimitry Andric while (CurrentIID < FromIID) { 3380b57cec5SDimitry Andric FOS << "\n " << CurrentIID << "."; 3390b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[CurrentIID]); 3400b57cec5SDimitry Andric CurrentIID++; 3410b57cec5SDimitry Andric } 3420b57cec5SDimitry Andric 3430b57cec5SDimitry Andric FOS << "\n +----< " << CurrentIID << "."; 3440b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors); 3450b57cec5SDimitry Andric CurrentIID++; 3460b57cec5SDimitry Andric } 3470b57cec5SDimitry Andric 3480b57cec5SDimitry Andric for (const DependencyEdge *&DE : Seq) { 3490b57cec5SDimitry Andric ToIID = DE->ToIID % Source.size(); 3500b57cec5SDimitry Andric unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID; 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric while (CurrentIID < LastIID) { 3530b57cec5SDimitry Andric FOS << "\n | " << CurrentIID << "."; 3540b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[CurrentIID]); 3550b57cec5SDimitry Andric CurrentIID++; 3560b57cec5SDimitry Andric } 3570b57cec5SDimitry Andric 3580b57cec5SDimitry Andric if (CurrentIID == ToIID) { 3590b57cec5SDimitry Andric FOS << "\n +----> " << ToIID << "."; 3600b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors); 3610b57cec5SDimitry Andric } else { 3620b57cec5SDimitry Andric FOS << "\n |\n | < loop carried > \n |" 3630b57cec5SDimitry Andric << "\n +----> " << ToIID << "."; 3640b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors); 3650b57cec5SDimitry Andric } 3660b57cec5SDimitry Andric FOS.PadToColumn(58); 3670b57cec5SDimitry Andric 3680b57cec5SDimitry Andric const DependencyEdge::Dependency &Dep = DE->Dep; 3690b57cec5SDimitry Andric if (HasColors) 3700b57cec5SDimitry Andric FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 3710b57cec5SDimitry Andric 3720b57cec5SDimitry Andric if (Dep.Type == DependencyEdge::DT_REGISTER) { 3730b57cec5SDimitry Andric FOS << "## REGISTER dependency: "; 3740b57cec5SDimitry Andric if (HasColors) 3750b57cec5SDimitry Andric FOS.changeColor(raw_ostream::MAGENTA, true, false); 3760b57cec5SDimitry Andric MCIP.printRegName(FOS, Dep.ResourceOrRegID); 3770b57cec5SDimitry Andric } else if (Dep.Type == DependencyEdge::DT_MEMORY) { 3780b57cec5SDimitry Andric FOS << "## MEMORY dependency."; 3790b57cec5SDimitry Andric } else { 3800b57cec5SDimitry Andric assert(Dep.Type == DependencyEdge::DT_RESOURCE && 3810b57cec5SDimitry Andric "Unsupported dependency type!"); 3820b57cec5SDimitry Andric FOS << "## RESOURCE interference: "; 3830b57cec5SDimitry Andric if (HasColors) 3840b57cec5SDimitry Andric FOS.changeColor(raw_ostream::MAGENTA, true, false); 3850b57cec5SDimitry Andric FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID); 3860b57cec5SDimitry Andric if (HasColors) { 3870b57cec5SDimitry Andric FOS.resetColor(); 3880b57cec5SDimitry Andric FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations) 3910b57cec5SDimitry Andric << "% ]"; 3920b57cec5SDimitry Andric } 3930b57cec5SDimitry Andric if (HasColors) 3940b57cec5SDimitry Andric FOS.resetColor(); 3950b57cec5SDimitry Andric ++CurrentIID; 3960b57cec5SDimitry Andric } 3970b57cec5SDimitry Andric 3980b57cec5SDimitry Andric while (CurrentIID < Source.size()) { 3990b57cec5SDimitry Andric FOS << "\n " << CurrentIID << "."; 4000b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[CurrentIID]); 4010b57cec5SDimitry Andric CurrentIID++; 4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric 4040b57cec5SDimitry Andric FOS << '\n'; 4050b57cec5SDimitry Andric FOS.flush(); 4060b57cec5SDimitry Andric } 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric #ifndef NDEBUG 4090b57cec5SDimitry Andric void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { 4100b57cec5SDimitry Andric OS << "\nREG DEPS\n"; 4110b57cec5SDimitry Andric for (const DGNode &Node : Nodes) 4120b57cec5SDimitry Andric for (const DependencyEdge &DE : Node.OutgoingEdges) 4130b57cec5SDimitry Andric if (DE.Dep.Type == DependencyEdge::DT_REGISTER) 4140b57cec5SDimitry Andric dumpDependencyEdge(OS, DE, MCIP); 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric OS << "\nMEM DEPS\n"; 4170b57cec5SDimitry Andric for (const DGNode &Node : Nodes) 4180b57cec5SDimitry Andric for (const DependencyEdge &DE : Node.OutgoingEdges) 4190b57cec5SDimitry Andric if (DE.Dep.Type == DependencyEdge::DT_MEMORY) 4200b57cec5SDimitry Andric dumpDependencyEdge(OS, DE, MCIP); 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric OS << "\nRESOURCE DEPS\n"; 4230b57cec5SDimitry Andric for (const DGNode &Node : Nodes) 4240b57cec5SDimitry Andric for (const DependencyEdge &DE : Node.OutgoingEdges) 4250b57cec5SDimitry Andric if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) 4260b57cec5SDimitry Andric dumpDependencyEdge(OS, DE, MCIP); 4270b57cec5SDimitry Andric } 4280b57cec5SDimitry Andric #endif // NDEBUG 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric void DependencyGraph::addDependency(unsigned From, unsigned To, 4310b57cec5SDimitry Andric DependencyEdge::Dependency &&Dep) { 4320b57cec5SDimitry Andric DGNode &NodeFrom = Nodes[From]; 4330b57cec5SDimitry Andric DGNode &NodeTo = Nodes[To]; 4340b57cec5SDimitry Andric SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; 4350b57cec5SDimitry Andric 4360b57cec5SDimitry Andric auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) { 4370b57cec5SDimitry Andric return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; 4380b57cec5SDimitry Andric }); 4390b57cec5SDimitry Andric 4400b57cec5SDimitry Andric if (It != Vec.end()) { 4410b57cec5SDimitry Andric It->Dep.Cost += Dep.Cost; 4420b57cec5SDimitry Andric It->Frequency++; 4430b57cec5SDimitry Andric return; 4440b57cec5SDimitry Andric } 4450b57cec5SDimitry Andric 4460b57cec5SDimitry Andric DependencyEdge DE = {Dep, From, To, 1}; 4470b57cec5SDimitry Andric Vec.emplace_back(DE); 4480b57cec5SDimitry Andric NodeTo.NumPredecessors++; 4490b57cec5SDimitry Andric } 4500b57cec5SDimitry Andric 4510b57cec5SDimitry Andric BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, 4520b57cec5SDimitry Andric MCInstPrinter &Printer, 4530b57cec5SDimitry Andric ArrayRef<MCInst> S, unsigned NumIter) 4540b57cec5SDimitry Andric : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3), 4550b57cec5SDimitry Andric Source(S), Iterations(NumIter), TotalCycles(0), 4560b57cec5SDimitry Andric PressureIncreasedBecauseOfResources(false), 4570b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies(false), 4580b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies(false), 4590b57cec5SDimitry Andric SeenStallCycles(false), BPI() {} 4600b57cec5SDimitry Andric 4610b57cec5SDimitry Andric void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, 4620b57cec5SDimitry Andric unsigned RegID, unsigned Cost) { 4630b57cec5SDimitry Andric bool IsLoopCarried = From >= To; 4640b57cec5SDimitry Andric unsigned SourceSize = Source.size(); 4650b57cec5SDimitry Andric if (IsLoopCarried) { 4660b57cec5SDimitry Andric DG.addRegisterDep(From, To + SourceSize, RegID, Cost); 4670b57cec5SDimitry Andric DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); 4680b57cec5SDimitry Andric return; 4690b57cec5SDimitry Andric } 4700b57cec5SDimitry Andric DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost); 4710b57cec5SDimitry Andric } 4720b57cec5SDimitry Andric 4730b57cec5SDimitry Andric void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, 4740b57cec5SDimitry Andric unsigned Cost) { 4750b57cec5SDimitry Andric bool IsLoopCarried = From >= To; 4760b57cec5SDimitry Andric unsigned SourceSize = Source.size(); 4770b57cec5SDimitry Andric if (IsLoopCarried) { 4780b57cec5SDimitry Andric DG.addMemoryDep(From, To + SourceSize, Cost); 4790b57cec5SDimitry Andric DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); 4800b57cec5SDimitry Andric return; 4810b57cec5SDimitry Andric } 4820b57cec5SDimitry Andric DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost); 4830b57cec5SDimitry Andric } 4840b57cec5SDimitry Andric 4850b57cec5SDimitry Andric void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, 4860b57cec5SDimitry Andric uint64_t Mask, unsigned Cost) { 4870b57cec5SDimitry Andric bool IsLoopCarried = From >= To; 4880b57cec5SDimitry Andric unsigned SourceSize = Source.size(); 4890b57cec5SDimitry Andric if (IsLoopCarried) { 4900b57cec5SDimitry Andric DG.addResourceDep(From, To + SourceSize, Mask, Cost); 4910b57cec5SDimitry Andric DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); 4920b57cec5SDimitry Andric return; 4930b57cec5SDimitry Andric } 4940b57cec5SDimitry Andric DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost); 4950b57cec5SDimitry Andric } 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { 4980b57cec5SDimitry Andric const unsigned IID = Event.IR.getSourceIndex(); 4990b57cec5SDimitry Andric if (Event.Type == HWInstructionEvent::Dispatched) { 5000b57cec5SDimitry Andric Tracker.onInstructionDispatched(IID); 5010b57cec5SDimitry Andric return; 5020b57cec5SDimitry Andric } 5030b57cec5SDimitry Andric if (Event.Type == HWInstructionEvent::Executed) { 5040b57cec5SDimitry Andric Tracker.onInstructionExecuted(IID); 5050b57cec5SDimitry Andric return; 5060b57cec5SDimitry Andric } 5070b57cec5SDimitry Andric 5080b57cec5SDimitry Andric if (Event.Type != HWInstructionEvent::Issued) 5090b57cec5SDimitry Andric return; 5100b57cec5SDimitry Andric 5110b57cec5SDimitry Andric const Instruction &IS = *Event.IR.getInstruction(); 5120b57cec5SDimitry Andric unsigned To = IID % Source.size(); 5130b57cec5SDimitry Andric 5140b57cec5SDimitry Andric unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID); 5150b57cec5SDimitry Andric uint64_t ResourceMask = IS.getCriticalResourceMask(); 5160b57cec5SDimitry Andric SmallVector<std::pair<unsigned, unsigned>, 4> Users; 5170b57cec5SDimitry Andric while (ResourceMask) { 5180b57cec5SDimitry Andric uint64_t Current = ResourceMask & (-ResourceMask); 5190b57cec5SDimitry Andric Tracker.getResourceUsers(Current, Users); 5200b57cec5SDimitry Andric for (const std::pair<unsigned, unsigned> &U : Users) 5210b57cec5SDimitry Andric addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles); 5220b57cec5SDimitry Andric Users.clear(); 5230b57cec5SDimitry Andric ResourceMask ^= Current; 5240b57cec5SDimitry Andric } 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric const CriticalDependency &RegDep = IS.getCriticalRegDep(); 5270b57cec5SDimitry Andric if (RegDep.Cycles) { 5280b57cec5SDimitry Andric Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID); 5290b57cec5SDimitry Andric unsigned From = RegDep.IID % Source.size(); 5300b57cec5SDimitry Andric addRegisterDep(From, To, RegDep.RegID, Cycles); 5310b57cec5SDimitry Andric } 5320b57cec5SDimitry Andric 5330b57cec5SDimitry Andric const CriticalDependency &MemDep = IS.getCriticalMemDep(); 5340b57cec5SDimitry Andric if (MemDep.Cycles) { 5350b57cec5SDimitry Andric Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID); 5360b57cec5SDimitry Andric unsigned From = MemDep.IID % Source.size(); 5370b57cec5SDimitry Andric addMemoryDep(From, To, Cycles); 5380b57cec5SDimitry Andric } 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric Tracker.handleInstructionIssuedEvent( 5410b57cec5SDimitry Andric static_cast<const HWInstructionIssuedEvent &>(Event)); 5420b57cec5SDimitry Andric 5430b57cec5SDimitry Andric // Check if this is the last simulated instruction. 5440b57cec5SDimitry Andric if (IID == ((Iterations * Source.size()) - 1)) 545*8bcb0991SDimitry Andric DG.finalizeGraph(Iterations); 5460b57cec5SDimitry Andric } 5470b57cec5SDimitry Andric 5480b57cec5SDimitry Andric void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { 5490b57cec5SDimitry Andric assert(Event.Reason != HWPressureEvent::INVALID && 5500b57cec5SDimitry Andric "Unexpected invalid event!"); 5510b57cec5SDimitry Andric 5520b57cec5SDimitry Andric Tracker.handlePressureEvent(Event); 5530b57cec5SDimitry Andric 5540b57cec5SDimitry Andric switch (Event.Reason) { 5550b57cec5SDimitry Andric default: 5560b57cec5SDimitry Andric break; 5570b57cec5SDimitry Andric 5580b57cec5SDimitry Andric case HWPressureEvent::RESOURCES: 5590b57cec5SDimitry Andric PressureIncreasedBecauseOfResources = true; 5600b57cec5SDimitry Andric break; 5610b57cec5SDimitry Andric case HWPressureEvent::REGISTER_DEPS: 5620b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies = true; 5630b57cec5SDimitry Andric break; 5640b57cec5SDimitry Andric case HWPressureEvent::MEMORY_DEPS: 5650b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies = true; 5660b57cec5SDimitry Andric break; 5670b57cec5SDimitry Andric } 5680b57cec5SDimitry Andric } 5690b57cec5SDimitry Andric 5700b57cec5SDimitry Andric void BottleneckAnalysis::onCycleEnd() { 5710b57cec5SDimitry Andric ++TotalCycles; 5720b57cec5SDimitry Andric 5730b57cec5SDimitry Andric bool PressureIncreasedBecauseOfDataDependencies = 5740b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies || 5750b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies; 5760b57cec5SDimitry Andric if (!PressureIncreasedBecauseOfResources && 5770b57cec5SDimitry Andric !PressureIncreasedBecauseOfDataDependencies) 5780b57cec5SDimitry Andric return; 5790b57cec5SDimitry Andric 5800b57cec5SDimitry Andric ++BPI.PressureIncreaseCycles; 5810b57cec5SDimitry Andric if (PressureIncreasedBecauseOfRegisterDependencies) 5820b57cec5SDimitry Andric ++BPI.RegisterDependencyCycles; 5830b57cec5SDimitry Andric if (PressureIncreasedBecauseOfMemoryDependencies) 5840b57cec5SDimitry Andric ++BPI.MemoryDependencyCycles; 5850b57cec5SDimitry Andric if (PressureIncreasedBecauseOfDataDependencies) 5860b57cec5SDimitry Andric ++BPI.DataDependencyCycles; 5870b57cec5SDimitry Andric if (PressureIncreasedBecauseOfResources) 5880b57cec5SDimitry Andric ++BPI.ResourcePressureCycles; 5890b57cec5SDimitry Andric PressureIncreasedBecauseOfResources = false; 5900b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies = false; 5910b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies = false; 5920b57cec5SDimitry Andric } 5930b57cec5SDimitry Andric 5940b57cec5SDimitry Andric void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { 5950b57cec5SDimitry Andric if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { 5960b57cec5SDimitry Andric OS << "\n\nNo resource or data dependency bottlenecks discovered.\n"; 5970b57cec5SDimitry Andric return; 5980b57cec5SDimitry Andric } 5990b57cec5SDimitry Andric 6000b57cec5SDimitry Andric double PressurePerCycle = 6010b57cec5SDimitry Andric (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; 6020b57cec5SDimitry Andric double ResourcePressurePerCycle = 6030b57cec5SDimitry Andric (double)BPI.ResourcePressureCycles * 100 / TotalCycles; 6040b57cec5SDimitry Andric double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; 6050b57cec5SDimitry Andric double RegDepPressurePerCycle = 6060b57cec5SDimitry Andric (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; 6070b57cec5SDimitry Andric double MemDepPressurePerCycle = 6080b57cec5SDimitry Andric (double)BPI.MemoryDependencyCycles * 100 / TotalCycles; 6090b57cec5SDimitry Andric 6100b57cec5SDimitry Andric OS << "\n\nCycles with backend pressure increase [ " 6110b57cec5SDimitry Andric << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]"; 6120b57cec5SDimitry Andric 6130b57cec5SDimitry Andric OS << "\nThroughput Bottlenecks: " 6140b57cec5SDimitry Andric << "\n Resource Pressure [ " 6150b57cec5SDimitry Andric << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100) 6160b57cec5SDimitry Andric << "% ]"; 6170b57cec5SDimitry Andric 6180b57cec5SDimitry Andric if (BPI.PressureIncreaseCycles) { 6190b57cec5SDimitry Andric ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution(); 6200b57cec5SDimitry Andric const MCSchedModel &SM = STI.getSchedModel(); 6210b57cec5SDimitry Andric for (unsigned I = 0, E = Distribution.size(); I < E; ++I) { 6220b57cec5SDimitry Andric unsigned ResourceCycles = Distribution[I]; 6230b57cec5SDimitry Andric if (ResourceCycles) { 6240b57cec5SDimitry Andric double Frequency = (double)ResourceCycles * 100 / TotalCycles; 6250b57cec5SDimitry Andric const MCProcResourceDesc &PRDesc = *SM.getProcResource(I); 6260b57cec5SDimitry Andric OS << "\n - " << PRDesc.Name << " [ " 6270b57cec5SDimitry Andric << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; 6280b57cec5SDimitry Andric } 6290b57cec5SDimitry Andric } 6300b57cec5SDimitry Andric } 6310b57cec5SDimitry Andric 6320b57cec5SDimitry Andric OS << "\n Data Dependencies: [ " 6330b57cec5SDimitry Andric << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; 6340b57cec5SDimitry Andric OS << "\n - Register Dependencies [ " 6350b57cec5SDimitry Andric << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) 6360b57cec5SDimitry Andric << "% ]"; 6370b57cec5SDimitry Andric OS << "\n - Memory Dependencies [ " 6380b57cec5SDimitry Andric << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) 6390b57cec5SDimitry Andric << "% ]\n"; 6400b57cec5SDimitry Andric } 6410b57cec5SDimitry Andric 6420b57cec5SDimitry Andric void BottleneckAnalysis::printView(raw_ostream &OS) const { 6430b57cec5SDimitry Andric std::string Buffer; 6440b57cec5SDimitry Andric raw_string_ostream TempStream(Buffer); 6450b57cec5SDimitry Andric printBottleneckHints(TempStream); 6460b57cec5SDimitry Andric TempStream.flush(); 6470b57cec5SDimitry Andric OS << Buffer; 6480b57cec5SDimitry Andric printCriticalSequence(OS); 6490b57cec5SDimitry Andric } 6500b57cec5SDimitry Andric 6510b57cec5SDimitry Andric } // namespace mca. 6520b57cec5SDimitry Andric } // namespace llvm 653