10b57cec5SDimitry Andric //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric /// \file 90b57cec5SDimitry Andric /// 100b57cec5SDimitry Andric /// This file implements the functionalities used by the BottleneckAnalysis 110b57cec5SDimitry Andric /// to report bottleneck info. 120b57cec5SDimitry Andric /// 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "Views/BottleneckAnalysis.h" 160b57cec5SDimitry Andric #include "llvm/MC/MCInst.h" 170b57cec5SDimitry Andric #include "llvm/MCA/Support.h" 180b57cec5SDimitry Andric #include "llvm/Support/Format.h" 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric namespace llvm { 210b57cec5SDimitry Andric namespace mca { 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric #define DEBUG_TYPE "llvm-mca" 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric PressureTracker::PressureTracker(const MCSchedModel &Model) 260b57cec5SDimitry Andric : SM(Model), 270b57cec5SDimitry Andric ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), 280b57cec5SDimitry Andric ProcResID2Mask(Model.getNumProcResourceKinds(), 0), 290b57cec5SDimitry Andric ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), 300b57cec5SDimitry Andric ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) { 310b57cec5SDimitry Andric computeProcResourceMasks(SM, ProcResID2Mask); 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric // Ignore the invalid resource at index zero. 340b57cec5SDimitry Andric unsigned NextResourceUsersIdx = 0; 350b57cec5SDimitry Andric for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) { 360b57cec5SDimitry Andric const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); 370b57cec5SDimitry Andric ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx; 380b57cec5SDimitry Andric NextResourceUsersIdx += ProcResource.NumUnits; 390b57cec5SDimitry Andric uint64_t ResourceMask = ProcResID2Mask[I]; 400b57cec5SDimitry Andric ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I; 410b57cec5SDimitry Andric } 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric ResourceUsers.resize(NextResourceUsersIdx); 440b57cec5SDimitry Andric std::fill(ResourceUsers.begin(), ResourceUsers.end(), 450b57cec5SDimitry Andric std::make_pair<unsigned, unsigned>(~0U, 0U)); 460b57cec5SDimitry Andric } 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric void PressureTracker::getResourceUsers(uint64_t ResourceMask, 490b57cec5SDimitry Andric SmallVectorImpl<User> &Users) const { 500b57cec5SDimitry Andric unsigned Index = getResourceStateIndex(ResourceMask); 510b57cec5SDimitry Andric unsigned ProcResID = ResIdx2ProcResID[Index]; 520b57cec5SDimitry Andric const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); 530b57cec5SDimitry Andric for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { 540b57cec5SDimitry Andric const User U = getResourceUser(ProcResID, I); 550b57cec5SDimitry Andric if (U.second && IPI.find(U.first) != IPI.end()) 560b57cec5SDimitry Andric Users.emplace_back(U); 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric } 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric void PressureTracker::onInstructionDispatched(unsigned IID) { 610b57cec5SDimitry Andric IPI.insert(std::make_pair(IID, InstructionPressureInfo())); 620b57cec5SDimitry Andric } 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); } 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric void PressureTracker::handleInstructionIssuedEvent( 670b57cec5SDimitry Andric const HWInstructionIssuedEvent &Event) { 680b57cec5SDimitry Andric unsigned IID = Event.IR.getSourceIndex(); 690b57cec5SDimitry Andric for (const ResourceUse &Use : Event.UsedResources) { 700b57cec5SDimitry Andric const ResourceRef &RR = Use.first; 710b57cec5SDimitry Andric unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; 720b57cec5SDimitry Andric Index += countTrailingZeros(RR.second); 730b57cec5SDimitry Andric ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator()); 740b57cec5SDimitry Andric } 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric void PressureTracker::updateResourcePressureDistribution( 780b57cec5SDimitry Andric uint64_t CumulativeMask) { 790b57cec5SDimitry Andric while (CumulativeMask) { 800b57cec5SDimitry Andric uint64_t Current = CumulativeMask & (-CumulativeMask); 810b57cec5SDimitry Andric unsigned ResIdx = getResourceStateIndex(Current); 820b57cec5SDimitry Andric unsigned ProcResID = ResIdx2ProcResID[ResIdx]; 830b57cec5SDimitry Andric uint64_t Mask = ProcResID2Mask[ProcResID]; 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric if (Mask == Current) { 860b57cec5SDimitry Andric ResourcePressureDistribution[ProcResID]++; 870b57cec5SDimitry Andric CumulativeMask ^= Current; 880b57cec5SDimitry Andric continue; 890b57cec5SDimitry Andric } 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric Mask ^= Current; 920b57cec5SDimitry Andric while (Mask) { 930b57cec5SDimitry Andric uint64_t SubUnit = Mask & (-Mask); 940b57cec5SDimitry Andric ResIdx = getResourceStateIndex(SubUnit); 950b57cec5SDimitry Andric ProcResID = ResIdx2ProcResID[ResIdx]; 960b57cec5SDimitry Andric ResourcePressureDistribution[ProcResID]++; 970b57cec5SDimitry Andric Mask ^= SubUnit; 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric CumulativeMask ^= Current; 1010b57cec5SDimitry Andric } 1020b57cec5SDimitry Andric } 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { 1050b57cec5SDimitry Andric assert(Event.Reason != HWPressureEvent::INVALID && 1060b57cec5SDimitry Andric "Unexpected invalid event!"); 1070b57cec5SDimitry Andric 1080b57cec5SDimitry Andric switch (Event.Reason) { 1090b57cec5SDimitry Andric default: 1100b57cec5SDimitry Andric break; 1110b57cec5SDimitry Andric 1120b57cec5SDimitry Andric case HWPressureEvent::RESOURCES: { 1130b57cec5SDimitry Andric const uint64_t ResourceMask = Event.ResourceMask; 1140b57cec5SDimitry Andric updateResourcePressureDistribution(Event.ResourceMask); 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric for (const InstRef &IR : Event.AffectedInstructions) { 1170b57cec5SDimitry Andric const Instruction &IS = *IR.getInstruction(); 1180b57cec5SDimitry Andric unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask; 1190b57cec5SDimitry Andric if (!BusyResources) 1200b57cec5SDimitry Andric continue; 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric unsigned IID = IR.getSourceIndex(); 1230b57cec5SDimitry Andric IPI[IID].ResourcePressureCycles++; 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric break; 1260b57cec5SDimitry Andric } 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric case HWPressureEvent::REGISTER_DEPS: 1290b57cec5SDimitry Andric for (const InstRef &IR : Event.AffectedInstructions) { 1300b57cec5SDimitry Andric unsigned IID = IR.getSourceIndex(); 1310b57cec5SDimitry Andric IPI[IID].RegisterPressureCycles++; 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric break; 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric case HWPressureEvent::MEMORY_DEPS: 1360b57cec5SDimitry Andric for (const InstRef &IR : Event.AffectedInstructions) { 1370b57cec5SDimitry Andric unsigned IID = IR.getSourceIndex(); 1380b57cec5SDimitry Andric IPI[IID].MemoryPressureCycles++; 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric } 1410b57cec5SDimitry Andric } 1420b57cec5SDimitry Andric 1430b57cec5SDimitry Andric #ifndef NDEBUG 1440b57cec5SDimitry Andric void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, 1450b57cec5SDimitry Andric const DependencyEdge &DepEdge, 1460b57cec5SDimitry Andric MCInstPrinter &MCIP) const { 1470b57cec5SDimitry Andric unsigned FromIID = DepEdge.FromIID; 1480b57cec5SDimitry Andric unsigned ToIID = DepEdge.ToIID; 1490b57cec5SDimitry Andric assert(FromIID < ToIID && "Graph should be acyclic!"); 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric const DependencyEdge::Dependency &DE = DepEdge.Dep; 1520b57cec5SDimitry Andric assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!"); 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric OS << " FROM: " << FromIID << " TO: " << ToIID << " "; 1550b57cec5SDimitry Andric if (DE.Type == DependencyEdge::DT_REGISTER) { 1560b57cec5SDimitry Andric OS << " - REGISTER: "; 1570b57cec5SDimitry Andric MCIP.printRegName(OS, DE.ResourceOrRegID); 1580b57cec5SDimitry Andric } else if (DE.Type == DependencyEdge::DT_MEMORY) { 1590b57cec5SDimitry Andric OS << " - MEMORY"; 1600b57cec5SDimitry Andric } else { 1610b57cec5SDimitry Andric assert(DE.Type == DependencyEdge::DT_RESOURCE && 1620b57cec5SDimitry Andric "Unsupported dependency type!"); 1630b57cec5SDimitry Andric OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; 1640b57cec5SDimitry Andric } 1658bcb0991SDimitry Andric OS << " - COST: " << DE.Cost << '\n'; 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric #endif // NDEBUG 1680b57cec5SDimitry Andric 1698bcb0991SDimitry Andric void DependencyGraph::pruneEdges(unsigned Iterations) { 1708bcb0991SDimitry Andric for (DGNode &N : Nodes) { 1718bcb0991SDimitry Andric unsigned NumPruned = 0; 1728bcb0991SDimitry Andric const unsigned Size = N.OutgoingEdges.size(); 1738bcb0991SDimitry Andric // Use a cut-off threshold to prune edges with a low frequency. 1748bcb0991SDimitry Andric for (unsigned I = 0, E = Size; I < E; ++I) { 1758bcb0991SDimitry Andric DependencyEdge &Edge = N.OutgoingEdges[I]; 1768bcb0991SDimitry Andric if (Edge.Frequency == Iterations) 1778bcb0991SDimitry Andric continue; 1788bcb0991SDimitry Andric double Factor = (double)Edge.Frequency / Iterations; 1798bcb0991SDimitry Andric if (0.10 < Factor) 1808bcb0991SDimitry Andric continue; 1818bcb0991SDimitry Andric Nodes[Edge.ToIID].NumPredecessors--; 1828bcb0991SDimitry Andric std::swap(Edge, N.OutgoingEdges[E - 1]); 1838bcb0991SDimitry Andric --E; 1848bcb0991SDimitry Andric ++NumPruned; 1858bcb0991SDimitry Andric } 1868bcb0991SDimitry Andric 1878bcb0991SDimitry Andric if (NumPruned) 1888bcb0991SDimitry Andric N.OutgoingEdges.resize(Size - NumPruned); 1898bcb0991SDimitry Andric } 1908bcb0991SDimitry Andric } 1918bcb0991SDimitry Andric 1920b57cec5SDimitry Andric void DependencyGraph::initializeRootSet( 1930b57cec5SDimitry Andric SmallVectorImpl<unsigned> &RootSet) const { 1940b57cec5SDimitry Andric for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { 1950b57cec5SDimitry Andric const DGNode &N = Nodes[I]; 1960b57cec5SDimitry Andric if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty()) 1970b57cec5SDimitry Andric RootSet.emplace_back(I); 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric } 2000b57cec5SDimitry Andric 201fe6060f1SDimitry Andric void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet, 202fe6060f1SDimitry Andric unsigned Iterations) { 2030b57cec5SDimitry Andric SmallVector<unsigned, 8> ToVisit; 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric // A critical sequence is computed as the longest path from a node of the 2060b57cec5SDimitry Andric // RootSet to a leaf node (i.e. a node with no successors). The RootSet is 2070b57cec5SDimitry Andric // composed of nodes with at least one successor, and no predecessors. 2080b57cec5SDimitry Andric // 2090b57cec5SDimitry Andric // Each node of the graph starts with an initial default cost of zero. The 2100b57cec5SDimitry Andric // cost of a node is a measure of criticality: the higher the cost, the bigger 2110b57cec5SDimitry Andric // is the performance impact. 2128bcb0991SDimitry Andric // For register and memory dependencies, the cost is a function of the write 2138bcb0991SDimitry Andric // latency as well as the actual delay (in cycles) caused to users. 2148bcb0991SDimitry Andric // For processor resource dependencies, the cost is a function of the resource 2158bcb0991SDimitry Andric // pressure. Resource interferences with low frequency values are ignored. 2160b57cec5SDimitry Andric // 2170b57cec5SDimitry Andric // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of 2180b57cec5SDimitry Andric // the inner loop selects (i.e. visits) a node N from a set of `unvisited 2190b57cec5SDimitry Andric // nodes`, and then propagates the cost of N to all its neighbors. 2200b57cec5SDimitry Andric // 2210b57cec5SDimitry Andric // The `unvisited nodes` set initially contains all the nodes from the 2220b57cec5SDimitry Andric // RootSet. A node N is added to the `unvisited nodes` if all its 2230b57cec5SDimitry Andric // predecessors have been visited already. 2240b57cec5SDimitry Andric // 2250b57cec5SDimitry Andric // For simplicity, every node tracks the number of unvisited incoming edges in 2260b57cec5SDimitry Andric // field `NumVisitedPredecessors`. When the value of that field drops to 2270b57cec5SDimitry Andric // zero, then the corresponding node is added to a `ToVisit` set. 2280b57cec5SDimitry Andric // 2290b57cec5SDimitry Andric // At the end of every iteration of the outer loop, set `ToVisit` becomes our 2300b57cec5SDimitry Andric // new `unvisited nodes` set. 2310b57cec5SDimitry Andric // 2320b57cec5SDimitry Andric // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet) 2330b57cec5SDimitry Andric // is empty. This algorithm works under the assumption that the graph is 2340b57cec5SDimitry Andric // acyclic. 2350b57cec5SDimitry Andric do { 2360b57cec5SDimitry Andric for (unsigned IID : RootSet) { 2370b57cec5SDimitry Andric const DGNode &N = Nodes[IID]; 2380b57cec5SDimitry Andric for (const DependencyEdge &DepEdge : N.OutgoingEdges) { 2390b57cec5SDimitry Andric unsigned ToIID = DepEdge.ToIID; 2400b57cec5SDimitry Andric DGNode &To = Nodes[ToIID]; 2410b57cec5SDimitry Andric uint64_t Cost = N.Cost + DepEdge.Dep.Cost; 2420b57cec5SDimitry Andric // Check if this is the most expensive incoming edge seen so far. In 2430b57cec5SDimitry Andric // case, update the total cost of the destination node (ToIID), as well 2440b57cec5SDimitry Andric // its field `CriticalPredecessor`. 2450b57cec5SDimitry Andric if (Cost > To.Cost) { 2460b57cec5SDimitry Andric To.CriticalPredecessor = DepEdge; 2470b57cec5SDimitry Andric To.Cost = Cost; 2480b57cec5SDimitry Andric To.Depth = N.Depth + 1; 2490b57cec5SDimitry Andric } 2500b57cec5SDimitry Andric To.NumVisitedPredecessors++; 2510b57cec5SDimitry Andric if (To.NumVisitedPredecessors == To.NumPredecessors) 2520b57cec5SDimitry Andric ToVisit.emplace_back(ToIID); 2530b57cec5SDimitry Andric } 2540b57cec5SDimitry Andric } 2550b57cec5SDimitry Andric 2560b57cec5SDimitry Andric std::swap(RootSet, ToVisit); 2570b57cec5SDimitry Andric ToVisit.clear(); 2580b57cec5SDimitry Andric } while (!RootSet.empty()); 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric void DependencyGraph::getCriticalSequence( 2620b57cec5SDimitry Andric SmallVectorImpl<const DependencyEdge *> &Seq) const { 2630b57cec5SDimitry Andric // At this stage, nodes of the graph have been already visited, and costs have 2640b57cec5SDimitry Andric // been propagated through the edges (see method `propagateThroughEdges()`). 2650b57cec5SDimitry Andric 2660b57cec5SDimitry Andric // Identify the node N with the highest cost in the graph. By construction, 2670b57cec5SDimitry Andric // that node is the last instruction of our critical sequence. 2680b57cec5SDimitry Andric // Field N.Depth would tell us the total length of the sequence. 2690b57cec5SDimitry Andric // 270fe6060f1SDimitry Andric // To obtain the sequence of critical edges, we simply follow the chain of 271fe6060f1SDimitry Andric // critical predecessors starting from node N (field 272fe6060f1SDimitry Andric // DGNode::CriticalPredecessor). 2730b57cec5SDimitry Andric const auto It = std::max_element( 2740b57cec5SDimitry Andric Nodes.begin(), Nodes.end(), 2750b57cec5SDimitry Andric [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; }); 2760b57cec5SDimitry Andric unsigned IID = std::distance(Nodes.begin(), It); 2770b57cec5SDimitry Andric Seq.resize(Nodes[IID].Depth); 278*0eae32dcSDimitry Andric for (const DependencyEdge *&DE : llvm::reverse(Seq)) { 2790b57cec5SDimitry Andric const DGNode &N = Nodes[IID]; 280*0eae32dcSDimitry Andric DE = &N.CriticalPredecessor; 2810b57cec5SDimitry Andric IID = N.CriticalPredecessor.FromIID; 2820b57cec5SDimitry Andric } 2830b57cec5SDimitry Andric } 2840b57cec5SDimitry Andric 285e8d8bef9SDimitry Andric void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS, 2860b57cec5SDimitry Andric const MCInst &MCI, 287e8d8bef9SDimitry Andric bool UseDifferentColor) const { 2880b57cec5SDimitry Andric FOS.PadToColumn(14); 2890b57cec5SDimitry Andric if (UseDifferentColor) 2900b57cec5SDimitry Andric FOS.changeColor(raw_ostream::CYAN, true, false); 291e8d8bef9SDimitry Andric FOS << printInstructionString(MCI); 2920b57cec5SDimitry Andric if (UseDifferentColor) 2930b57cec5SDimitry Andric FOS.resetColor(); 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { 2978bcb0991SDimitry Andric // Early exit if no bottlenecks were found during the simulation. 2988bcb0991SDimitry Andric if (!SeenStallCycles || !BPI.PressureIncreaseCycles) 2998bcb0991SDimitry Andric return; 3008bcb0991SDimitry Andric 3010b57cec5SDimitry Andric SmallVector<const DependencyEdge *, 16> Seq; 3020b57cec5SDimitry Andric DG.getCriticalSequence(Seq); 3030b57cec5SDimitry Andric if (Seq.empty()) 3040b57cec5SDimitry Andric return; 3050b57cec5SDimitry Andric 3060b57cec5SDimitry Andric OS << "\nCritical sequence based on the simulation:\n\n"; 3070b57cec5SDimitry Andric 3080b57cec5SDimitry Andric const DependencyEdge &FirstEdge = *Seq[0]; 309e8d8bef9SDimitry Andric ArrayRef<llvm::MCInst> Source = getSource(); 3100b57cec5SDimitry Andric unsigned FromIID = FirstEdge.FromIID % Source.size(); 3110b57cec5SDimitry Andric unsigned ToIID = FirstEdge.ToIID % Source.size(); 3120b57cec5SDimitry Andric bool IsLoopCarried = FromIID >= ToIID; 3130b57cec5SDimitry Andric 3140b57cec5SDimitry Andric formatted_raw_ostream FOS(OS); 3150b57cec5SDimitry Andric FOS.PadToColumn(14); 3160b57cec5SDimitry Andric FOS << "Instruction"; 3170b57cec5SDimitry Andric FOS.PadToColumn(58); 3180b57cec5SDimitry Andric FOS << "Dependency Information"; 3190b57cec5SDimitry Andric 3200b57cec5SDimitry Andric bool HasColors = FOS.has_colors(); 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric unsigned CurrentIID = 0; 3230b57cec5SDimitry Andric if (IsLoopCarried) { 3240b57cec5SDimitry Andric FOS << "\n +----< " << FromIID << "."; 325e8d8bef9SDimitry Andric printInstruction(FOS, Source[FromIID], HasColors); 3260b57cec5SDimitry Andric FOS << "\n |\n | < loop carried > \n |"; 3270b57cec5SDimitry Andric } else { 3280b57cec5SDimitry Andric while (CurrentIID < FromIID) { 3290b57cec5SDimitry Andric FOS << "\n " << CurrentIID << "."; 330e8d8bef9SDimitry Andric printInstruction(FOS, Source[CurrentIID]); 3310b57cec5SDimitry Andric CurrentIID++; 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric FOS << "\n +----< " << CurrentIID << "."; 335e8d8bef9SDimitry Andric printInstruction(FOS, Source[CurrentIID], HasColors); 3360b57cec5SDimitry Andric CurrentIID++; 3370b57cec5SDimitry Andric } 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric for (const DependencyEdge *&DE : Seq) { 3400b57cec5SDimitry Andric ToIID = DE->ToIID % Source.size(); 3410b57cec5SDimitry Andric unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID; 3420b57cec5SDimitry Andric 3430b57cec5SDimitry Andric while (CurrentIID < LastIID) { 3440b57cec5SDimitry Andric FOS << "\n | " << CurrentIID << "."; 345e8d8bef9SDimitry Andric printInstruction(FOS, Source[CurrentIID]); 3460b57cec5SDimitry Andric CurrentIID++; 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric 3490b57cec5SDimitry Andric if (CurrentIID == ToIID) { 3500b57cec5SDimitry Andric FOS << "\n +----> " << ToIID << "."; 351e8d8bef9SDimitry Andric printInstruction(FOS, Source[CurrentIID], HasColors); 3520b57cec5SDimitry Andric } else { 3530b57cec5SDimitry Andric FOS << "\n |\n | < loop carried > \n |" 3540b57cec5SDimitry Andric << "\n +----> " << ToIID << "."; 355e8d8bef9SDimitry Andric printInstruction(FOS, Source[ToIID], HasColors); 3560b57cec5SDimitry Andric } 3570b57cec5SDimitry Andric FOS.PadToColumn(58); 3580b57cec5SDimitry Andric 3590b57cec5SDimitry Andric const DependencyEdge::Dependency &Dep = DE->Dep; 3600b57cec5SDimitry Andric if (HasColors) 3610b57cec5SDimitry Andric FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 3620b57cec5SDimitry Andric 3630b57cec5SDimitry Andric if (Dep.Type == DependencyEdge::DT_REGISTER) { 3640b57cec5SDimitry Andric FOS << "## REGISTER dependency: "; 3650b57cec5SDimitry Andric if (HasColors) 3660b57cec5SDimitry Andric FOS.changeColor(raw_ostream::MAGENTA, true, false); 367e8d8bef9SDimitry Andric getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID); 3680b57cec5SDimitry Andric } else if (Dep.Type == DependencyEdge::DT_MEMORY) { 3690b57cec5SDimitry Andric FOS << "## MEMORY dependency."; 3700b57cec5SDimitry Andric } else { 3710b57cec5SDimitry Andric assert(Dep.Type == DependencyEdge::DT_RESOURCE && 3720b57cec5SDimitry Andric "Unsupported dependency type!"); 3730b57cec5SDimitry Andric FOS << "## RESOURCE interference: "; 3740b57cec5SDimitry Andric if (HasColors) 3750b57cec5SDimitry Andric FOS.changeColor(raw_ostream::MAGENTA, true, false); 3760b57cec5SDimitry Andric FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID); 3770b57cec5SDimitry Andric if (HasColors) { 3780b57cec5SDimitry Andric FOS.resetColor(); 3790b57cec5SDimitry Andric FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 3800b57cec5SDimitry Andric } 3810b57cec5SDimitry Andric FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations) 3820b57cec5SDimitry Andric << "% ]"; 3830b57cec5SDimitry Andric } 3840b57cec5SDimitry Andric if (HasColors) 3850b57cec5SDimitry Andric FOS.resetColor(); 3860b57cec5SDimitry Andric ++CurrentIID; 3870b57cec5SDimitry Andric } 3880b57cec5SDimitry Andric 3890b57cec5SDimitry Andric while (CurrentIID < Source.size()) { 3900b57cec5SDimitry Andric FOS << "\n " << CurrentIID << "."; 391e8d8bef9SDimitry Andric printInstruction(FOS, Source[CurrentIID]); 3920b57cec5SDimitry Andric CurrentIID++; 3930b57cec5SDimitry Andric } 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric FOS << '\n'; 3960b57cec5SDimitry Andric FOS.flush(); 3970b57cec5SDimitry Andric } 3980b57cec5SDimitry Andric 3990b57cec5SDimitry Andric #ifndef NDEBUG 4000b57cec5SDimitry Andric void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { 4010b57cec5SDimitry Andric OS << "\nREG DEPS\n"; 4020b57cec5SDimitry Andric for (const DGNode &Node : Nodes) 4030b57cec5SDimitry Andric for (const DependencyEdge &DE : Node.OutgoingEdges) 4040b57cec5SDimitry Andric if (DE.Dep.Type == DependencyEdge::DT_REGISTER) 4050b57cec5SDimitry Andric dumpDependencyEdge(OS, DE, MCIP); 4060b57cec5SDimitry Andric 4070b57cec5SDimitry Andric OS << "\nMEM DEPS\n"; 4080b57cec5SDimitry Andric for (const DGNode &Node : Nodes) 4090b57cec5SDimitry Andric for (const DependencyEdge &DE : Node.OutgoingEdges) 4100b57cec5SDimitry Andric if (DE.Dep.Type == DependencyEdge::DT_MEMORY) 4110b57cec5SDimitry Andric dumpDependencyEdge(OS, DE, MCIP); 4120b57cec5SDimitry Andric 4130b57cec5SDimitry Andric OS << "\nRESOURCE DEPS\n"; 4140b57cec5SDimitry Andric for (const DGNode &Node : Nodes) 4150b57cec5SDimitry Andric for (const DependencyEdge &DE : Node.OutgoingEdges) 4160b57cec5SDimitry Andric if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) 4170b57cec5SDimitry Andric dumpDependencyEdge(OS, DE, MCIP); 4180b57cec5SDimitry Andric } 4190b57cec5SDimitry Andric #endif // NDEBUG 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric void DependencyGraph::addDependency(unsigned From, unsigned To, 4220b57cec5SDimitry Andric DependencyEdge::Dependency &&Dep) { 4230b57cec5SDimitry Andric DGNode &NodeFrom = Nodes[From]; 4240b57cec5SDimitry Andric DGNode &NodeTo = Nodes[To]; 4250b57cec5SDimitry Andric SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) { 4280b57cec5SDimitry Andric return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; 4290b57cec5SDimitry Andric }); 4300b57cec5SDimitry Andric 4310b57cec5SDimitry Andric if (It != Vec.end()) { 4320b57cec5SDimitry Andric It->Dep.Cost += Dep.Cost; 4330b57cec5SDimitry Andric It->Frequency++; 4340b57cec5SDimitry Andric return; 4350b57cec5SDimitry Andric } 4360b57cec5SDimitry Andric 4370b57cec5SDimitry Andric DependencyEdge DE = {Dep, From, To, 1}; 4380b57cec5SDimitry Andric Vec.emplace_back(DE); 4390b57cec5SDimitry Andric NodeTo.NumPredecessors++; 4400b57cec5SDimitry Andric } 4410b57cec5SDimitry Andric 4420b57cec5SDimitry Andric BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, 4430b57cec5SDimitry Andric MCInstPrinter &Printer, 4440b57cec5SDimitry Andric ArrayRef<MCInst> S, unsigned NumIter) 445e8d8bef9SDimitry Andric : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()), 446e8d8bef9SDimitry Andric DG(S.size() * 3), Iterations(NumIter), TotalCycles(0), 4470b57cec5SDimitry Andric PressureIncreasedBecauseOfResources(false), 4480b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies(false), 4490b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies(false), 4500b57cec5SDimitry Andric SeenStallCycles(false), BPI() {} 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, 4530b57cec5SDimitry Andric unsigned RegID, unsigned Cost) { 4540b57cec5SDimitry Andric bool IsLoopCarried = From >= To; 455e8d8bef9SDimitry Andric unsigned SourceSize = getSource().size(); 4560b57cec5SDimitry Andric if (IsLoopCarried) { 4570b57cec5SDimitry Andric DG.addRegisterDep(From, To + SourceSize, RegID, Cost); 4580b57cec5SDimitry Andric DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); 4590b57cec5SDimitry Andric return; 4600b57cec5SDimitry Andric } 4610b57cec5SDimitry Andric DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost); 4620b57cec5SDimitry Andric } 4630b57cec5SDimitry Andric 4640b57cec5SDimitry Andric void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, 4650b57cec5SDimitry Andric unsigned Cost) { 4660b57cec5SDimitry Andric bool IsLoopCarried = From >= To; 467e8d8bef9SDimitry Andric unsigned SourceSize = getSource().size(); 4680b57cec5SDimitry Andric if (IsLoopCarried) { 4690b57cec5SDimitry Andric DG.addMemoryDep(From, To + SourceSize, Cost); 4700b57cec5SDimitry Andric DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); 4710b57cec5SDimitry Andric return; 4720b57cec5SDimitry Andric } 4730b57cec5SDimitry Andric DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost); 4740b57cec5SDimitry Andric } 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, 4770b57cec5SDimitry Andric uint64_t Mask, unsigned Cost) { 4780b57cec5SDimitry Andric bool IsLoopCarried = From >= To; 479e8d8bef9SDimitry Andric unsigned SourceSize = getSource().size(); 4800b57cec5SDimitry Andric if (IsLoopCarried) { 4810b57cec5SDimitry Andric DG.addResourceDep(From, To + SourceSize, Mask, Cost); 4820b57cec5SDimitry Andric DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); 4830b57cec5SDimitry Andric return; 4840b57cec5SDimitry Andric } 4850b57cec5SDimitry Andric DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost); 4860b57cec5SDimitry Andric } 4870b57cec5SDimitry Andric 4880b57cec5SDimitry Andric void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { 4890b57cec5SDimitry Andric const unsigned IID = Event.IR.getSourceIndex(); 4900b57cec5SDimitry Andric if (Event.Type == HWInstructionEvent::Dispatched) { 4910b57cec5SDimitry Andric Tracker.onInstructionDispatched(IID); 4920b57cec5SDimitry Andric return; 4930b57cec5SDimitry Andric } 4940b57cec5SDimitry Andric if (Event.Type == HWInstructionEvent::Executed) { 4950b57cec5SDimitry Andric Tracker.onInstructionExecuted(IID); 4960b57cec5SDimitry Andric return; 4970b57cec5SDimitry Andric } 4980b57cec5SDimitry Andric 4990b57cec5SDimitry Andric if (Event.Type != HWInstructionEvent::Issued) 5000b57cec5SDimitry Andric return; 5010b57cec5SDimitry Andric 502e8d8bef9SDimitry Andric ArrayRef<llvm::MCInst> Source = getSource(); 5030b57cec5SDimitry Andric const Instruction &IS = *Event.IR.getInstruction(); 5040b57cec5SDimitry Andric unsigned To = IID % Source.size(); 5050b57cec5SDimitry Andric 5060b57cec5SDimitry Andric unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID); 5070b57cec5SDimitry Andric uint64_t ResourceMask = IS.getCriticalResourceMask(); 5080b57cec5SDimitry Andric SmallVector<std::pair<unsigned, unsigned>, 4> Users; 5090b57cec5SDimitry Andric while (ResourceMask) { 5100b57cec5SDimitry Andric uint64_t Current = ResourceMask & (-ResourceMask); 5110b57cec5SDimitry Andric Tracker.getResourceUsers(Current, Users); 5120b57cec5SDimitry Andric for (const std::pair<unsigned, unsigned> &U : Users) 5130b57cec5SDimitry Andric addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles); 5140b57cec5SDimitry Andric Users.clear(); 5150b57cec5SDimitry Andric ResourceMask ^= Current; 5160b57cec5SDimitry Andric } 5170b57cec5SDimitry Andric 5180b57cec5SDimitry Andric const CriticalDependency &RegDep = IS.getCriticalRegDep(); 5190b57cec5SDimitry Andric if (RegDep.Cycles) { 5200b57cec5SDimitry Andric Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID); 5210b57cec5SDimitry Andric unsigned From = RegDep.IID % Source.size(); 5220b57cec5SDimitry Andric addRegisterDep(From, To, RegDep.RegID, Cycles); 5230b57cec5SDimitry Andric } 5240b57cec5SDimitry Andric 5250b57cec5SDimitry Andric const CriticalDependency &MemDep = IS.getCriticalMemDep(); 5260b57cec5SDimitry Andric if (MemDep.Cycles) { 5270b57cec5SDimitry Andric Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID); 5280b57cec5SDimitry Andric unsigned From = MemDep.IID % Source.size(); 5290b57cec5SDimitry Andric addMemoryDep(From, To, Cycles); 5300b57cec5SDimitry Andric } 5310b57cec5SDimitry Andric 5320b57cec5SDimitry Andric Tracker.handleInstructionIssuedEvent( 5330b57cec5SDimitry Andric static_cast<const HWInstructionIssuedEvent &>(Event)); 5340b57cec5SDimitry Andric 5350b57cec5SDimitry Andric // Check if this is the last simulated instruction. 5360b57cec5SDimitry Andric if (IID == ((Iterations * Source.size()) - 1)) 5378bcb0991SDimitry Andric DG.finalizeGraph(Iterations); 5380b57cec5SDimitry Andric } 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { 5410b57cec5SDimitry Andric assert(Event.Reason != HWPressureEvent::INVALID && 5420b57cec5SDimitry Andric "Unexpected invalid event!"); 5430b57cec5SDimitry Andric 5440b57cec5SDimitry Andric Tracker.handlePressureEvent(Event); 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric switch (Event.Reason) { 5470b57cec5SDimitry Andric default: 5480b57cec5SDimitry Andric break; 5490b57cec5SDimitry Andric 5500b57cec5SDimitry Andric case HWPressureEvent::RESOURCES: 5510b57cec5SDimitry Andric PressureIncreasedBecauseOfResources = true; 5520b57cec5SDimitry Andric break; 5530b57cec5SDimitry Andric case HWPressureEvent::REGISTER_DEPS: 5540b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies = true; 5550b57cec5SDimitry Andric break; 5560b57cec5SDimitry Andric case HWPressureEvent::MEMORY_DEPS: 5570b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies = true; 5580b57cec5SDimitry Andric break; 5590b57cec5SDimitry Andric } 5600b57cec5SDimitry Andric } 5610b57cec5SDimitry Andric 5620b57cec5SDimitry Andric void BottleneckAnalysis::onCycleEnd() { 5630b57cec5SDimitry Andric ++TotalCycles; 5640b57cec5SDimitry Andric 5650b57cec5SDimitry Andric bool PressureIncreasedBecauseOfDataDependencies = 5660b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies || 5670b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies; 5680b57cec5SDimitry Andric if (!PressureIncreasedBecauseOfResources && 5690b57cec5SDimitry Andric !PressureIncreasedBecauseOfDataDependencies) 5700b57cec5SDimitry Andric return; 5710b57cec5SDimitry Andric 5720b57cec5SDimitry Andric ++BPI.PressureIncreaseCycles; 5730b57cec5SDimitry Andric if (PressureIncreasedBecauseOfRegisterDependencies) 5740b57cec5SDimitry Andric ++BPI.RegisterDependencyCycles; 5750b57cec5SDimitry Andric if (PressureIncreasedBecauseOfMemoryDependencies) 5760b57cec5SDimitry Andric ++BPI.MemoryDependencyCycles; 5770b57cec5SDimitry Andric if (PressureIncreasedBecauseOfDataDependencies) 5780b57cec5SDimitry Andric ++BPI.DataDependencyCycles; 5790b57cec5SDimitry Andric if (PressureIncreasedBecauseOfResources) 5800b57cec5SDimitry Andric ++BPI.ResourcePressureCycles; 5810b57cec5SDimitry Andric PressureIncreasedBecauseOfResources = false; 5820b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies = false; 5830b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies = false; 5840b57cec5SDimitry Andric } 5850b57cec5SDimitry Andric 5860b57cec5SDimitry Andric void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { 5870b57cec5SDimitry Andric if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { 5880b57cec5SDimitry Andric OS << "\n\nNo resource or data dependency bottlenecks discovered.\n"; 5890b57cec5SDimitry Andric return; 5900b57cec5SDimitry Andric } 5910b57cec5SDimitry Andric 5920b57cec5SDimitry Andric double PressurePerCycle = 5930b57cec5SDimitry Andric (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; 5940b57cec5SDimitry Andric double ResourcePressurePerCycle = 5950b57cec5SDimitry Andric (double)BPI.ResourcePressureCycles * 100 / TotalCycles; 5960b57cec5SDimitry Andric double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; 5970b57cec5SDimitry Andric double RegDepPressurePerCycle = 5980b57cec5SDimitry Andric (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; 5990b57cec5SDimitry Andric double MemDepPressurePerCycle = 6000b57cec5SDimitry Andric (double)BPI.MemoryDependencyCycles * 100 / TotalCycles; 6010b57cec5SDimitry Andric 6020b57cec5SDimitry Andric OS << "\n\nCycles with backend pressure increase [ " 6030b57cec5SDimitry Andric << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]"; 6040b57cec5SDimitry Andric 6050b57cec5SDimitry Andric OS << "\nThroughput Bottlenecks: " 6060b57cec5SDimitry Andric << "\n Resource Pressure [ " 6070b57cec5SDimitry Andric << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100) 6080b57cec5SDimitry Andric << "% ]"; 6090b57cec5SDimitry Andric 6100b57cec5SDimitry Andric if (BPI.PressureIncreaseCycles) { 6110b57cec5SDimitry Andric ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution(); 612e8d8bef9SDimitry Andric const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); 6130b57cec5SDimitry Andric for (unsigned I = 0, E = Distribution.size(); I < E; ++I) { 6140b57cec5SDimitry Andric unsigned ResourceCycles = Distribution[I]; 6150b57cec5SDimitry Andric if (ResourceCycles) { 6160b57cec5SDimitry Andric double Frequency = (double)ResourceCycles * 100 / TotalCycles; 6170b57cec5SDimitry Andric const MCProcResourceDesc &PRDesc = *SM.getProcResource(I); 6180b57cec5SDimitry Andric OS << "\n - " << PRDesc.Name << " [ " 6190b57cec5SDimitry Andric << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; 6200b57cec5SDimitry Andric } 6210b57cec5SDimitry Andric } 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric OS << "\n Data Dependencies: [ " 6250b57cec5SDimitry Andric << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; 6260b57cec5SDimitry Andric OS << "\n - Register Dependencies [ " 6270b57cec5SDimitry Andric << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) 6280b57cec5SDimitry Andric << "% ]"; 6290b57cec5SDimitry Andric OS << "\n - Memory Dependencies [ " 6300b57cec5SDimitry Andric << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) 6310b57cec5SDimitry Andric << "% ]\n"; 6320b57cec5SDimitry Andric } 6330b57cec5SDimitry Andric 6340b57cec5SDimitry Andric void BottleneckAnalysis::printView(raw_ostream &OS) const { 6350b57cec5SDimitry Andric std::string Buffer; 6360b57cec5SDimitry Andric raw_string_ostream TempStream(Buffer); 6370b57cec5SDimitry Andric printBottleneckHints(TempStream); 6380b57cec5SDimitry Andric TempStream.flush(); 6390b57cec5SDimitry Andric OS << Buffer; 6400b57cec5SDimitry Andric printCriticalSequence(OS); 6410b57cec5SDimitry Andric } 6420b57cec5SDimitry Andric 6430b57cec5SDimitry Andric } // namespace mca. 6440b57cec5SDimitry Andric } // namespace llvm 645