1*0b57cec5SDimitry Andric //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric /// \file 9*0b57cec5SDimitry Andric /// 10*0b57cec5SDimitry Andric /// This file implements the functionalities used by the BottleneckAnalysis 11*0b57cec5SDimitry Andric /// to report bottleneck info. 12*0b57cec5SDimitry Andric /// 13*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 14*0b57cec5SDimitry Andric 15*0b57cec5SDimitry Andric #include "Views/BottleneckAnalysis.h" 16*0b57cec5SDimitry Andric #include "llvm/MC/MCInst.h" 17*0b57cec5SDimitry Andric #include "llvm/MCA/Support.h" 18*0b57cec5SDimitry Andric #include "llvm/Support/Format.h" 19*0b57cec5SDimitry Andric #include "llvm/Support/FormattedStream.h" 20*0b57cec5SDimitry Andric 21*0b57cec5SDimitry Andric namespace llvm { 22*0b57cec5SDimitry Andric namespace mca { 23*0b57cec5SDimitry Andric 24*0b57cec5SDimitry Andric #define DEBUG_TYPE "llvm-mca" 25*0b57cec5SDimitry Andric 26*0b57cec5SDimitry Andric PressureTracker::PressureTracker(const MCSchedModel &Model) 27*0b57cec5SDimitry Andric : SM(Model), 28*0b57cec5SDimitry Andric ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), 29*0b57cec5SDimitry Andric ProcResID2Mask(Model.getNumProcResourceKinds(), 0), 30*0b57cec5SDimitry Andric ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), 31*0b57cec5SDimitry Andric ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) { 32*0b57cec5SDimitry Andric computeProcResourceMasks(SM, ProcResID2Mask); 33*0b57cec5SDimitry Andric 34*0b57cec5SDimitry Andric // Ignore the invalid resource at index zero. 35*0b57cec5SDimitry Andric unsigned NextResourceUsersIdx = 0; 36*0b57cec5SDimitry Andric for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) { 37*0b57cec5SDimitry Andric const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); 38*0b57cec5SDimitry Andric ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx; 39*0b57cec5SDimitry Andric NextResourceUsersIdx += ProcResource.NumUnits; 40*0b57cec5SDimitry Andric uint64_t ResourceMask = ProcResID2Mask[I]; 41*0b57cec5SDimitry Andric ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I; 42*0b57cec5SDimitry Andric } 43*0b57cec5SDimitry Andric 44*0b57cec5SDimitry Andric ResourceUsers.resize(NextResourceUsersIdx); 45*0b57cec5SDimitry Andric std::fill(ResourceUsers.begin(), ResourceUsers.end(), 46*0b57cec5SDimitry Andric std::make_pair<unsigned, unsigned>(~0U, 0U)); 47*0b57cec5SDimitry Andric } 48*0b57cec5SDimitry Andric 49*0b57cec5SDimitry Andric void PressureTracker::getResourceUsers(uint64_t ResourceMask, 50*0b57cec5SDimitry Andric SmallVectorImpl<User> &Users) const { 51*0b57cec5SDimitry Andric unsigned Index = getResourceStateIndex(ResourceMask); 52*0b57cec5SDimitry Andric unsigned ProcResID = ResIdx2ProcResID[Index]; 53*0b57cec5SDimitry Andric const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); 54*0b57cec5SDimitry Andric for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { 55*0b57cec5SDimitry Andric const User U = getResourceUser(ProcResID, I); 56*0b57cec5SDimitry Andric if (U.second && IPI.find(U.first) != IPI.end()) 57*0b57cec5SDimitry Andric Users.emplace_back(U); 58*0b57cec5SDimitry Andric } 59*0b57cec5SDimitry Andric } 60*0b57cec5SDimitry Andric 61*0b57cec5SDimitry Andric void PressureTracker::onInstructionDispatched(unsigned IID) { 62*0b57cec5SDimitry Andric IPI.insert(std::make_pair(IID, InstructionPressureInfo())); 63*0b57cec5SDimitry Andric } 64*0b57cec5SDimitry Andric 65*0b57cec5SDimitry Andric void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); } 66*0b57cec5SDimitry Andric 67*0b57cec5SDimitry Andric void PressureTracker::handleInstructionIssuedEvent( 68*0b57cec5SDimitry Andric const HWInstructionIssuedEvent &Event) { 69*0b57cec5SDimitry Andric unsigned IID = Event.IR.getSourceIndex(); 70*0b57cec5SDimitry Andric using ResourceRef = HWInstructionIssuedEvent::ResourceRef; 71*0b57cec5SDimitry Andric using ResourceUse = std::pair<ResourceRef, ResourceCycles>; 72*0b57cec5SDimitry Andric for (const ResourceUse &Use : Event.UsedResources) { 73*0b57cec5SDimitry Andric const ResourceRef &RR = Use.first; 74*0b57cec5SDimitry Andric unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; 75*0b57cec5SDimitry Andric Index += countTrailingZeros(RR.second); 76*0b57cec5SDimitry Andric ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator()); 77*0b57cec5SDimitry Andric } 78*0b57cec5SDimitry Andric } 79*0b57cec5SDimitry Andric 80*0b57cec5SDimitry Andric void PressureTracker::updateResourcePressureDistribution( 81*0b57cec5SDimitry Andric uint64_t CumulativeMask) { 82*0b57cec5SDimitry Andric while (CumulativeMask) { 83*0b57cec5SDimitry Andric uint64_t Current = CumulativeMask & (-CumulativeMask); 84*0b57cec5SDimitry Andric unsigned ResIdx = getResourceStateIndex(Current); 85*0b57cec5SDimitry Andric unsigned ProcResID = ResIdx2ProcResID[ResIdx]; 86*0b57cec5SDimitry Andric uint64_t Mask = ProcResID2Mask[ProcResID]; 87*0b57cec5SDimitry Andric 88*0b57cec5SDimitry Andric if (Mask == Current) { 89*0b57cec5SDimitry Andric ResourcePressureDistribution[ProcResID]++; 90*0b57cec5SDimitry Andric CumulativeMask ^= Current; 91*0b57cec5SDimitry Andric continue; 92*0b57cec5SDimitry Andric } 93*0b57cec5SDimitry Andric 94*0b57cec5SDimitry Andric Mask ^= Current; 95*0b57cec5SDimitry Andric while (Mask) { 96*0b57cec5SDimitry Andric uint64_t SubUnit = Mask & (-Mask); 97*0b57cec5SDimitry Andric ResIdx = getResourceStateIndex(SubUnit); 98*0b57cec5SDimitry Andric ProcResID = ResIdx2ProcResID[ResIdx]; 99*0b57cec5SDimitry Andric ResourcePressureDistribution[ProcResID]++; 100*0b57cec5SDimitry Andric Mask ^= SubUnit; 101*0b57cec5SDimitry Andric } 102*0b57cec5SDimitry Andric 103*0b57cec5SDimitry Andric CumulativeMask ^= Current; 104*0b57cec5SDimitry Andric } 105*0b57cec5SDimitry Andric } 106*0b57cec5SDimitry Andric 107*0b57cec5SDimitry Andric void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { 108*0b57cec5SDimitry Andric assert(Event.Reason != HWPressureEvent::INVALID && 109*0b57cec5SDimitry Andric "Unexpected invalid event!"); 110*0b57cec5SDimitry Andric 111*0b57cec5SDimitry Andric switch (Event.Reason) { 112*0b57cec5SDimitry Andric default: 113*0b57cec5SDimitry Andric break; 114*0b57cec5SDimitry Andric 115*0b57cec5SDimitry Andric case HWPressureEvent::RESOURCES: { 116*0b57cec5SDimitry Andric const uint64_t ResourceMask = Event.ResourceMask; 117*0b57cec5SDimitry Andric updateResourcePressureDistribution(Event.ResourceMask); 118*0b57cec5SDimitry Andric 119*0b57cec5SDimitry Andric for (const InstRef &IR : Event.AffectedInstructions) { 120*0b57cec5SDimitry Andric const Instruction &IS = *IR.getInstruction(); 121*0b57cec5SDimitry Andric unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask; 122*0b57cec5SDimitry Andric if (!BusyResources) 123*0b57cec5SDimitry Andric continue; 124*0b57cec5SDimitry Andric 125*0b57cec5SDimitry Andric unsigned IID = IR.getSourceIndex(); 126*0b57cec5SDimitry Andric IPI[IID].ResourcePressureCycles++; 127*0b57cec5SDimitry Andric } 128*0b57cec5SDimitry Andric break; 129*0b57cec5SDimitry Andric } 130*0b57cec5SDimitry Andric 131*0b57cec5SDimitry Andric case HWPressureEvent::REGISTER_DEPS: 132*0b57cec5SDimitry Andric for (const InstRef &IR : Event.AffectedInstructions) { 133*0b57cec5SDimitry Andric unsigned IID = IR.getSourceIndex(); 134*0b57cec5SDimitry Andric IPI[IID].RegisterPressureCycles++; 135*0b57cec5SDimitry Andric } 136*0b57cec5SDimitry Andric break; 137*0b57cec5SDimitry Andric 138*0b57cec5SDimitry Andric case HWPressureEvent::MEMORY_DEPS: 139*0b57cec5SDimitry Andric for (const InstRef &IR : Event.AffectedInstructions) { 140*0b57cec5SDimitry Andric unsigned IID = IR.getSourceIndex(); 141*0b57cec5SDimitry Andric IPI[IID].MemoryPressureCycles++; 142*0b57cec5SDimitry Andric } 143*0b57cec5SDimitry Andric } 144*0b57cec5SDimitry Andric } 145*0b57cec5SDimitry Andric 146*0b57cec5SDimitry Andric #ifndef NDEBUG 147*0b57cec5SDimitry Andric void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, 148*0b57cec5SDimitry Andric const DependencyEdge &DepEdge, 149*0b57cec5SDimitry Andric MCInstPrinter &MCIP) const { 150*0b57cec5SDimitry Andric unsigned FromIID = DepEdge.FromIID; 151*0b57cec5SDimitry Andric unsigned ToIID = DepEdge.ToIID; 152*0b57cec5SDimitry Andric assert(FromIID < ToIID && "Graph should be acyclic!"); 153*0b57cec5SDimitry Andric 154*0b57cec5SDimitry Andric const DependencyEdge::Dependency &DE = DepEdge.Dep; 155*0b57cec5SDimitry Andric assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!"); 156*0b57cec5SDimitry Andric 157*0b57cec5SDimitry Andric OS << " FROM: " << FromIID << " TO: " << ToIID << " "; 158*0b57cec5SDimitry Andric if (DE.Type == DependencyEdge::DT_REGISTER) { 159*0b57cec5SDimitry Andric OS << " - REGISTER: "; 160*0b57cec5SDimitry Andric MCIP.printRegName(OS, DE.ResourceOrRegID); 161*0b57cec5SDimitry Andric } else if (DE.Type == DependencyEdge::DT_MEMORY) { 162*0b57cec5SDimitry Andric OS << " - MEMORY"; 163*0b57cec5SDimitry Andric } else { 164*0b57cec5SDimitry Andric assert(DE.Type == DependencyEdge::DT_RESOURCE && 165*0b57cec5SDimitry Andric "Unsupported dependency type!"); 166*0b57cec5SDimitry Andric OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; 167*0b57cec5SDimitry Andric } 168*0b57cec5SDimitry Andric OS << " - CYCLES: " << DE.Cost << '\n'; 169*0b57cec5SDimitry Andric } 170*0b57cec5SDimitry Andric #endif // NDEBUG 171*0b57cec5SDimitry Andric 172*0b57cec5SDimitry Andric void DependencyGraph::initializeRootSet( 173*0b57cec5SDimitry Andric SmallVectorImpl<unsigned> &RootSet) const { 174*0b57cec5SDimitry Andric for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { 175*0b57cec5SDimitry Andric const DGNode &N = Nodes[I]; 176*0b57cec5SDimitry Andric if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty()) 177*0b57cec5SDimitry Andric RootSet.emplace_back(I); 178*0b57cec5SDimitry Andric } 179*0b57cec5SDimitry Andric } 180*0b57cec5SDimitry Andric 181*0b57cec5SDimitry Andric void DependencyGraph::propagateThroughEdges( 182*0b57cec5SDimitry Andric SmallVectorImpl<unsigned> &RootSet) { 183*0b57cec5SDimitry Andric SmallVector<unsigned, 8> ToVisit; 184*0b57cec5SDimitry Andric 185*0b57cec5SDimitry Andric // A critical sequence is computed as the longest path from a node of the 186*0b57cec5SDimitry Andric // RootSet to a leaf node (i.e. a node with no successors). The RootSet is 187*0b57cec5SDimitry Andric // composed of nodes with at least one successor, and no predecessors. 188*0b57cec5SDimitry Andric // 189*0b57cec5SDimitry Andric // Each node of the graph starts with an initial default cost of zero. The 190*0b57cec5SDimitry Andric // cost of a node is a measure of criticality: the higher the cost, the bigger 191*0b57cec5SDimitry Andric // is the performance impact. 192*0b57cec5SDimitry Andric // 193*0b57cec5SDimitry Andric // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of 194*0b57cec5SDimitry Andric // the inner loop selects (i.e. visits) a node N from a set of `unvisited 195*0b57cec5SDimitry Andric // nodes`, and then propagates the cost of N to all its neighbors. 196*0b57cec5SDimitry Andric // 197*0b57cec5SDimitry Andric // The `unvisited nodes` set initially contains all the nodes from the 198*0b57cec5SDimitry Andric // RootSet. A node N is added to the `unvisited nodes` if all its 199*0b57cec5SDimitry Andric // predecessors have been visited already. 200*0b57cec5SDimitry Andric // 201*0b57cec5SDimitry Andric // For simplicity, every node tracks the number of unvisited incoming edges in 202*0b57cec5SDimitry Andric // field `NumVisitedPredecessors`. When the value of that field drops to 203*0b57cec5SDimitry Andric // zero, then the corresponding node is added to a `ToVisit` set. 204*0b57cec5SDimitry Andric // 205*0b57cec5SDimitry Andric // At the end of every iteration of the outer loop, set `ToVisit` becomes our 206*0b57cec5SDimitry Andric // new `unvisited nodes` set. 207*0b57cec5SDimitry Andric // 208*0b57cec5SDimitry Andric // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet) 209*0b57cec5SDimitry Andric // is empty. This algorithm works under the assumption that the graph is 210*0b57cec5SDimitry Andric // acyclic. 211*0b57cec5SDimitry Andric do { 212*0b57cec5SDimitry Andric for (unsigned IID : RootSet) { 213*0b57cec5SDimitry Andric const DGNode &N = Nodes[IID]; 214*0b57cec5SDimitry Andric for (const DependencyEdge &DepEdge : N.OutgoingEdges) { 215*0b57cec5SDimitry Andric unsigned ToIID = DepEdge.ToIID; 216*0b57cec5SDimitry Andric DGNode &To = Nodes[ToIID]; 217*0b57cec5SDimitry Andric uint64_t Cost = N.Cost + DepEdge.Dep.Cost; 218*0b57cec5SDimitry Andric // Check if this is the most expensive incoming edge seen so far. In 219*0b57cec5SDimitry Andric // case, update the total cost of the destination node (ToIID), as well 220*0b57cec5SDimitry Andric // its field `CriticalPredecessor`. 221*0b57cec5SDimitry Andric if (Cost > To.Cost) { 222*0b57cec5SDimitry Andric To.CriticalPredecessor = DepEdge; 223*0b57cec5SDimitry Andric To.Cost = Cost; 224*0b57cec5SDimitry Andric To.Depth = N.Depth + 1; 225*0b57cec5SDimitry Andric } 226*0b57cec5SDimitry Andric To.NumVisitedPredecessors++; 227*0b57cec5SDimitry Andric if (To.NumVisitedPredecessors == To.NumPredecessors) 228*0b57cec5SDimitry Andric ToVisit.emplace_back(ToIID); 229*0b57cec5SDimitry Andric } 230*0b57cec5SDimitry Andric } 231*0b57cec5SDimitry Andric 232*0b57cec5SDimitry Andric std::swap(RootSet, ToVisit); 233*0b57cec5SDimitry Andric ToVisit.clear(); 234*0b57cec5SDimitry Andric } while (!RootSet.empty()); 235*0b57cec5SDimitry Andric } 236*0b57cec5SDimitry Andric 237*0b57cec5SDimitry Andric void DependencyGraph::getCriticalSequence( 238*0b57cec5SDimitry Andric SmallVectorImpl<const DependencyEdge *> &Seq) const { 239*0b57cec5SDimitry Andric // At this stage, nodes of the graph have been already visited, and costs have 240*0b57cec5SDimitry Andric // been propagated through the edges (see method `propagateThroughEdges()`). 241*0b57cec5SDimitry Andric 242*0b57cec5SDimitry Andric // Identify the node N with the highest cost in the graph. By construction, 243*0b57cec5SDimitry Andric // that node is the last instruction of our critical sequence. 244*0b57cec5SDimitry Andric // Field N.Depth would tell us the total length of the sequence. 245*0b57cec5SDimitry Andric // 246*0b57cec5SDimitry Andric // To obtain the sequence of critical edges, we simply follow the chain of critical 247*0b57cec5SDimitry Andric // predecessors starting from node N (field DGNode::CriticalPredecessor). 248*0b57cec5SDimitry Andric const auto It = std::max_element( 249*0b57cec5SDimitry Andric Nodes.begin(), Nodes.end(), 250*0b57cec5SDimitry Andric [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; }); 251*0b57cec5SDimitry Andric unsigned IID = std::distance(Nodes.begin(), It); 252*0b57cec5SDimitry Andric Seq.resize(Nodes[IID].Depth); 253*0b57cec5SDimitry Andric for (unsigned I = Seq.size(), E = 0; I > E; --I) { 254*0b57cec5SDimitry Andric const DGNode &N = Nodes[IID]; 255*0b57cec5SDimitry Andric Seq[I - 1] = &N.CriticalPredecessor; 256*0b57cec5SDimitry Andric IID = N.CriticalPredecessor.FromIID; 257*0b57cec5SDimitry Andric } 258*0b57cec5SDimitry Andric } 259*0b57cec5SDimitry Andric 260*0b57cec5SDimitry Andric static void printInstruction(formatted_raw_ostream &FOS, 261*0b57cec5SDimitry Andric const MCSubtargetInfo &STI, MCInstPrinter &MCIP, 262*0b57cec5SDimitry Andric const MCInst &MCI, 263*0b57cec5SDimitry Andric bool UseDifferentColor = false) { 264*0b57cec5SDimitry Andric std::string Instruction; 265*0b57cec5SDimitry Andric raw_string_ostream InstrStream(Instruction); 266*0b57cec5SDimitry Andric 267*0b57cec5SDimitry Andric FOS.PadToColumn(14); 268*0b57cec5SDimitry Andric 269*0b57cec5SDimitry Andric MCIP.printInst(&MCI, InstrStream, "", STI); 270*0b57cec5SDimitry Andric InstrStream.flush(); 271*0b57cec5SDimitry Andric 272*0b57cec5SDimitry Andric if (UseDifferentColor) 273*0b57cec5SDimitry Andric FOS.changeColor(raw_ostream::CYAN, true, false); 274*0b57cec5SDimitry Andric FOS << StringRef(Instruction).ltrim(); 275*0b57cec5SDimitry Andric if (UseDifferentColor) 276*0b57cec5SDimitry Andric FOS.resetColor(); 277*0b57cec5SDimitry Andric } 278*0b57cec5SDimitry Andric 279*0b57cec5SDimitry Andric void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { 280*0b57cec5SDimitry Andric SmallVector<const DependencyEdge *, 16> Seq; 281*0b57cec5SDimitry Andric DG.getCriticalSequence(Seq); 282*0b57cec5SDimitry Andric if (Seq.empty()) 283*0b57cec5SDimitry Andric return; 284*0b57cec5SDimitry Andric 285*0b57cec5SDimitry Andric OS << "\nCritical sequence based on the simulation:\n\n"; 286*0b57cec5SDimitry Andric 287*0b57cec5SDimitry Andric const DependencyEdge &FirstEdge = *Seq[0]; 288*0b57cec5SDimitry Andric unsigned FromIID = FirstEdge.FromIID % Source.size(); 289*0b57cec5SDimitry Andric unsigned ToIID = FirstEdge.ToIID % Source.size(); 290*0b57cec5SDimitry Andric bool IsLoopCarried = FromIID >= ToIID; 291*0b57cec5SDimitry Andric 292*0b57cec5SDimitry Andric formatted_raw_ostream FOS(OS); 293*0b57cec5SDimitry Andric FOS.PadToColumn(14); 294*0b57cec5SDimitry Andric FOS << "Instruction"; 295*0b57cec5SDimitry Andric FOS.PadToColumn(58); 296*0b57cec5SDimitry Andric FOS << "Dependency Information"; 297*0b57cec5SDimitry Andric 298*0b57cec5SDimitry Andric bool HasColors = FOS.has_colors(); 299*0b57cec5SDimitry Andric 300*0b57cec5SDimitry Andric unsigned CurrentIID = 0; 301*0b57cec5SDimitry Andric if (IsLoopCarried) { 302*0b57cec5SDimitry Andric FOS << "\n +----< " << FromIID << "."; 303*0b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors); 304*0b57cec5SDimitry Andric FOS << "\n |\n | < loop carried > \n |"; 305*0b57cec5SDimitry Andric } else { 306*0b57cec5SDimitry Andric while (CurrentIID < FromIID) { 307*0b57cec5SDimitry Andric FOS << "\n " << CurrentIID << "."; 308*0b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[CurrentIID]); 309*0b57cec5SDimitry Andric CurrentIID++; 310*0b57cec5SDimitry Andric } 311*0b57cec5SDimitry Andric 312*0b57cec5SDimitry Andric FOS << "\n +----< " << CurrentIID << "."; 313*0b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors); 314*0b57cec5SDimitry Andric CurrentIID++; 315*0b57cec5SDimitry Andric } 316*0b57cec5SDimitry Andric 317*0b57cec5SDimitry Andric for (const DependencyEdge *&DE : Seq) { 318*0b57cec5SDimitry Andric ToIID = DE->ToIID % Source.size(); 319*0b57cec5SDimitry Andric unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID; 320*0b57cec5SDimitry Andric 321*0b57cec5SDimitry Andric while (CurrentIID < LastIID) { 322*0b57cec5SDimitry Andric FOS << "\n | " << CurrentIID << "."; 323*0b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[CurrentIID]); 324*0b57cec5SDimitry Andric CurrentIID++; 325*0b57cec5SDimitry Andric } 326*0b57cec5SDimitry Andric 327*0b57cec5SDimitry Andric if (CurrentIID == ToIID) { 328*0b57cec5SDimitry Andric FOS << "\n +----> " << ToIID << "."; 329*0b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors); 330*0b57cec5SDimitry Andric } else { 331*0b57cec5SDimitry Andric FOS << "\n |\n | < loop carried > \n |" 332*0b57cec5SDimitry Andric << "\n +----> " << ToIID << "."; 333*0b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors); 334*0b57cec5SDimitry Andric } 335*0b57cec5SDimitry Andric FOS.PadToColumn(58); 336*0b57cec5SDimitry Andric 337*0b57cec5SDimitry Andric const DependencyEdge::Dependency &Dep = DE->Dep; 338*0b57cec5SDimitry Andric if (HasColors) 339*0b57cec5SDimitry Andric FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 340*0b57cec5SDimitry Andric 341*0b57cec5SDimitry Andric if (Dep.Type == DependencyEdge::DT_REGISTER) { 342*0b57cec5SDimitry Andric FOS << "## REGISTER dependency: "; 343*0b57cec5SDimitry Andric if (HasColors) 344*0b57cec5SDimitry Andric FOS.changeColor(raw_ostream::MAGENTA, true, false); 345*0b57cec5SDimitry Andric MCIP.printRegName(FOS, Dep.ResourceOrRegID); 346*0b57cec5SDimitry Andric } else if (Dep.Type == DependencyEdge::DT_MEMORY) { 347*0b57cec5SDimitry Andric FOS << "## MEMORY dependency."; 348*0b57cec5SDimitry Andric } else { 349*0b57cec5SDimitry Andric assert(Dep.Type == DependencyEdge::DT_RESOURCE && 350*0b57cec5SDimitry Andric "Unsupported dependency type!"); 351*0b57cec5SDimitry Andric FOS << "## RESOURCE interference: "; 352*0b57cec5SDimitry Andric if (HasColors) 353*0b57cec5SDimitry Andric FOS.changeColor(raw_ostream::MAGENTA, true, false); 354*0b57cec5SDimitry Andric FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID); 355*0b57cec5SDimitry Andric if (HasColors) { 356*0b57cec5SDimitry Andric FOS.resetColor(); 357*0b57cec5SDimitry Andric FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 358*0b57cec5SDimitry Andric } 359*0b57cec5SDimitry Andric FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations) 360*0b57cec5SDimitry Andric << "% ]"; 361*0b57cec5SDimitry Andric } 362*0b57cec5SDimitry Andric if (HasColors) 363*0b57cec5SDimitry Andric FOS.resetColor(); 364*0b57cec5SDimitry Andric ++CurrentIID; 365*0b57cec5SDimitry Andric } 366*0b57cec5SDimitry Andric 367*0b57cec5SDimitry Andric while (CurrentIID < Source.size()) { 368*0b57cec5SDimitry Andric FOS << "\n " << CurrentIID << "."; 369*0b57cec5SDimitry Andric printInstruction(FOS, STI, MCIP, Source[CurrentIID]); 370*0b57cec5SDimitry Andric CurrentIID++; 371*0b57cec5SDimitry Andric } 372*0b57cec5SDimitry Andric 373*0b57cec5SDimitry Andric FOS << '\n'; 374*0b57cec5SDimitry Andric FOS.flush(); 375*0b57cec5SDimitry Andric } 376*0b57cec5SDimitry Andric 377*0b57cec5SDimitry Andric #ifndef NDEBUG 378*0b57cec5SDimitry Andric void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { 379*0b57cec5SDimitry Andric OS << "\nREG DEPS\n"; 380*0b57cec5SDimitry Andric for (const DGNode &Node : Nodes) 381*0b57cec5SDimitry Andric for (const DependencyEdge &DE : Node.OutgoingEdges) 382*0b57cec5SDimitry Andric if (DE.Dep.Type == DependencyEdge::DT_REGISTER) 383*0b57cec5SDimitry Andric dumpDependencyEdge(OS, DE, MCIP); 384*0b57cec5SDimitry Andric 385*0b57cec5SDimitry Andric OS << "\nMEM DEPS\n"; 386*0b57cec5SDimitry Andric for (const DGNode &Node : Nodes) 387*0b57cec5SDimitry Andric for (const DependencyEdge &DE : Node.OutgoingEdges) 388*0b57cec5SDimitry Andric if (DE.Dep.Type == DependencyEdge::DT_MEMORY) 389*0b57cec5SDimitry Andric dumpDependencyEdge(OS, DE, MCIP); 390*0b57cec5SDimitry Andric 391*0b57cec5SDimitry Andric OS << "\nRESOURCE DEPS\n"; 392*0b57cec5SDimitry Andric for (const DGNode &Node : Nodes) 393*0b57cec5SDimitry Andric for (const DependencyEdge &DE : Node.OutgoingEdges) 394*0b57cec5SDimitry Andric if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) 395*0b57cec5SDimitry Andric dumpDependencyEdge(OS, DE, MCIP); 396*0b57cec5SDimitry Andric } 397*0b57cec5SDimitry Andric #endif // NDEBUG 398*0b57cec5SDimitry Andric 399*0b57cec5SDimitry Andric void DependencyGraph::addDependency(unsigned From, unsigned To, 400*0b57cec5SDimitry Andric DependencyEdge::Dependency &&Dep) { 401*0b57cec5SDimitry Andric DGNode &NodeFrom = Nodes[From]; 402*0b57cec5SDimitry Andric DGNode &NodeTo = Nodes[To]; 403*0b57cec5SDimitry Andric SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; 404*0b57cec5SDimitry Andric 405*0b57cec5SDimitry Andric auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) { 406*0b57cec5SDimitry Andric return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; 407*0b57cec5SDimitry Andric }); 408*0b57cec5SDimitry Andric 409*0b57cec5SDimitry Andric if (It != Vec.end()) { 410*0b57cec5SDimitry Andric It->Dep.Cost += Dep.Cost; 411*0b57cec5SDimitry Andric It->Frequency++; 412*0b57cec5SDimitry Andric return; 413*0b57cec5SDimitry Andric } 414*0b57cec5SDimitry Andric 415*0b57cec5SDimitry Andric DependencyEdge DE = {Dep, From, To, 1}; 416*0b57cec5SDimitry Andric Vec.emplace_back(DE); 417*0b57cec5SDimitry Andric NodeTo.NumPredecessors++; 418*0b57cec5SDimitry Andric } 419*0b57cec5SDimitry Andric 420*0b57cec5SDimitry Andric BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, 421*0b57cec5SDimitry Andric MCInstPrinter &Printer, 422*0b57cec5SDimitry Andric ArrayRef<MCInst> S, unsigned NumIter) 423*0b57cec5SDimitry Andric : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3), 424*0b57cec5SDimitry Andric Source(S), Iterations(NumIter), TotalCycles(0), 425*0b57cec5SDimitry Andric PressureIncreasedBecauseOfResources(false), 426*0b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies(false), 427*0b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies(false), 428*0b57cec5SDimitry Andric SeenStallCycles(false), BPI() {} 429*0b57cec5SDimitry Andric 430*0b57cec5SDimitry Andric void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, 431*0b57cec5SDimitry Andric unsigned RegID, unsigned Cost) { 432*0b57cec5SDimitry Andric bool IsLoopCarried = From >= To; 433*0b57cec5SDimitry Andric unsigned SourceSize = Source.size(); 434*0b57cec5SDimitry Andric if (IsLoopCarried) { 435*0b57cec5SDimitry Andric Cost *= Iterations / 2; 436*0b57cec5SDimitry Andric DG.addRegisterDep(From, To + SourceSize, RegID, Cost); 437*0b57cec5SDimitry Andric DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); 438*0b57cec5SDimitry Andric return; 439*0b57cec5SDimitry Andric } 440*0b57cec5SDimitry Andric DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost); 441*0b57cec5SDimitry Andric } 442*0b57cec5SDimitry Andric 443*0b57cec5SDimitry Andric void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, 444*0b57cec5SDimitry Andric unsigned Cost) { 445*0b57cec5SDimitry Andric bool IsLoopCarried = From >= To; 446*0b57cec5SDimitry Andric unsigned SourceSize = Source.size(); 447*0b57cec5SDimitry Andric if (IsLoopCarried) { 448*0b57cec5SDimitry Andric Cost *= Iterations / 2; 449*0b57cec5SDimitry Andric DG.addMemoryDep(From, To + SourceSize, Cost); 450*0b57cec5SDimitry Andric DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); 451*0b57cec5SDimitry Andric return; 452*0b57cec5SDimitry Andric } 453*0b57cec5SDimitry Andric DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost); 454*0b57cec5SDimitry Andric } 455*0b57cec5SDimitry Andric 456*0b57cec5SDimitry Andric void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, 457*0b57cec5SDimitry Andric uint64_t Mask, unsigned Cost) { 458*0b57cec5SDimitry Andric bool IsLoopCarried = From >= To; 459*0b57cec5SDimitry Andric unsigned SourceSize = Source.size(); 460*0b57cec5SDimitry Andric if (IsLoopCarried) { 461*0b57cec5SDimitry Andric Cost *= Iterations / 2; 462*0b57cec5SDimitry Andric DG.addResourceDep(From, To + SourceSize, Mask, Cost); 463*0b57cec5SDimitry Andric DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); 464*0b57cec5SDimitry Andric return; 465*0b57cec5SDimitry Andric } 466*0b57cec5SDimitry Andric DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost); 467*0b57cec5SDimitry Andric } 468*0b57cec5SDimitry Andric 469*0b57cec5SDimitry Andric void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { 470*0b57cec5SDimitry Andric const unsigned IID = Event.IR.getSourceIndex(); 471*0b57cec5SDimitry Andric if (Event.Type == HWInstructionEvent::Dispatched) { 472*0b57cec5SDimitry Andric Tracker.onInstructionDispatched(IID); 473*0b57cec5SDimitry Andric return; 474*0b57cec5SDimitry Andric } 475*0b57cec5SDimitry Andric if (Event.Type == HWInstructionEvent::Executed) { 476*0b57cec5SDimitry Andric Tracker.onInstructionExecuted(IID); 477*0b57cec5SDimitry Andric return; 478*0b57cec5SDimitry Andric } 479*0b57cec5SDimitry Andric 480*0b57cec5SDimitry Andric if (Event.Type != HWInstructionEvent::Issued) 481*0b57cec5SDimitry Andric return; 482*0b57cec5SDimitry Andric 483*0b57cec5SDimitry Andric const Instruction &IS = *Event.IR.getInstruction(); 484*0b57cec5SDimitry Andric unsigned To = IID % Source.size(); 485*0b57cec5SDimitry Andric 486*0b57cec5SDimitry Andric unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID); 487*0b57cec5SDimitry Andric uint64_t ResourceMask = IS.getCriticalResourceMask(); 488*0b57cec5SDimitry Andric SmallVector<std::pair<unsigned, unsigned>, 4> Users; 489*0b57cec5SDimitry Andric while (ResourceMask) { 490*0b57cec5SDimitry Andric uint64_t Current = ResourceMask & (-ResourceMask); 491*0b57cec5SDimitry Andric Tracker.getResourceUsers(Current, Users); 492*0b57cec5SDimitry Andric for (const std::pair<unsigned, unsigned> &U : Users) 493*0b57cec5SDimitry Andric addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles); 494*0b57cec5SDimitry Andric Users.clear(); 495*0b57cec5SDimitry Andric ResourceMask ^= Current; 496*0b57cec5SDimitry Andric } 497*0b57cec5SDimitry Andric 498*0b57cec5SDimitry Andric const CriticalDependency &RegDep = IS.getCriticalRegDep(); 499*0b57cec5SDimitry Andric if (RegDep.Cycles) { 500*0b57cec5SDimitry Andric Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID); 501*0b57cec5SDimitry Andric unsigned From = RegDep.IID % Source.size(); 502*0b57cec5SDimitry Andric addRegisterDep(From, To, RegDep.RegID, Cycles); 503*0b57cec5SDimitry Andric } 504*0b57cec5SDimitry Andric 505*0b57cec5SDimitry Andric const CriticalDependency &MemDep = IS.getCriticalMemDep(); 506*0b57cec5SDimitry Andric if (MemDep.Cycles) { 507*0b57cec5SDimitry Andric Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID); 508*0b57cec5SDimitry Andric unsigned From = MemDep.IID % Source.size(); 509*0b57cec5SDimitry Andric addMemoryDep(From, To, Cycles); 510*0b57cec5SDimitry Andric } 511*0b57cec5SDimitry Andric 512*0b57cec5SDimitry Andric Tracker.handleInstructionIssuedEvent( 513*0b57cec5SDimitry Andric static_cast<const HWInstructionIssuedEvent &>(Event)); 514*0b57cec5SDimitry Andric 515*0b57cec5SDimitry Andric // Check if this is the last simulated instruction. 516*0b57cec5SDimitry Andric if (IID == ((Iterations * Source.size()) - 1)) 517*0b57cec5SDimitry Andric DG.finalizeGraph(); 518*0b57cec5SDimitry Andric } 519*0b57cec5SDimitry Andric 520*0b57cec5SDimitry Andric void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { 521*0b57cec5SDimitry Andric assert(Event.Reason != HWPressureEvent::INVALID && 522*0b57cec5SDimitry Andric "Unexpected invalid event!"); 523*0b57cec5SDimitry Andric 524*0b57cec5SDimitry Andric Tracker.handlePressureEvent(Event); 525*0b57cec5SDimitry Andric 526*0b57cec5SDimitry Andric switch (Event.Reason) { 527*0b57cec5SDimitry Andric default: 528*0b57cec5SDimitry Andric break; 529*0b57cec5SDimitry Andric 530*0b57cec5SDimitry Andric case HWPressureEvent::RESOURCES: 531*0b57cec5SDimitry Andric PressureIncreasedBecauseOfResources = true; 532*0b57cec5SDimitry Andric break; 533*0b57cec5SDimitry Andric case HWPressureEvent::REGISTER_DEPS: 534*0b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies = true; 535*0b57cec5SDimitry Andric break; 536*0b57cec5SDimitry Andric case HWPressureEvent::MEMORY_DEPS: 537*0b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies = true; 538*0b57cec5SDimitry Andric break; 539*0b57cec5SDimitry Andric } 540*0b57cec5SDimitry Andric } 541*0b57cec5SDimitry Andric 542*0b57cec5SDimitry Andric void BottleneckAnalysis::onCycleEnd() { 543*0b57cec5SDimitry Andric ++TotalCycles; 544*0b57cec5SDimitry Andric 545*0b57cec5SDimitry Andric bool PressureIncreasedBecauseOfDataDependencies = 546*0b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies || 547*0b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies; 548*0b57cec5SDimitry Andric if (!PressureIncreasedBecauseOfResources && 549*0b57cec5SDimitry Andric !PressureIncreasedBecauseOfDataDependencies) 550*0b57cec5SDimitry Andric return; 551*0b57cec5SDimitry Andric 552*0b57cec5SDimitry Andric ++BPI.PressureIncreaseCycles; 553*0b57cec5SDimitry Andric if (PressureIncreasedBecauseOfRegisterDependencies) 554*0b57cec5SDimitry Andric ++BPI.RegisterDependencyCycles; 555*0b57cec5SDimitry Andric if (PressureIncreasedBecauseOfMemoryDependencies) 556*0b57cec5SDimitry Andric ++BPI.MemoryDependencyCycles; 557*0b57cec5SDimitry Andric if (PressureIncreasedBecauseOfDataDependencies) 558*0b57cec5SDimitry Andric ++BPI.DataDependencyCycles; 559*0b57cec5SDimitry Andric if (PressureIncreasedBecauseOfResources) 560*0b57cec5SDimitry Andric ++BPI.ResourcePressureCycles; 561*0b57cec5SDimitry Andric PressureIncreasedBecauseOfResources = false; 562*0b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies = false; 563*0b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies = false; 564*0b57cec5SDimitry Andric } 565*0b57cec5SDimitry Andric 566*0b57cec5SDimitry Andric void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { 567*0b57cec5SDimitry Andric if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { 568*0b57cec5SDimitry Andric OS << "\n\nNo resource or data dependency bottlenecks discovered.\n"; 569*0b57cec5SDimitry Andric return; 570*0b57cec5SDimitry Andric } 571*0b57cec5SDimitry Andric 572*0b57cec5SDimitry Andric double PressurePerCycle = 573*0b57cec5SDimitry Andric (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; 574*0b57cec5SDimitry Andric double ResourcePressurePerCycle = 575*0b57cec5SDimitry Andric (double)BPI.ResourcePressureCycles * 100 / TotalCycles; 576*0b57cec5SDimitry Andric double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; 577*0b57cec5SDimitry Andric double RegDepPressurePerCycle = 578*0b57cec5SDimitry Andric (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; 579*0b57cec5SDimitry Andric double MemDepPressurePerCycle = 580*0b57cec5SDimitry Andric (double)BPI.MemoryDependencyCycles * 100 / TotalCycles; 581*0b57cec5SDimitry Andric 582*0b57cec5SDimitry Andric OS << "\n\nCycles with backend pressure increase [ " 583*0b57cec5SDimitry Andric << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]"; 584*0b57cec5SDimitry Andric 585*0b57cec5SDimitry Andric OS << "\nThroughput Bottlenecks: " 586*0b57cec5SDimitry Andric << "\n Resource Pressure [ " 587*0b57cec5SDimitry Andric << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100) 588*0b57cec5SDimitry Andric << "% ]"; 589*0b57cec5SDimitry Andric 590*0b57cec5SDimitry Andric if (BPI.PressureIncreaseCycles) { 591*0b57cec5SDimitry Andric ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution(); 592*0b57cec5SDimitry Andric const MCSchedModel &SM = STI.getSchedModel(); 593*0b57cec5SDimitry Andric for (unsigned I = 0, E = Distribution.size(); I < E; ++I) { 594*0b57cec5SDimitry Andric unsigned ResourceCycles = Distribution[I]; 595*0b57cec5SDimitry Andric if (ResourceCycles) { 596*0b57cec5SDimitry Andric double Frequency = (double)ResourceCycles * 100 / TotalCycles; 597*0b57cec5SDimitry Andric const MCProcResourceDesc &PRDesc = *SM.getProcResource(I); 598*0b57cec5SDimitry Andric OS << "\n - " << PRDesc.Name << " [ " 599*0b57cec5SDimitry Andric << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; 600*0b57cec5SDimitry Andric } 601*0b57cec5SDimitry Andric } 602*0b57cec5SDimitry Andric } 603*0b57cec5SDimitry Andric 604*0b57cec5SDimitry Andric OS << "\n Data Dependencies: [ " 605*0b57cec5SDimitry Andric << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; 606*0b57cec5SDimitry Andric OS << "\n - Register Dependencies [ " 607*0b57cec5SDimitry Andric << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) 608*0b57cec5SDimitry Andric << "% ]"; 609*0b57cec5SDimitry Andric OS << "\n - Memory Dependencies [ " 610*0b57cec5SDimitry Andric << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) 611*0b57cec5SDimitry Andric << "% ]\n"; 612*0b57cec5SDimitry Andric } 613*0b57cec5SDimitry Andric 614*0b57cec5SDimitry Andric void BottleneckAnalysis::printView(raw_ostream &OS) const { 615*0b57cec5SDimitry Andric std::string Buffer; 616*0b57cec5SDimitry Andric raw_string_ostream TempStream(Buffer); 617*0b57cec5SDimitry Andric printBottleneckHints(TempStream); 618*0b57cec5SDimitry Andric TempStream.flush(); 619*0b57cec5SDimitry Andric OS << Buffer; 620*0b57cec5SDimitry Andric printCriticalSequence(OS); 621*0b57cec5SDimitry Andric } 622*0b57cec5SDimitry Andric 623*0b57cec5SDimitry Andric } // namespace mca. 624*0b57cec5SDimitry Andric } // namespace llvm 625