1 //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file implements the functionalities used by the BottleneckAnalysis 11 /// to report bottleneck info. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "Views/BottleneckAnalysis.h" 16 #include "llvm/MC/MCInst.h" 17 #include "llvm/MCA/Support.h" 18 #include "llvm/Support/Format.h" 19 20 namespace llvm { 21 namespace mca { 22 23 #define DEBUG_TYPE "llvm-mca" 24 25 PressureTracker::PressureTracker(const MCSchedModel &Model) 26 : SM(Model), 27 ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), 28 ProcResID2Mask(Model.getNumProcResourceKinds(), 0), 29 ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), 30 ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) { 31 computeProcResourceMasks(SM, ProcResID2Mask); 32 33 // Ignore the invalid resource at index zero. 34 unsigned NextResourceUsersIdx = 0; 35 for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) { 36 const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); 37 ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx; 38 NextResourceUsersIdx += ProcResource.NumUnits; 39 uint64_t ResourceMask = ProcResID2Mask[I]; 40 ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I; 41 } 42 43 ResourceUsers.resize(NextResourceUsersIdx); 44 std::fill(ResourceUsers.begin(), ResourceUsers.end(), 45 std::make_pair<unsigned, unsigned>(~0U, 0U)); 46 } 47 48 void PressureTracker::getResourceUsers(uint64_t ResourceMask, 49 SmallVectorImpl<User> &Users) const { 50 unsigned Index = getResourceStateIndex(ResourceMask); 51 unsigned ProcResID = ResIdx2ProcResID[Index]; 52 const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); 53 for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { 54 const User U = getResourceUser(ProcResID, I); 55 if (U.second && IPI.find(U.first) != IPI.end()) 56 Users.emplace_back(U); 57 } 58 } 59 60 void PressureTracker::onInstructionDispatched(unsigned IID) { 61 IPI.insert(std::make_pair(IID, InstructionPressureInfo())); 62 } 63 64 void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); } 65 66 void PressureTracker::handleInstructionIssuedEvent( 67 const HWInstructionIssuedEvent &Event) { 68 unsigned IID = Event.IR.getSourceIndex(); 69 using ResourceRef = HWInstructionIssuedEvent::ResourceRef; 70 using ResourceUse = std::pair<ResourceRef, ResourceCycles>; 71 for (const ResourceUse &Use : Event.UsedResources) { 72 const ResourceRef &RR = Use.first; 73 unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; 74 Index += countTrailingZeros(RR.second); 75 ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator()); 76 } 77 } 78 79 void PressureTracker::updateResourcePressureDistribution( 80 uint64_t CumulativeMask) { 81 while (CumulativeMask) { 82 uint64_t Current = CumulativeMask & (-CumulativeMask); 83 unsigned ResIdx = getResourceStateIndex(Current); 84 unsigned ProcResID = ResIdx2ProcResID[ResIdx]; 85 uint64_t Mask = ProcResID2Mask[ProcResID]; 86 87 if (Mask == Current) { 88 ResourcePressureDistribution[ProcResID]++; 89 CumulativeMask ^= Current; 90 continue; 91 } 92 93 Mask ^= Current; 94 while (Mask) { 95 uint64_t SubUnit = Mask & (-Mask); 96 ResIdx = getResourceStateIndex(SubUnit); 97 ProcResID = ResIdx2ProcResID[ResIdx]; 98 ResourcePressureDistribution[ProcResID]++; 99 Mask ^= SubUnit; 100 } 101 102 CumulativeMask ^= Current; 103 } 104 } 105 106 void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { 107 assert(Event.Reason != HWPressureEvent::INVALID && 108 "Unexpected invalid event!"); 109 110 switch (Event.Reason) { 111 default: 112 break; 113 114 case HWPressureEvent::RESOURCES: { 115 const uint64_t ResourceMask = Event.ResourceMask; 116 updateResourcePressureDistribution(Event.ResourceMask); 117 118 for (const InstRef &IR : Event.AffectedInstructions) { 119 const Instruction &IS = *IR.getInstruction(); 120 unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask; 121 if (!BusyResources) 122 continue; 123 124 unsigned IID = IR.getSourceIndex(); 125 IPI[IID].ResourcePressureCycles++; 126 } 127 break; 128 } 129 130 case HWPressureEvent::REGISTER_DEPS: 131 for (const InstRef &IR : Event.AffectedInstructions) { 132 unsigned IID = IR.getSourceIndex(); 133 IPI[IID].RegisterPressureCycles++; 134 } 135 break; 136 137 case HWPressureEvent::MEMORY_DEPS: 138 for (const InstRef &IR : Event.AffectedInstructions) { 139 unsigned IID = IR.getSourceIndex(); 140 IPI[IID].MemoryPressureCycles++; 141 } 142 } 143 } 144 145 #ifndef NDEBUG 146 void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, 147 const DependencyEdge &DepEdge, 148 MCInstPrinter &MCIP) const { 149 unsigned FromIID = DepEdge.FromIID; 150 unsigned ToIID = DepEdge.ToIID; 151 assert(FromIID < ToIID && "Graph should be acyclic!"); 152 153 const DependencyEdge::Dependency &DE = DepEdge.Dep; 154 assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!"); 155 156 OS << " FROM: " << FromIID << " TO: " << ToIID << " "; 157 if (DE.Type == DependencyEdge::DT_REGISTER) { 158 OS << " - REGISTER: "; 159 MCIP.printRegName(OS, DE.ResourceOrRegID); 160 } else if (DE.Type == DependencyEdge::DT_MEMORY) { 161 OS << " - MEMORY"; 162 } else { 163 assert(DE.Type == DependencyEdge::DT_RESOURCE && 164 "Unsupported dependency type!"); 165 OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; 166 } 167 OS << " - COST: " << DE.Cost << '\n'; 168 } 169 #endif // NDEBUG 170 171 void DependencyGraph::pruneEdges(unsigned Iterations) { 172 for (DGNode &N : Nodes) { 173 unsigned NumPruned = 0; 174 const unsigned Size = N.OutgoingEdges.size(); 175 // Use a cut-off threshold to prune edges with a low frequency. 176 for (unsigned I = 0, E = Size; I < E; ++I) { 177 DependencyEdge &Edge = N.OutgoingEdges[I]; 178 if (Edge.Frequency == Iterations) 179 continue; 180 double Factor = (double)Edge.Frequency / Iterations; 181 if (0.10 < Factor) 182 continue; 183 Nodes[Edge.ToIID].NumPredecessors--; 184 std::swap(Edge, N.OutgoingEdges[E - 1]); 185 --E; 186 ++NumPruned; 187 } 188 189 if (NumPruned) 190 N.OutgoingEdges.resize(Size - NumPruned); 191 } 192 } 193 194 void DependencyGraph::initializeRootSet( 195 SmallVectorImpl<unsigned> &RootSet) const { 196 for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { 197 const DGNode &N = Nodes[I]; 198 if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty()) 199 RootSet.emplace_back(I); 200 } 201 } 202 203 void DependencyGraph::propagateThroughEdges( 204 SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) { 205 SmallVector<unsigned, 8> ToVisit; 206 207 // A critical sequence is computed as the longest path from a node of the 208 // RootSet to a leaf node (i.e. a node with no successors). The RootSet is 209 // composed of nodes with at least one successor, and no predecessors. 210 // 211 // Each node of the graph starts with an initial default cost of zero. The 212 // cost of a node is a measure of criticality: the higher the cost, the bigger 213 // is the performance impact. 214 // For register and memory dependencies, the cost is a function of the write 215 // latency as well as the actual delay (in cycles) caused to users. 216 // For processor resource dependencies, the cost is a function of the resource 217 // pressure. Resource interferences with low frequency values are ignored. 218 // 219 // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of 220 // the inner loop selects (i.e. visits) a node N from a set of `unvisited 221 // nodes`, and then propagates the cost of N to all its neighbors. 222 // 223 // The `unvisited nodes` set initially contains all the nodes from the 224 // RootSet. A node N is added to the `unvisited nodes` if all its 225 // predecessors have been visited already. 226 // 227 // For simplicity, every node tracks the number of unvisited incoming edges in 228 // field `NumVisitedPredecessors`. When the value of that field drops to 229 // zero, then the corresponding node is added to a `ToVisit` set. 230 // 231 // At the end of every iteration of the outer loop, set `ToVisit` becomes our 232 // new `unvisited nodes` set. 233 // 234 // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet) 235 // is empty. This algorithm works under the assumption that the graph is 236 // acyclic. 237 do { 238 for (unsigned IID : RootSet) { 239 const DGNode &N = Nodes[IID]; 240 for (const DependencyEdge &DepEdge : N.OutgoingEdges) { 241 unsigned ToIID = DepEdge.ToIID; 242 DGNode &To = Nodes[ToIID]; 243 uint64_t Cost = N.Cost + DepEdge.Dep.Cost; 244 // Check if this is the most expensive incoming edge seen so far. In 245 // case, update the total cost of the destination node (ToIID), as well 246 // its field `CriticalPredecessor`. 247 if (Cost > To.Cost) { 248 To.CriticalPredecessor = DepEdge; 249 To.Cost = Cost; 250 To.Depth = N.Depth + 1; 251 } 252 To.NumVisitedPredecessors++; 253 if (To.NumVisitedPredecessors == To.NumPredecessors) 254 ToVisit.emplace_back(ToIID); 255 } 256 } 257 258 std::swap(RootSet, ToVisit); 259 ToVisit.clear(); 260 } while (!RootSet.empty()); 261 } 262 263 void DependencyGraph::getCriticalSequence( 264 SmallVectorImpl<const DependencyEdge *> &Seq) const { 265 // At this stage, nodes of the graph have been already visited, and costs have 266 // been propagated through the edges (see method `propagateThroughEdges()`). 267 268 // Identify the node N with the highest cost in the graph. By construction, 269 // that node is the last instruction of our critical sequence. 270 // Field N.Depth would tell us the total length of the sequence. 271 // 272 // To obtain the sequence of critical edges, we simply follow the chain of critical 273 // predecessors starting from node N (field DGNode::CriticalPredecessor). 274 const auto It = std::max_element( 275 Nodes.begin(), Nodes.end(), 276 [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; }); 277 unsigned IID = std::distance(Nodes.begin(), It); 278 Seq.resize(Nodes[IID].Depth); 279 for (unsigned I = Seq.size(), E = 0; I > E; --I) { 280 const DGNode &N = Nodes[IID]; 281 Seq[I - 1] = &N.CriticalPredecessor; 282 IID = N.CriticalPredecessor.FromIID; 283 } 284 } 285 286 void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS, 287 const MCInst &MCI, 288 bool UseDifferentColor) const { 289 FOS.PadToColumn(14); 290 if (UseDifferentColor) 291 FOS.changeColor(raw_ostream::CYAN, true, false); 292 FOS << printInstructionString(MCI); 293 if (UseDifferentColor) 294 FOS.resetColor(); 295 } 296 297 void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { 298 // Early exit if no bottlenecks were found during the simulation. 299 if (!SeenStallCycles || !BPI.PressureIncreaseCycles) 300 return; 301 302 SmallVector<const DependencyEdge *, 16> Seq; 303 DG.getCriticalSequence(Seq); 304 if (Seq.empty()) 305 return; 306 307 OS << "\nCritical sequence based on the simulation:\n\n"; 308 309 const DependencyEdge &FirstEdge = *Seq[0]; 310 ArrayRef<llvm::MCInst> Source = getSource(); 311 unsigned FromIID = FirstEdge.FromIID % Source.size(); 312 unsigned ToIID = FirstEdge.ToIID % Source.size(); 313 bool IsLoopCarried = FromIID >= ToIID; 314 315 formatted_raw_ostream FOS(OS); 316 FOS.PadToColumn(14); 317 FOS << "Instruction"; 318 FOS.PadToColumn(58); 319 FOS << "Dependency Information"; 320 321 bool HasColors = FOS.has_colors(); 322 323 unsigned CurrentIID = 0; 324 if (IsLoopCarried) { 325 FOS << "\n +----< " << FromIID << "."; 326 printInstruction(FOS, Source[FromIID], HasColors); 327 FOS << "\n |\n | < loop carried > \n |"; 328 } else { 329 while (CurrentIID < FromIID) { 330 FOS << "\n " << CurrentIID << "."; 331 printInstruction(FOS, Source[CurrentIID]); 332 CurrentIID++; 333 } 334 335 FOS << "\n +----< " << CurrentIID << "."; 336 printInstruction(FOS, Source[CurrentIID], HasColors); 337 CurrentIID++; 338 } 339 340 for (const DependencyEdge *&DE : Seq) { 341 ToIID = DE->ToIID % Source.size(); 342 unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID; 343 344 while (CurrentIID < LastIID) { 345 FOS << "\n | " << CurrentIID << "."; 346 printInstruction(FOS, Source[CurrentIID]); 347 CurrentIID++; 348 } 349 350 if (CurrentIID == ToIID) { 351 FOS << "\n +----> " << ToIID << "."; 352 printInstruction(FOS, Source[CurrentIID], HasColors); 353 } else { 354 FOS << "\n |\n | < loop carried > \n |" 355 << "\n +----> " << ToIID << "."; 356 printInstruction(FOS, Source[ToIID], HasColors); 357 } 358 FOS.PadToColumn(58); 359 360 const DependencyEdge::Dependency &Dep = DE->Dep; 361 if (HasColors) 362 FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 363 364 if (Dep.Type == DependencyEdge::DT_REGISTER) { 365 FOS << "## REGISTER dependency: "; 366 if (HasColors) 367 FOS.changeColor(raw_ostream::MAGENTA, true, false); 368 getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID); 369 } else if (Dep.Type == DependencyEdge::DT_MEMORY) { 370 FOS << "## MEMORY dependency."; 371 } else { 372 assert(Dep.Type == DependencyEdge::DT_RESOURCE && 373 "Unsupported dependency type!"); 374 FOS << "## RESOURCE interference: "; 375 if (HasColors) 376 FOS.changeColor(raw_ostream::MAGENTA, true, false); 377 FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID); 378 if (HasColors) { 379 FOS.resetColor(); 380 FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 381 } 382 FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations) 383 << "% ]"; 384 } 385 if (HasColors) 386 FOS.resetColor(); 387 ++CurrentIID; 388 } 389 390 while (CurrentIID < Source.size()) { 391 FOS << "\n " << CurrentIID << "."; 392 printInstruction(FOS, Source[CurrentIID]); 393 CurrentIID++; 394 } 395 396 FOS << '\n'; 397 FOS.flush(); 398 } 399 400 #ifndef NDEBUG 401 void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { 402 OS << "\nREG DEPS\n"; 403 for (const DGNode &Node : Nodes) 404 for (const DependencyEdge &DE : Node.OutgoingEdges) 405 if (DE.Dep.Type == DependencyEdge::DT_REGISTER) 406 dumpDependencyEdge(OS, DE, MCIP); 407 408 OS << "\nMEM DEPS\n"; 409 for (const DGNode &Node : Nodes) 410 for (const DependencyEdge &DE : Node.OutgoingEdges) 411 if (DE.Dep.Type == DependencyEdge::DT_MEMORY) 412 dumpDependencyEdge(OS, DE, MCIP); 413 414 OS << "\nRESOURCE DEPS\n"; 415 for (const DGNode &Node : Nodes) 416 for (const DependencyEdge &DE : Node.OutgoingEdges) 417 if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) 418 dumpDependencyEdge(OS, DE, MCIP); 419 } 420 #endif // NDEBUG 421 422 void DependencyGraph::addDependency(unsigned From, unsigned To, 423 DependencyEdge::Dependency &&Dep) { 424 DGNode &NodeFrom = Nodes[From]; 425 DGNode &NodeTo = Nodes[To]; 426 SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; 427 428 auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) { 429 return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; 430 }); 431 432 if (It != Vec.end()) { 433 It->Dep.Cost += Dep.Cost; 434 It->Frequency++; 435 return; 436 } 437 438 DependencyEdge DE = {Dep, From, To, 1}; 439 Vec.emplace_back(DE); 440 NodeTo.NumPredecessors++; 441 } 442 443 BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, 444 MCInstPrinter &Printer, 445 ArrayRef<MCInst> S, unsigned NumIter) 446 : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()), 447 DG(S.size() * 3), Iterations(NumIter), TotalCycles(0), 448 PressureIncreasedBecauseOfResources(false), 449 PressureIncreasedBecauseOfRegisterDependencies(false), 450 PressureIncreasedBecauseOfMemoryDependencies(false), 451 SeenStallCycles(false), BPI() {} 452 453 void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, 454 unsigned RegID, unsigned Cost) { 455 bool IsLoopCarried = From >= To; 456 unsigned SourceSize = getSource().size(); 457 if (IsLoopCarried) { 458 DG.addRegisterDep(From, To + SourceSize, RegID, Cost); 459 DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); 460 return; 461 } 462 DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost); 463 } 464 465 void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, 466 unsigned Cost) { 467 bool IsLoopCarried = From >= To; 468 unsigned SourceSize = getSource().size(); 469 if (IsLoopCarried) { 470 DG.addMemoryDep(From, To + SourceSize, Cost); 471 DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); 472 return; 473 } 474 DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost); 475 } 476 477 void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, 478 uint64_t Mask, unsigned Cost) { 479 bool IsLoopCarried = From >= To; 480 unsigned SourceSize = getSource().size(); 481 if (IsLoopCarried) { 482 DG.addResourceDep(From, To + SourceSize, Mask, Cost); 483 DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); 484 return; 485 } 486 DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost); 487 } 488 489 void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { 490 const unsigned IID = Event.IR.getSourceIndex(); 491 if (Event.Type == HWInstructionEvent::Dispatched) { 492 Tracker.onInstructionDispatched(IID); 493 return; 494 } 495 if (Event.Type == HWInstructionEvent::Executed) { 496 Tracker.onInstructionExecuted(IID); 497 return; 498 } 499 500 if (Event.Type != HWInstructionEvent::Issued) 501 return; 502 503 ArrayRef<llvm::MCInst> Source = getSource(); 504 const Instruction &IS = *Event.IR.getInstruction(); 505 unsigned To = IID % Source.size(); 506 507 unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID); 508 uint64_t ResourceMask = IS.getCriticalResourceMask(); 509 SmallVector<std::pair<unsigned, unsigned>, 4> Users; 510 while (ResourceMask) { 511 uint64_t Current = ResourceMask & (-ResourceMask); 512 Tracker.getResourceUsers(Current, Users); 513 for (const std::pair<unsigned, unsigned> &U : Users) 514 addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles); 515 Users.clear(); 516 ResourceMask ^= Current; 517 } 518 519 const CriticalDependency &RegDep = IS.getCriticalRegDep(); 520 if (RegDep.Cycles) { 521 Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID); 522 unsigned From = RegDep.IID % Source.size(); 523 addRegisterDep(From, To, RegDep.RegID, Cycles); 524 } 525 526 const CriticalDependency &MemDep = IS.getCriticalMemDep(); 527 if (MemDep.Cycles) { 528 Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID); 529 unsigned From = MemDep.IID % Source.size(); 530 addMemoryDep(From, To, Cycles); 531 } 532 533 Tracker.handleInstructionIssuedEvent( 534 static_cast<const HWInstructionIssuedEvent &>(Event)); 535 536 // Check if this is the last simulated instruction. 537 if (IID == ((Iterations * Source.size()) - 1)) 538 DG.finalizeGraph(Iterations); 539 } 540 541 void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { 542 assert(Event.Reason != HWPressureEvent::INVALID && 543 "Unexpected invalid event!"); 544 545 Tracker.handlePressureEvent(Event); 546 547 switch (Event.Reason) { 548 default: 549 break; 550 551 case HWPressureEvent::RESOURCES: 552 PressureIncreasedBecauseOfResources = true; 553 break; 554 case HWPressureEvent::REGISTER_DEPS: 555 PressureIncreasedBecauseOfRegisterDependencies = true; 556 break; 557 case HWPressureEvent::MEMORY_DEPS: 558 PressureIncreasedBecauseOfMemoryDependencies = true; 559 break; 560 } 561 } 562 563 void BottleneckAnalysis::onCycleEnd() { 564 ++TotalCycles; 565 566 bool PressureIncreasedBecauseOfDataDependencies = 567 PressureIncreasedBecauseOfRegisterDependencies || 568 PressureIncreasedBecauseOfMemoryDependencies; 569 if (!PressureIncreasedBecauseOfResources && 570 !PressureIncreasedBecauseOfDataDependencies) 571 return; 572 573 ++BPI.PressureIncreaseCycles; 574 if (PressureIncreasedBecauseOfRegisterDependencies) 575 ++BPI.RegisterDependencyCycles; 576 if (PressureIncreasedBecauseOfMemoryDependencies) 577 ++BPI.MemoryDependencyCycles; 578 if (PressureIncreasedBecauseOfDataDependencies) 579 ++BPI.DataDependencyCycles; 580 if (PressureIncreasedBecauseOfResources) 581 ++BPI.ResourcePressureCycles; 582 PressureIncreasedBecauseOfResources = false; 583 PressureIncreasedBecauseOfRegisterDependencies = false; 584 PressureIncreasedBecauseOfMemoryDependencies = false; 585 } 586 587 void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { 588 if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { 589 OS << "\n\nNo resource or data dependency bottlenecks discovered.\n"; 590 return; 591 } 592 593 double PressurePerCycle = 594 (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; 595 double ResourcePressurePerCycle = 596 (double)BPI.ResourcePressureCycles * 100 / TotalCycles; 597 double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; 598 double RegDepPressurePerCycle = 599 (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; 600 double MemDepPressurePerCycle = 601 (double)BPI.MemoryDependencyCycles * 100 / TotalCycles; 602 603 OS << "\n\nCycles with backend pressure increase [ " 604 << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]"; 605 606 OS << "\nThroughput Bottlenecks: " 607 << "\n Resource Pressure [ " 608 << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100) 609 << "% ]"; 610 611 if (BPI.PressureIncreaseCycles) { 612 ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution(); 613 const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); 614 for (unsigned I = 0, E = Distribution.size(); I < E; ++I) { 615 unsigned ResourceCycles = Distribution[I]; 616 if (ResourceCycles) { 617 double Frequency = (double)ResourceCycles * 100 / TotalCycles; 618 const MCProcResourceDesc &PRDesc = *SM.getProcResource(I); 619 OS << "\n - " << PRDesc.Name << " [ " 620 << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; 621 } 622 } 623 } 624 625 OS << "\n Data Dependencies: [ " 626 << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; 627 OS << "\n - Register Dependencies [ " 628 << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) 629 << "% ]"; 630 OS << "\n - Memory Dependencies [ " 631 << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) 632 << "% ]\n"; 633 } 634 635 void BottleneckAnalysis::printView(raw_ostream &OS) const { 636 std::string Buffer; 637 raw_string_ostream TempStream(Buffer); 638 printBottleneckHints(TempStream); 639 TempStream.flush(); 640 OS << Buffer; 641 printCriticalSequence(OS); 642 } 643 644 } // namespace mca. 645 } // namespace llvm 646