1 //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file implements the functionalities used by the BottleneckAnalysis 11 /// to report bottleneck info. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "Views/BottleneckAnalysis.h" 16 #include "llvm/MC/MCInst.h" 17 #include "llvm/MCA/Support.h" 18 #include "llvm/Support/Format.h" 19 20 namespace llvm { 21 namespace mca { 22 23 #define DEBUG_TYPE "llvm-mca" 24 25 PressureTracker::PressureTracker(const MCSchedModel &Model) 26 : SM(Model), 27 ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), 28 ProcResID2Mask(Model.getNumProcResourceKinds(), 0), 29 ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), 30 ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) { 31 computeProcResourceMasks(SM, ProcResID2Mask); 32 33 // Ignore the invalid resource at index zero. 34 unsigned NextResourceUsersIdx = 0; 35 for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) { 36 const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); 37 ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx; 38 NextResourceUsersIdx += ProcResource.NumUnits; 39 uint64_t ResourceMask = ProcResID2Mask[I]; 40 ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I; 41 } 42 43 ResourceUsers.resize(NextResourceUsersIdx); 44 std::fill(ResourceUsers.begin(), ResourceUsers.end(), 45 std::make_pair<unsigned, unsigned>(~0U, 0U)); 46 } 47 48 void PressureTracker::getResourceUsers(uint64_t ResourceMask, 49 SmallVectorImpl<User> &Users) const { 50 unsigned Index = getResourceStateIndex(ResourceMask); 51 unsigned ProcResID = ResIdx2ProcResID[Index]; 52 const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); 53 for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { 54 const User U = getResourceUser(ProcResID, I); 55 if (U.second && IPI.contains(U.first)) 56 Users.emplace_back(U); 57 } 58 } 59 60 void PressureTracker::onInstructionDispatched(unsigned IID) { 61 IPI.insert(std::make_pair(IID, InstructionPressureInfo())); 62 } 63 64 void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); } 65 66 void PressureTracker::handleInstructionIssuedEvent( 67 const HWInstructionIssuedEvent &Event) { 68 unsigned IID = Event.IR.getSourceIndex(); 69 for (const ResourceUse &Use : Event.UsedResources) { 70 const ResourceRef &RR = Use.first; 71 unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; 72 Index += llvm::countr_zero(RR.second); 73 ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator()); 74 } 75 } 76 77 void PressureTracker::updateResourcePressureDistribution( 78 uint64_t CumulativeMask) { 79 while (CumulativeMask) { 80 uint64_t Current = CumulativeMask & (-CumulativeMask); 81 unsigned ResIdx = getResourceStateIndex(Current); 82 unsigned ProcResID = ResIdx2ProcResID[ResIdx]; 83 uint64_t Mask = ProcResID2Mask[ProcResID]; 84 85 if (Mask == Current) { 86 ResourcePressureDistribution[ProcResID]++; 87 CumulativeMask ^= Current; 88 continue; 89 } 90 91 Mask ^= Current; 92 while (Mask) { 93 uint64_t SubUnit = Mask & (-Mask); 94 ResIdx = getResourceStateIndex(SubUnit); 95 ProcResID = ResIdx2ProcResID[ResIdx]; 96 ResourcePressureDistribution[ProcResID]++; 97 Mask ^= SubUnit; 98 } 99 100 CumulativeMask ^= Current; 101 } 102 } 103 104 void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { 105 assert(Event.Reason != HWPressureEvent::INVALID && 106 "Unexpected invalid event!"); 107 108 switch (Event.Reason) { 109 default: 110 break; 111 112 case HWPressureEvent::RESOURCES: { 113 const uint64_t ResourceMask = Event.ResourceMask; 114 updateResourcePressureDistribution(Event.ResourceMask); 115 116 for (const InstRef &IR : Event.AffectedInstructions) { 117 const Instruction &IS = *IR.getInstruction(); 118 unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask; 119 if (!BusyResources) 120 continue; 121 122 unsigned IID = IR.getSourceIndex(); 123 IPI[IID].ResourcePressureCycles++; 124 } 125 break; 126 } 127 128 case HWPressureEvent::REGISTER_DEPS: 129 for (const InstRef &IR : Event.AffectedInstructions) { 130 unsigned IID = IR.getSourceIndex(); 131 IPI[IID].RegisterPressureCycles++; 132 } 133 break; 134 135 case HWPressureEvent::MEMORY_DEPS: 136 for (const InstRef &IR : Event.AffectedInstructions) { 137 unsigned IID = IR.getSourceIndex(); 138 IPI[IID].MemoryPressureCycles++; 139 } 140 } 141 } 142 143 #ifndef NDEBUG 144 void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, 145 const DependencyEdge &DepEdge, 146 MCInstPrinter &MCIP) const { 147 unsigned FromIID = DepEdge.FromIID; 148 unsigned ToIID = DepEdge.ToIID; 149 assert(FromIID < ToIID && "Graph should be acyclic!"); 150 151 const DependencyEdge::Dependency &DE = DepEdge.Dep; 152 assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!"); 153 154 OS << " FROM: " << FromIID << " TO: " << ToIID << " "; 155 if (DE.Type == DependencyEdge::DT_REGISTER) { 156 OS << " - REGISTER: "; 157 MCIP.printRegName(OS, DE.ResourceOrRegID); 158 } else if (DE.Type == DependencyEdge::DT_MEMORY) { 159 OS << " - MEMORY"; 160 } else { 161 assert(DE.Type == DependencyEdge::DT_RESOURCE && 162 "Unsupported dependency type!"); 163 OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; 164 } 165 OS << " - COST: " << DE.Cost << '\n'; 166 } 167 #endif // NDEBUG 168 169 void DependencyGraph::pruneEdges(unsigned Iterations) { 170 for (DGNode &N : Nodes) { 171 unsigned NumPruned = 0; 172 const unsigned Size = N.OutgoingEdges.size(); 173 // Use a cut-off threshold to prune edges with a low frequency. 174 for (unsigned I = 0, E = Size; I < E; ++I) { 175 DependencyEdge &Edge = N.OutgoingEdges[I]; 176 if (Edge.Frequency == Iterations) 177 continue; 178 double Factor = (double)Edge.Frequency / Iterations; 179 if (0.10 < Factor) 180 continue; 181 Nodes[Edge.ToIID].NumPredecessors--; 182 std::swap(Edge, N.OutgoingEdges[E - 1]); 183 --E; 184 ++NumPruned; 185 } 186 187 if (NumPruned) 188 N.OutgoingEdges.resize(Size - NumPruned); 189 } 190 } 191 192 void DependencyGraph::initializeRootSet( 193 SmallVectorImpl<unsigned> &RootSet) const { 194 for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { 195 const DGNode &N = Nodes[I]; 196 if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty()) 197 RootSet.emplace_back(I); 198 } 199 } 200 201 void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet, 202 unsigned Iterations) { 203 SmallVector<unsigned, 8> ToVisit; 204 205 // A critical sequence is computed as the longest path from a node of the 206 // RootSet to a leaf node (i.e. a node with no successors). The RootSet is 207 // composed of nodes with at least one successor, and no predecessors. 208 // 209 // Each node of the graph starts with an initial default cost of zero. The 210 // cost of a node is a measure of criticality: the higher the cost, the bigger 211 // is the performance impact. 212 // For register and memory dependencies, the cost is a function of the write 213 // latency as well as the actual delay (in cycles) caused to users. 214 // For processor resource dependencies, the cost is a function of the resource 215 // pressure. Resource interferences with low frequency values are ignored. 216 // 217 // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of 218 // the inner loop selects (i.e. visits) a node N from a set of `unvisited 219 // nodes`, and then propagates the cost of N to all its neighbors. 220 // 221 // The `unvisited nodes` set initially contains all the nodes from the 222 // RootSet. A node N is added to the `unvisited nodes` if all its 223 // predecessors have been visited already. 224 // 225 // For simplicity, every node tracks the number of unvisited incoming edges in 226 // field `NumVisitedPredecessors`. When the value of that field drops to 227 // zero, then the corresponding node is added to a `ToVisit` set. 228 // 229 // At the end of every iteration of the outer loop, set `ToVisit` becomes our 230 // new `unvisited nodes` set. 231 // 232 // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet) 233 // is empty. This algorithm works under the assumption that the graph is 234 // acyclic. 235 do { 236 for (unsigned IID : RootSet) { 237 const DGNode &N = Nodes[IID]; 238 for (const DependencyEdge &DepEdge : N.OutgoingEdges) { 239 unsigned ToIID = DepEdge.ToIID; 240 DGNode &To = Nodes[ToIID]; 241 uint64_t Cost = N.Cost + DepEdge.Dep.Cost; 242 // Check if this is the most expensive incoming edge seen so far. In 243 // case, update the total cost of the destination node (ToIID), as well 244 // its field `CriticalPredecessor`. 245 if (Cost > To.Cost) { 246 To.CriticalPredecessor = DepEdge; 247 To.Cost = Cost; 248 To.Depth = N.Depth + 1; 249 } 250 To.NumVisitedPredecessors++; 251 if (To.NumVisitedPredecessors == To.NumPredecessors) 252 ToVisit.emplace_back(ToIID); 253 } 254 } 255 256 std::swap(RootSet, ToVisit); 257 ToVisit.clear(); 258 } while (!RootSet.empty()); 259 } 260 261 void DependencyGraph::getCriticalSequence( 262 SmallVectorImpl<const DependencyEdge *> &Seq) const { 263 // At this stage, nodes of the graph have been already visited, and costs have 264 // been propagated through the edges (see method `propagateThroughEdges()`). 265 266 // Identify the node N with the highest cost in the graph. By construction, 267 // that node is the last instruction of our critical sequence. 268 // Field N.Depth would tell us the total length of the sequence. 269 // 270 // To obtain the sequence of critical edges, we simply follow the chain of 271 // critical predecessors starting from node N (field 272 // DGNode::CriticalPredecessor). 273 const auto It = std::max_element( 274 Nodes.begin(), Nodes.end(), 275 [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; }); 276 unsigned IID = std::distance(Nodes.begin(), It); 277 Seq.resize(Nodes[IID].Depth); 278 for (const DependencyEdge *&DE : llvm::reverse(Seq)) { 279 const DGNode &N = Nodes[IID]; 280 DE = &N.CriticalPredecessor; 281 IID = N.CriticalPredecessor.FromIID; 282 } 283 } 284 285 void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS, 286 const MCInst &MCI, 287 bool UseDifferentColor) const { 288 FOS.PadToColumn(14); 289 if (UseDifferentColor) 290 FOS.changeColor(raw_ostream::CYAN, true, false); 291 FOS << printInstructionString(MCI); 292 if (UseDifferentColor) 293 FOS.resetColor(); 294 } 295 296 void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { 297 // Early exit if no bottlenecks were found during the simulation. 298 if (!SeenStallCycles || !BPI.PressureIncreaseCycles) 299 return; 300 301 SmallVector<const DependencyEdge *, 16> Seq; 302 DG.getCriticalSequence(Seq); 303 if (Seq.empty()) 304 return; 305 306 OS << "\nCritical sequence based on the simulation:\n\n"; 307 308 const DependencyEdge &FirstEdge = *Seq[0]; 309 ArrayRef<llvm::MCInst> Source = getSource(); 310 unsigned FromIID = FirstEdge.FromIID % Source.size(); 311 unsigned ToIID = FirstEdge.ToIID % Source.size(); 312 bool IsLoopCarried = FromIID >= ToIID; 313 314 formatted_raw_ostream FOS(OS); 315 FOS.PadToColumn(14); 316 FOS << "Instruction"; 317 FOS.PadToColumn(58); 318 FOS << "Dependency Information"; 319 320 bool HasColors = FOS.has_colors(); 321 322 unsigned CurrentIID = 0; 323 if (IsLoopCarried) { 324 FOS << "\n +----< " << FromIID << "."; 325 printInstruction(FOS, Source[FromIID], HasColors); 326 FOS << "\n |\n | < loop carried > \n |"; 327 } else { 328 while (CurrentIID < FromIID) { 329 FOS << "\n " << CurrentIID << "."; 330 printInstruction(FOS, Source[CurrentIID]); 331 CurrentIID++; 332 } 333 334 FOS << "\n +----< " << CurrentIID << "."; 335 printInstruction(FOS, Source[CurrentIID], HasColors); 336 CurrentIID++; 337 } 338 339 for (const DependencyEdge *&DE : Seq) { 340 ToIID = DE->ToIID % Source.size(); 341 unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID; 342 343 while (CurrentIID < LastIID) { 344 FOS << "\n | " << CurrentIID << "."; 345 printInstruction(FOS, Source[CurrentIID]); 346 CurrentIID++; 347 } 348 349 if (CurrentIID == ToIID) { 350 FOS << "\n +----> " << ToIID << "."; 351 printInstruction(FOS, Source[CurrentIID], HasColors); 352 } else { 353 FOS << "\n |\n | < loop carried > \n |" 354 << "\n +----> " << ToIID << "."; 355 printInstruction(FOS, Source[ToIID], HasColors); 356 } 357 FOS.PadToColumn(58); 358 359 const DependencyEdge::Dependency &Dep = DE->Dep; 360 if (HasColors) 361 FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 362 363 if (Dep.Type == DependencyEdge::DT_REGISTER) { 364 FOS << "## REGISTER dependency: "; 365 if (HasColors) 366 FOS.changeColor(raw_ostream::MAGENTA, true, false); 367 getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID); 368 } else if (Dep.Type == DependencyEdge::DT_MEMORY) { 369 FOS << "## MEMORY dependency."; 370 } else { 371 assert(Dep.Type == DependencyEdge::DT_RESOURCE && 372 "Unsupported dependency type!"); 373 FOS << "## RESOURCE interference: "; 374 if (HasColors) 375 FOS.changeColor(raw_ostream::MAGENTA, true, false); 376 FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID); 377 if (HasColors) { 378 FOS.resetColor(); 379 FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 380 } 381 FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations) 382 << "% ]"; 383 } 384 if (HasColors) 385 FOS.resetColor(); 386 ++CurrentIID; 387 } 388 389 while (CurrentIID < Source.size()) { 390 FOS << "\n " << CurrentIID << "."; 391 printInstruction(FOS, Source[CurrentIID]); 392 CurrentIID++; 393 } 394 395 FOS << '\n'; 396 FOS.flush(); 397 } 398 399 #ifndef NDEBUG 400 void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { 401 OS << "\nREG DEPS\n"; 402 for (const DGNode &Node : Nodes) 403 for (const DependencyEdge &DE : Node.OutgoingEdges) 404 if (DE.Dep.Type == DependencyEdge::DT_REGISTER) 405 dumpDependencyEdge(OS, DE, MCIP); 406 407 OS << "\nMEM DEPS\n"; 408 for (const DGNode &Node : Nodes) 409 for (const DependencyEdge &DE : Node.OutgoingEdges) 410 if (DE.Dep.Type == DependencyEdge::DT_MEMORY) 411 dumpDependencyEdge(OS, DE, MCIP); 412 413 OS << "\nRESOURCE DEPS\n"; 414 for (const DGNode &Node : Nodes) 415 for (const DependencyEdge &DE : Node.OutgoingEdges) 416 if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) 417 dumpDependencyEdge(OS, DE, MCIP); 418 } 419 #endif // NDEBUG 420 421 void DependencyGraph::addDependency(unsigned From, unsigned To, 422 DependencyEdge::Dependency &&Dep) { 423 DGNode &NodeFrom = Nodes[From]; 424 DGNode &NodeTo = Nodes[To]; 425 SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; 426 427 auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) { 428 return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; 429 }); 430 431 if (It != Vec.end()) { 432 It->Dep.Cost += Dep.Cost; 433 It->Frequency++; 434 return; 435 } 436 437 DependencyEdge DE = {Dep, From, To, 1}; 438 Vec.emplace_back(DE); 439 NodeTo.NumPredecessors++; 440 } 441 442 BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, 443 MCInstPrinter &Printer, 444 ArrayRef<MCInst> S, unsigned NumIter) 445 : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()), 446 DG(S.size() * 3), Iterations(NumIter), TotalCycles(0), 447 PressureIncreasedBecauseOfResources(false), 448 PressureIncreasedBecauseOfRegisterDependencies(false), 449 PressureIncreasedBecauseOfMemoryDependencies(false), 450 SeenStallCycles(false), BPI() {} 451 452 void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, 453 unsigned RegID, unsigned Cost) { 454 bool IsLoopCarried = From >= To; 455 unsigned SourceSize = getSource().size(); 456 if (IsLoopCarried) { 457 DG.addRegisterDep(From, To + SourceSize, RegID, Cost); 458 DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); 459 return; 460 } 461 DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost); 462 } 463 464 void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, 465 unsigned Cost) { 466 bool IsLoopCarried = From >= To; 467 unsigned SourceSize = getSource().size(); 468 if (IsLoopCarried) { 469 DG.addMemoryDep(From, To + SourceSize, Cost); 470 DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); 471 return; 472 } 473 DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost); 474 } 475 476 void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, 477 uint64_t Mask, unsigned Cost) { 478 bool IsLoopCarried = From >= To; 479 unsigned SourceSize = getSource().size(); 480 if (IsLoopCarried) { 481 DG.addResourceDep(From, To + SourceSize, Mask, Cost); 482 DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); 483 return; 484 } 485 DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost); 486 } 487 488 void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { 489 const unsigned IID = Event.IR.getSourceIndex(); 490 if (Event.Type == HWInstructionEvent::Dispatched) { 491 Tracker.onInstructionDispatched(IID); 492 return; 493 } 494 if (Event.Type == HWInstructionEvent::Executed) { 495 Tracker.onInstructionExecuted(IID); 496 return; 497 } 498 499 if (Event.Type != HWInstructionEvent::Issued) 500 return; 501 502 ArrayRef<llvm::MCInst> Source = getSource(); 503 const Instruction &IS = *Event.IR.getInstruction(); 504 unsigned To = IID % Source.size(); 505 506 unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID); 507 uint64_t ResourceMask = IS.getCriticalResourceMask(); 508 SmallVector<std::pair<unsigned, unsigned>, 4> Users; 509 while (ResourceMask) { 510 uint64_t Current = ResourceMask & (-ResourceMask); 511 Tracker.getResourceUsers(Current, Users); 512 for (const std::pair<unsigned, unsigned> &U : Users) 513 addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles); 514 Users.clear(); 515 ResourceMask ^= Current; 516 } 517 518 const CriticalDependency &RegDep = IS.getCriticalRegDep(); 519 if (RegDep.Cycles) { 520 Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID); 521 unsigned From = RegDep.IID % Source.size(); 522 addRegisterDep(From, To, RegDep.RegID, Cycles); 523 } 524 525 const CriticalDependency &MemDep = IS.getCriticalMemDep(); 526 if (MemDep.Cycles) { 527 Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID); 528 unsigned From = MemDep.IID % Source.size(); 529 addMemoryDep(From, To, Cycles); 530 } 531 532 Tracker.handleInstructionIssuedEvent( 533 static_cast<const HWInstructionIssuedEvent &>(Event)); 534 535 // Check if this is the last simulated instruction. 536 if (IID == ((Iterations * Source.size()) - 1)) 537 DG.finalizeGraph(Iterations); 538 } 539 540 void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { 541 assert(Event.Reason != HWPressureEvent::INVALID && 542 "Unexpected invalid event!"); 543 544 Tracker.handlePressureEvent(Event); 545 546 switch (Event.Reason) { 547 default: 548 break; 549 550 case HWPressureEvent::RESOURCES: 551 PressureIncreasedBecauseOfResources = true; 552 break; 553 case HWPressureEvent::REGISTER_DEPS: 554 PressureIncreasedBecauseOfRegisterDependencies = true; 555 break; 556 case HWPressureEvent::MEMORY_DEPS: 557 PressureIncreasedBecauseOfMemoryDependencies = true; 558 break; 559 } 560 } 561 562 void BottleneckAnalysis::onCycleEnd() { 563 ++TotalCycles; 564 565 bool PressureIncreasedBecauseOfDataDependencies = 566 PressureIncreasedBecauseOfRegisterDependencies || 567 PressureIncreasedBecauseOfMemoryDependencies; 568 if (!PressureIncreasedBecauseOfResources && 569 !PressureIncreasedBecauseOfDataDependencies) 570 return; 571 572 ++BPI.PressureIncreaseCycles; 573 if (PressureIncreasedBecauseOfRegisterDependencies) 574 ++BPI.RegisterDependencyCycles; 575 if (PressureIncreasedBecauseOfMemoryDependencies) 576 ++BPI.MemoryDependencyCycles; 577 if (PressureIncreasedBecauseOfDataDependencies) 578 ++BPI.DataDependencyCycles; 579 if (PressureIncreasedBecauseOfResources) 580 ++BPI.ResourcePressureCycles; 581 PressureIncreasedBecauseOfResources = false; 582 PressureIncreasedBecauseOfRegisterDependencies = false; 583 PressureIncreasedBecauseOfMemoryDependencies = false; 584 } 585 586 void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { 587 if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { 588 OS << "\n\nNo resource or data dependency bottlenecks discovered.\n"; 589 return; 590 } 591 592 double PressurePerCycle = 593 (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; 594 double ResourcePressurePerCycle = 595 (double)BPI.ResourcePressureCycles * 100 / TotalCycles; 596 double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; 597 double RegDepPressurePerCycle = 598 (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; 599 double MemDepPressurePerCycle = 600 (double)BPI.MemoryDependencyCycles * 100 / TotalCycles; 601 602 OS << "\n\nCycles with backend pressure increase [ " 603 << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]"; 604 605 OS << "\nThroughput Bottlenecks: " 606 << "\n Resource Pressure [ " 607 << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100) 608 << "% ]"; 609 610 if (BPI.PressureIncreaseCycles) { 611 ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution(); 612 const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); 613 for (unsigned I = 0, E = Distribution.size(); I < E; ++I) { 614 unsigned ReleaseAtCycles = Distribution[I]; 615 if (ReleaseAtCycles) { 616 double Frequency = (double)ReleaseAtCycles * 100 / TotalCycles; 617 const MCProcResourceDesc &PRDesc = *SM.getProcResource(I); 618 OS << "\n - " << PRDesc.Name << " [ " 619 << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; 620 } 621 } 622 } 623 624 OS << "\n Data Dependencies: [ " 625 << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; 626 OS << "\n - Register Dependencies [ " 627 << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) 628 << "% ]"; 629 OS << "\n - Memory Dependencies [ " 630 << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) 631 << "% ]\n"; 632 } 633 634 void BottleneckAnalysis::printView(raw_ostream &OS) const { 635 std::string Buffer; 636 raw_string_ostream TempStream(Buffer); 637 printBottleneckHints(TempStream); 638 TempStream.flush(); 639 OS << Buffer; 640 printCriticalSequence(OS); 641 } 642 643 } // namespace mca. 644 } // namespace llvm 645