1 //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// 10 /// This file implements the functionalities used by the BottleneckAnalysis 11 /// to report bottleneck info. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "Views/BottleneckAnalysis.h" 16 #include "llvm/MC/MCInst.h" 17 #include "llvm/MCA/Support.h" 18 #include "llvm/Support/Format.h" 19 #include "llvm/Support/FormattedStream.h" 20 21 namespace llvm { 22 namespace mca { 23 24 #define DEBUG_TYPE "llvm-mca" 25 26 PressureTracker::PressureTracker(const MCSchedModel &Model) 27 : SM(Model), 28 ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), 29 ProcResID2Mask(Model.getNumProcResourceKinds(), 0), 30 ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), 31 ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) { 32 computeProcResourceMasks(SM, ProcResID2Mask); 33 34 // Ignore the invalid resource at index zero. 35 unsigned NextResourceUsersIdx = 0; 36 for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) { 37 const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); 38 ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx; 39 NextResourceUsersIdx += ProcResource.NumUnits; 40 uint64_t ResourceMask = ProcResID2Mask[I]; 41 ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I; 42 } 43 44 ResourceUsers.resize(NextResourceUsersIdx); 45 std::fill(ResourceUsers.begin(), ResourceUsers.end(), 46 std::make_pair<unsigned, unsigned>(~0U, 0U)); 47 } 48 49 void PressureTracker::getResourceUsers(uint64_t ResourceMask, 50 SmallVectorImpl<User> &Users) const { 51 unsigned Index = getResourceStateIndex(ResourceMask); 52 unsigned ProcResID = ResIdx2ProcResID[Index]; 53 const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); 54 for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { 55 const User U = getResourceUser(ProcResID, I); 56 if (U.second && IPI.find(U.first) != IPI.end()) 57 Users.emplace_back(U); 58 } 59 } 60 61 void PressureTracker::onInstructionDispatched(unsigned IID) { 62 IPI.insert(std::make_pair(IID, InstructionPressureInfo())); 63 } 64 65 void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); } 66 67 void PressureTracker::handleInstructionIssuedEvent( 68 const HWInstructionIssuedEvent &Event) { 69 unsigned IID = Event.IR.getSourceIndex(); 70 using ResourceRef = HWInstructionIssuedEvent::ResourceRef; 71 using ResourceUse = std::pair<ResourceRef, ResourceCycles>; 72 for (const ResourceUse &Use : Event.UsedResources) { 73 const ResourceRef &RR = Use.first; 74 unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; 75 Index += countTrailingZeros(RR.second); 76 ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator()); 77 } 78 } 79 80 void PressureTracker::updateResourcePressureDistribution( 81 uint64_t CumulativeMask) { 82 while (CumulativeMask) { 83 uint64_t Current = CumulativeMask & (-CumulativeMask); 84 unsigned ResIdx = getResourceStateIndex(Current); 85 unsigned ProcResID = ResIdx2ProcResID[ResIdx]; 86 uint64_t Mask = ProcResID2Mask[ProcResID]; 87 88 if (Mask == Current) { 89 ResourcePressureDistribution[ProcResID]++; 90 CumulativeMask ^= Current; 91 continue; 92 } 93 94 Mask ^= Current; 95 while (Mask) { 96 uint64_t SubUnit = Mask & (-Mask); 97 ResIdx = getResourceStateIndex(SubUnit); 98 ProcResID = ResIdx2ProcResID[ResIdx]; 99 ResourcePressureDistribution[ProcResID]++; 100 Mask ^= SubUnit; 101 } 102 103 CumulativeMask ^= Current; 104 } 105 } 106 107 void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { 108 assert(Event.Reason != HWPressureEvent::INVALID && 109 "Unexpected invalid event!"); 110 111 switch (Event.Reason) { 112 default: 113 break; 114 115 case HWPressureEvent::RESOURCES: { 116 const uint64_t ResourceMask = Event.ResourceMask; 117 updateResourcePressureDistribution(Event.ResourceMask); 118 119 for (const InstRef &IR : Event.AffectedInstructions) { 120 const Instruction &IS = *IR.getInstruction(); 121 unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask; 122 if (!BusyResources) 123 continue; 124 125 unsigned IID = IR.getSourceIndex(); 126 IPI[IID].ResourcePressureCycles++; 127 } 128 break; 129 } 130 131 case HWPressureEvent::REGISTER_DEPS: 132 for (const InstRef &IR : Event.AffectedInstructions) { 133 unsigned IID = IR.getSourceIndex(); 134 IPI[IID].RegisterPressureCycles++; 135 } 136 break; 137 138 case HWPressureEvent::MEMORY_DEPS: 139 for (const InstRef &IR : Event.AffectedInstructions) { 140 unsigned IID = IR.getSourceIndex(); 141 IPI[IID].MemoryPressureCycles++; 142 } 143 } 144 } 145 146 #ifndef NDEBUG 147 void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, 148 const DependencyEdge &DepEdge, 149 MCInstPrinter &MCIP) const { 150 unsigned FromIID = DepEdge.FromIID; 151 unsigned ToIID = DepEdge.ToIID; 152 assert(FromIID < ToIID && "Graph should be acyclic!"); 153 154 const DependencyEdge::Dependency &DE = DepEdge.Dep; 155 assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!"); 156 157 OS << " FROM: " << FromIID << " TO: " << ToIID << " "; 158 if (DE.Type == DependencyEdge::DT_REGISTER) { 159 OS << " - REGISTER: "; 160 MCIP.printRegName(OS, DE.ResourceOrRegID); 161 } else if (DE.Type == DependencyEdge::DT_MEMORY) { 162 OS << " - MEMORY"; 163 } else { 164 assert(DE.Type == DependencyEdge::DT_RESOURCE && 165 "Unsupported dependency type!"); 166 OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; 167 } 168 OS << " - COST: " << DE.Cost << '\n'; 169 } 170 #endif // NDEBUG 171 172 void DependencyGraph::pruneEdges(unsigned Iterations) { 173 for (DGNode &N : Nodes) { 174 unsigned NumPruned = 0; 175 const unsigned Size = N.OutgoingEdges.size(); 176 // Use a cut-off threshold to prune edges with a low frequency. 177 for (unsigned I = 0, E = Size; I < E; ++I) { 178 DependencyEdge &Edge = N.OutgoingEdges[I]; 179 if (Edge.Frequency == Iterations) 180 continue; 181 double Factor = (double)Edge.Frequency / Iterations; 182 if (0.10 < Factor) 183 continue; 184 Nodes[Edge.ToIID].NumPredecessors--; 185 std::swap(Edge, N.OutgoingEdges[E - 1]); 186 --E; 187 ++NumPruned; 188 } 189 190 if (NumPruned) 191 N.OutgoingEdges.resize(Size - NumPruned); 192 } 193 } 194 195 void DependencyGraph::initializeRootSet( 196 SmallVectorImpl<unsigned> &RootSet) const { 197 for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { 198 const DGNode &N = Nodes[I]; 199 if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty()) 200 RootSet.emplace_back(I); 201 } 202 } 203 204 void DependencyGraph::propagateThroughEdges( 205 SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) { 206 SmallVector<unsigned, 8> ToVisit; 207 208 // A critical sequence is computed as the longest path from a node of the 209 // RootSet to a leaf node (i.e. a node with no successors). The RootSet is 210 // composed of nodes with at least one successor, and no predecessors. 211 // 212 // Each node of the graph starts with an initial default cost of zero. The 213 // cost of a node is a measure of criticality: the higher the cost, the bigger 214 // is the performance impact. 215 // For register and memory dependencies, the cost is a function of the write 216 // latency as well as the actual delay (in cycles) caused to users. 217 // For processor resource dependencies, the cost is a function of the resource 218 // pressure. Resource interferences with low frequency values are ignored. 219 // 220 // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of 221 // the inner loop selects (i.e. visits) a node N from a set of `unvisited 222 // nodes`, and then propagates the cost of N to all its neighbors. 223 // 224 // The `unvisited nodes` set initially contains all the nodes from the 225 // RootSet. A node N is added to the `unvisited nodes` if all its 226 // predecessors have been visited already. 227 // 228 // For simplicity, every node tracks the number of unvisited incoming edges in 229 // field `NumVisitedPredecessors`. When the value of that field drops to 230 // zero, then the corresponding node is added to a `ToVisit` set. 231 // 232 // At the end of every iteration of the outer loop, set `ToVisit` becomes our 233 // new `unvisited nodes` set. 234 // 235 // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet) 236 // is empty. This algorithm works under the assumption that the graph is 237 // acyclic. 238 do { 239 for (unsigned IID : RootSet) { 240 const DGNode &N = Nodes[IID]; 241 for (const DependencyEdge &DepEdge : N.OutgoingEdges) { 242 unsigned ToIID = DepEdge.ToIID; 243 DGNode &To = Nodes[ToIID]; 244 uint64_t Cost = N.Cost + DepEdge.Dep.Cost; 245 // Check if this is the most expensive incoming edge seen so far. In 246 // case, update the total cost of the destination node (ToIID), as well 247 // its field `CriticalPredecessor`. 248 if (Cost > To.Cost) { 249 To.CriticalPredecessor = DepEdge; 250 To.Cost = Cost; 251 To.Depth = N.Depth + 1; 252 } 253 To.NumVisitedPredecessors++; 254 if (To.NumVisitedPredecessors == To.NumPredecessors) 255 ToVisit.emplace_back(ToIID); 256 } 257 } 258 259 std::swap(RootSet, ToVisit); 260 ToVisit.clear(); 261 } while (!RootSet.empty()); 262 } 263 264 void DependencyGraph::getCriticalSequence( 265 SmallVectorImpl<const DependencyEdge *> &Seq) const { 266 // At this stage, nodes of the graph have been already visited, and costs have 267 // been propagated through the edges (see method `propagateThroughEdges()`). 268 269 // Identify the node N with the highest cost in the graph. By construction, 270 // that node is the last instruction of our critical sequence. 271 // Field N.Depth would tell us the total length of the sequence. 272 // 273 // To obtain the sequence of critical edges, we simply follow the chain of critical 274 // predecessors starting from node N (field DGNode::CriticalPredecessor). 275 const auto It = std::max_element( 276 Nodes.begin(), Nodes.end(), 277 [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; }); 278 unsigned IID = std::distance(Nodes.begin(), It); 279 Seq.resize(Nodes[IID].Depth); 280 for (unsigned I = Seq.size(), E = 0; I > E; --I) { 281 const DGNode &N = Nodes[IID]; 282 Seq[I - 1] = &N.CriticalPredecessor; 283 IID = N.CriticalPredecessor.FromIID; 284 } 285 } 286 287 static void printInstruction(formatted_raw_ostream &FOS, 288 const MCSubtargetInfo &STI, MCInstPrinter &MCIP, 289 const MCInst &MCI, 290 bool UseDifferentColor = false) { 291 std::string Instruction; 292 raw_string_ostream InstrStream(Instruction); 293 294 FOS.PadToColumn(14); 295 296 MCIP.printInst(&MCI, 0, "", STI, InstrStream); 297 InstrStream.flush(); 298 299 if (UseDifferentColor) 300 FOS.changeColor(raw_ostream::CYAN, true, false); 301 FOS << StringRef(Instruction).ltrim(); 302 if (UseDifferentColor) 303 FOS.resetColor(); 304 } 305 306 void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { 307 // Early exit if no bottlenecks were found during the simulation. 308 if (!SeenStallCycles || !BPI.PressureIncreaseCycles) 309 return; 310 311 SmallVector<const DependencyEdge *, 16> Seq; 312 DG.getCriticalSequence(Seq); 313 if (Seq.empty()) 314 return; 315 316 OS << "\nCritical sequence based on the simulation:\n\n"; 317 318 const DependencyEdge &FirstEdge = *Seq[0]; 319 unsigned FromIID = FirstEdge.FromIID % Source.size(); 320 unsigned ToIID = FirstEdge.ToIID % Source.size(); 321 bool IsLoopCarried = FromIID >= ToIID; 322 323 formatted_raw_ostream FOS(OS); 324 FOS.PadToColumn(14); 325 FOS << "Instruction"; 326 FOS.PadToColumn(58); 327 FOS << "Dependency Information"; 328 329 bool HasColors = FOS.has_colors(); 330 331 unsigned CurrentIID = 0; 332 if (IsLoopCarried) { 333 FOS << "\n +----< " << FromIID << "."; 334 printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors); 335 FOS << "\n |\n | < loop carried > \n |"; 336 } else { 337 while (CurrentIID < FromIID) { 338 FOS << "\n " << CurrentIID << "."; 339 printInstruction(FOS, STI, MCIP, Source[CurrentIID]); 340 CurrentIID++; 341 } 342 343 FOS << "\n +----< " << CurrentIID << "."; 344 printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors); 345 CurrentIID++; 346 } 347 348 for (const DependencyEdge *&DE : Seq) { 349 ToIID = DE->ToIID % Source.size(); 350 unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID; 351 352 while (CurrentIID < LastIID) { 353 FOS << "\n | " << CurrentIID << "."; 354 printInstruction(FOS, STI, MCIP, Source[CurrentIID]); 355 CurrentIID++; 356 } 357 358 if (CurrentIID == ToIID) { 359 FOS << "\n +----> " << ToIID << "."; 360 printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors); 361 } else { 362 FOS << "\n |\n | < loop carried > \n |" 363 << "\n +----> " << ToIID << "."; 364 printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors); 365 } 366 FOS.PadToColumn(58); 367 368 const DependencyEdge::Dependency &Dep = DE->Dep; 369 if (HasColors) 370 FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 371 372 if (Dep.Type == DependencyEdge::DT_REGISTER) { 373 FOS << "## REGISTER dependency: "; 374 if (HasColors) 375 FOS.changeColor(raw_ostream::MAGENTA, true, false); 376 MCIP.printRegName(FOS, Dep.ResourceOrRegID); 377 } else if (Dep.Type == DependencyEdge::DT_MEMORY) { 378 FOS << "## MEMORY dependency."; 379 } else { 380 assert(Dep.Type == DependencyEdge::DT_RESOURCE && 381 "Unsupported dependency type!"); 382 FOS << "## RESOURCE interference: "; 383 if (HasColors) 384 FOS.changeColor(raw_ostream::MAGENTA, true, false); 385 FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID); 386 if (HasColors) { 387 FOS.resetColor(); 388 FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 389 } 390 FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations) 391 << "% ]"; 392 } 393 if (HasColors) 394 FOS.resetColor(); 395 ++CurrentIID; 396 } 397 398 while (CurrentIID < Source.size()) { 399 FOS << "\n " << CurrentIID << "."; 400 printInstruction(FOS, STI, MCIP, Source[CurrentIID]); 401 CurrentIID++; 402 } 403 404 FOS << '\n'; 405 FOS.flush(); 406 } 407 408 #ifndef NDEBUG 409 void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { 410 OS << "\nREG DEPS\n"; 411 for (const DGNode &Node : Nodes) 412 for (const DependencyEdge &DE : Node.OutgoingEdges) 413 if (DE.Dep.Type == DependencyEdge::DT_REGISTER) 414 dumpDependencyEdge(OS, DE, MCIP); 415 416 OS << "\nMEM DEPS\n"; 417 for (const DGNode &Node : Nodes) 418 for (const DependencyEdge &DE : Node.OutgoingEdges) 419 if (DE.Dep.Type == DependencyEdge::DT_MEMORY) 420 dumpDependencyEdge(OS, DE, MCIP); 421 422 OS << "\nRESOURCE DEPS\n"; 423 for (const DGNode &Node : Nodes) 424 for (const DependencyEdge &DE : Node.OutgoingEdges) 425 if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) 426 dumpDependencyEdge(OS, DE, MCIP); 427 } 428 #endif // NDEBUG 429 430 void DependencyGraph::addDependency(unsigned From, unsigned To, 431 DependencyEdge::Dependency &&Dep) { 432 DGNode &NodeFrom = Nodes[From]; 433 DGNode &NodeTo = Nodes[To]; 434 SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; 435 436 auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) { 437 return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; 438 }); 439 440 if (It != Vec.end()) { 441 It->Dep.Cost += Dep.Cost; 442 It->Frequency++; 443 return; 444 } 445 446 DependencyEdge DE = {Dep, From, To, 1}; 447 Vec.emplace_back(DE); 448 NodeTo.NumPredecessors++; 449 } 450 451 BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, 452 MCInstPrinter &Printer, 453 ArrayRef<MCInst> S, unsigned NumIter) 454 : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3), 455 Source(S), Iterations(NumIter), TotalCycles(0), 456 PressureIncreasedBecauseOfResources(false), 457 PressureIncreasedBecauseOfRegisterDependencies(false), 458 PressureIncreasedBecauseOfMemoryDependencies(false), 459 SeenStallCycles(false), BPI() {} 460 461 void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, 462 unsigned RegID, unsigned Cost) { 463 bool IsLoopCarried = From >= To; 464 unsigned SourceSize = Source.size(); 465 if (IsLoopCarried) { 466 DG.addRegisterDep(From, To + SourceSize, RegID, Cost); 467 DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); 468 return; 469 } 470 DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost); 471 } 472 473 void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, 474 unsigned Cost) { 475 bool IsLoopCarried = From >= To; 476 unsigned SourceSize = Source.size(); 477 if (IsLoopCarried) { 478 DG.addMemoryDep(From, To + SourceSize, Cost); 479 DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); 480 return; 481 } 482 DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost); 483 } 484 485 void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, 486 uint64_t Mask, unsigned Cost) { 487 bool IsLoopCarried = From >= To; 488 unsigned SourceSize = Source.size(); 489 if (IsLoopCarried) { 490 DG.addResourceDep(From, To + SourceSize, Mask, Cost); 491 DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); 492 return; 493 } 494 DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost); 495 } 496 497 void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { 498 const unsigned IID = Event.IR.getSourceIndex(); 499 if (Event.Type == HWInstructionEvent::Dispatched) { 500 Tracker.onInstructionDispatched(IID); 501 return; 502 } 503 if (Event.Type == HWInstructionEvent::Executed) { 504 Tracker.onInstructionExecuted(IID); 505 return; 506 } 507 508 if (Event.Type != HWInstructionEvent::Issued) 509 return; 510 511 const Instruction &IS = *Event.IR.getInstruction(); 512 unsigned To = IID % Source.size(); 513 514 unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID); 515 uint64_t ResourceMask = IS.getCriticalResourceMask(); 516 SmallVector<std::pair<unsigned, unsigned>, 4> Users; 517 while (ResourceMask) { 518 uint64_t Current = ResourceMask & (-ResourceMask); 519 Tracker.getResourceUsers(Current, Users); 520 for (const std::pair<unsigned, unsigned> &U : Users) 521 addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles); 522 Users.clear(); 523 ResourceMask ^= Current; 524 } 525 526 const CriticalDependency &RegDep = IS.getCriticalRegDep(); 527 if (RegDep.Cycles) { 528 Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID); 529 unsigned From = RegDep.IID % Source.size(); 530 addRegisterDep(From, To, RegDep.RegID, Cycles); 531 } 532 533 const CriticalDependency &MemDep = IS.getCriticalMemDep(); 534 if (MemDep.Cycles) { 535 Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID); 536 unsigned From = MemDep.IID % Source.size(); 537 addMemoryDep(From, To, Cycles); 538 } 539 540 Tracker.handleInstructionIssuedEvent( 541 static_cast<const HWInstructionIssuedEvent &>(Event)); 542 543 // Check if this is the last simulated instruction. 544 if (IID == ((Iterations * Source.size()) - 1)) 545 DG.finalizeGraph(Iterations); 546 } 547 548 void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { 549 assert(Event.Reason != HWPressureEvent::INVALID && 550 "Unexpected invalid event!"); 551 552 Tracker.handlePressureEvent(Event); 553 554 switch (Event.Reason) { 555 default: 556 break; 557 558 case HWPressureEvent::RESOURCES: 559 PressureIncreasedBecauseOfResources = true; 560 break; 561 case HWPressureEvent::REGISTER_DEPS: 562 PressureIncreasedBecauseOfRegisterDependencies = true; 563 break; 564 case HWPressureEvent::MEMORY_DEPS: 565 PressureIncreasedBecauseOfMemoryDependencies = true; 566 break; 567 } 568 } 569 570 void BottleneckAnalysis::onCycleEnd() { 571 ++TotalCycles; 572 573 bool PressureIncreasedBecauseOfDataDependencies = 574 PressureIncreasedBecauseOfRegisterDependencies || 575 PressureIncreasedBecauseOfMemoryDependencies; 576 if (!PressureIncreasedBecauseOfResources && 577 !PressureIncreasedBecauseOfDataDependencies) 578 return; 579 580 ++BPI.PressureIncreaseCycles; 581 if (PressureIncreasedBecauseOfRegisterDependencies) 582 ++BPI.RegisterDependencyCycles; 583 if (PressureIncreasedBecauseOfMemoryDependencies) 584 ++BPI.MemoryDependencyCycles; 585 if (PressureIncreasedBecauseOfDataDependencies) 586 ++BPI.DataDependencyCycles; 587 if (PressureIncreasedBecauseOfResources) 588 ++BPI.ResourcePressureCycles; 589 PressureIncreasedBecauseOfResources = false; 590 PressureIncreasedBecauseOfRegisterDependencies = false; 591 PressureIncreasedBecauseOfMemoryDependencies = false; 592 } 593 594 void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { 595 if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { 596 OS << "\n\nNo resource or data dependency bottlenecks discovered.\n"; 597 return; 598 } 599 600 double PressurePerCycle = 601 (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; 602 double ResourcePressurePerCycle = 603 (double)BPI.ResourcePressureCycles * 100 / TotalCycles; 604 double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; 605 double RegDepPressurePerCycle = 606 (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; 607 double MemDepPressurePerCycle = 608 (double)BPI.MemoryDependencyCycles * 100 / TotalCycles; 609 610 OS << "\n\nCycles with backend pressure increase [ " 611 << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]"; 612 613 OS << "\nThroughput Bottlenecks: " 614 << "\n Resource Pressure [ " 615 << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100) 616 << "% ]"; 617 618 if (BPI.PressureIncreaseCycles) { 619 ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution(); 620 const MCSchedModel &SM = STI.getSchedModel(); 621 for (unsigned I = 0, E = Distribution.size(); I < E; ++I) { 622 unsigned ResourceCycles = Distribution[I]; 623 if (ResourceCycles) { 624 double Frequency = (double)ResourceCycles * 100 / TotalCycles; 625 const MCProcResourceDesc &PRDesc = *SM.getProcResource(I); 626 OS << "\n - " << PRDesc.Name << " [ " 627 << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; 628 } 629 } 630 } 631 632 OS << "\n Data Dependencies: [ " 633 << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; 634 OS << "\n - Register Dependencies [ " 635 << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) 636 << "% ]"; 637 OS << "\n - Memory Dependencies [ " 638 << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) 639 << "% ]\n"; 640 } 641 642 void BottleneckAnalysis::printView(raw_ostream &OS) const { 643 std::string Buffer; 644 raw_string_ostream TempStream(Buffer); 645 printBottleneckHints(TempStream); 646 TempStream.flush(); 647 OS << Buffer; 648 printCriticalSequence(OS); 649 } 650 651 } // namespace mca. 652 } // namespace llvm 653