1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a profile inference algorithm. Given an incomplete and 10 // possibly imprecise block counts, the algorithm reconstructs realistic block 11 // and edge counts that satisfy flow conservation rules, while minimally modify 12 // input block counts. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Transforms/Utils/SampleProfileInference.h" 17 #include "llvm/ADT/BitVector.h" 18 #include "llvm/Support/CommandLine.h" 19 #include "llvm/Support/Debug.h" 20 #include <queue> 21 #include <set> 22 #include <stack> 23 24 using namespace llvm; 25 #define DEBUG_TYPE "sample-profile-inference" 26 27 namespace { 28 29 static cl::opt<bool> SampleProfileEvenCountDistribution( 30 "sample-profile-even-count-distribution", cl::init(true), cl::Hidden, 31 cl::desc("Try to evenly distribute counts when there are multiple equally " 32 "likely options.")); 33 34 static cl::opt<unsigned> SampleProfileMaxDfsCalls( 35 "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden, 36 cl::desc("Maximum number of dfs iterations for even count distribution.")); 37 38 static cl::opt<unsigned> SampleProfileProfiCostInc( 39 "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden, 40 cl::desc("A cost of increasing a block's count by one.")); 41 42 static cl::opt<unsigned> SampleProfileProfiCostDec( 43 "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden, 44 cl::desc("A cost of decreasing a block's count by one.")); 45 46 static cl::opt<unsigned> SampleProfileProfiCostIncZero( 47 "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden, 48 cl::desc("A cost of increasing a count of zero-weight block by one.")); 49 50 static cl::opt<unsigned> SampleProfileProfiCostIncEntry( 51 "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden, 52 cl::desc("A cost of increasing the entry block's count by one.")); 53 54 static cl::opt<unsigned> SampleProfileProfiCostDecEntry( 55 "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden, 56 cl::desc("A cost of decreasing the entry block's count by one.")); 57 58 /// A value indicating an infinite flow/capacity/weight of a block/edge. 59 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up 60 /// during the execution. 61 static constexpr int64_t INF = ((int64_t)1) << 50; 62 63 /// The minimum-cost maximum flow algorithm. 64 /// 65 /// The algorithm finds the maximum flow of minimum cost on a given (directed) 66 /// network using a modified version of the classical Moore-Bellman-Ford 67 /// approach. The algorithm applies a number of augmentation iterations in which 68 /// flow is sent along paths of positive capacity from the source to the sink. 69 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where 70 /// where m is the number of edges, n is the number of vertices, and v(f) is the 71 /// value of the maximum flow. However, the observed running time on typical 72 /// instances is sub-quadratic, that is, o(n^2). 73 /// 74 /// The input is a set of edges with specified costs and capacities, and a pair 75 /// of nodes (source and sink). The output is the flow along each edge of the 76 /// minimum total cost respecting the given edge capacities. 77 class MinCostMaxFlow { 78 public: 79 // Initialize algorithm's data structures for a network of a given size. 80 void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { 81 Source = SourceNode; 82 Target = SinkNode; 83 84 Nodes = std::vector<Node>(NodeCount); 85 Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); 86 if (SampleProfileEvenCountDistribution) 87 AugmentingEdges = 88 std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>()); 89 } 90 91 // Run the algorithm. 92 int64_t run() { 93 // Iteratively find an augmentation path/dag in the network and send the 94 // flow along its edges 95 size_t AugmentationIters = applyFlowAugmentation(); 96 97 // Compute the total flow and its cost 98 int64_t TotalCost = 0; 99 int64_t TotalFlow = 0; 100 for (uint64_t Src = 0; Src < Nodes.size(); Src++) { 101 for (auto &Edge : Edges[Src]) { 102 if (Edge.Flow > 0) { 103 TotalCost += Edge.Cost * Edge.Flow; 104 if (Src == Source) 105 TotalFlow += Edge.Flow; 106 } 107 } 108 } 109 LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters 110 << " iterations with " << TotalFlow << " total flow" 111 << " of " << TotalCost << " cost\n"); 112 (void)TotalFlow; 113 (void)AugmentationIters; 114 return TotalCost; 115 } 116 117 /// Adding an edge to the network with a specified capacity and a cost. 118 /// Multiple edges between a pair of nodes are allowed but self-edges 119 /// are not supported. 120 void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { 121 assert(Capacity > 0 && "adding an edge of zero capacity"); 122 assert(Src != Dst && "loop edge are not supported"); 123 124 Edge SrcEdge; 125 SrcEdge.Dst = Dst; 126 SrcEdge.Cost = Cost; 127 SrcEdge.Capacity = Capacity; 128 SrcEdge.Flow = 0; 129 SrcEdge.RevEdgeIndex = Edges[Dst].size(); 130 131 Edge DstEdge; 132 DstEdge.Dst = Src; 133 DstEdge.Cost = -Cost; 134 DstEdge.Capacity = 0; 135 DstEdge.Flow = 0; 136 DstEdge.RevEdgeIndex = Edges[Src].size(); 137 138 Edges[Src].push_back(SrcEdge); 139 Edges[Dst].push_back(DstEdge); 140 } 141 142 /// Adding an edge to the network of infinite capacity and a given cost. 143 void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { 144 addEdge(Src, Dst, INF, Cost); 145 } 146 147 /// Get the total flow from a given source node. 148 /// Returns a list of pairs (target node, amount of flow to the target). 149 const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { 150 std::vector<std::pair<uint64_t, int64_t>> Flow; 151 for (auto &Edge : Edges[Src]) { 152 if (Edge.Flow > 0) 153 Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); 154 } 155 return Flow; 156 } 157 158 /// Get the total flow between a pair of nodes. 159 int64_t getFlow(uint64_t Src, uint64_t Dst) const { 160 int64_t Flow = 0; 161 for (auto &Edge : Edges[Src]) { 162 if (Edge.Dst == Dst) { 163 Flow += Edge.Flow; 164 } 165 } 166 return Flow; 167 } 168 169 /// A cost of taking an unlikely jump. 170 static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30; 171 /// Minimum BaseDistance for the jump distance values in island joining. 172 static constexpr uint64_t MinBaseDistance = 10000; 173 174 private: 175 /// Iteratively find an augmentation path/dag in the network and send the 176 /// flow along its edges. The method returns the number of applied iterations. 177 size_t applyFlowAugmentation() { 178 size_t AugmentationIters = 0; 179 while (findAugmentingPath()) { 180 uint64_t PathCapacity = computeAugmentingPathCapacity(); 181 while (PathCapacity > 0) { 182 bool Progress = false; 183 if (SampleProfileEvenCountDistribution) { 184 // Identify node/edge candidates for augmentation 185 identifyShortestEdges(PathCapacity); 186 187 // Find an augmenting DAG 188 auto AugmentingOrder = findAugmentingDAG(); 189 190 // Apply the DAG augmentation 191 Progress = augmentFlowAlongDAG(AugmentingOrder); 192 PathCapacity = computeAugmentingPathCapacity(); 193 } 194 195 if (!Progress) { 196 augmentFlowAlongPath(PathCapacity); 197 PathCapacity = 0; 198 } 199 200 AugmentationIters++; 201 } 202 } 203 return AugmentationIters; 204 } 205 206 /// Compute the capacity of the cannonical augmenting path. If the path is 207 /// saturated (that is, no flow can be sent along the path), then return 0. 208 uint64_t computeAugmentingPathCapacity() { 209 uint64_t PathCapacity = INF; 210 uint64_t Now = Target; 211 while (Now != Source) { 212 uint64_t Pred = Nodes[Now].ParentNode; 213 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 214 215 assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow"); 216 uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow); 217 PathCapacity = std::min(PathCapacity, EdgeCapacity); 218 219 Now = Pred; 220 } 221 return PathCapacity; 222 } 223 224 /// Check for existence of an augmenting path with a positive capacity. 225 bool findAugmentingPath() { 226 // Initialize data structures 227 for (auto &Node : Nodes) { 228 Node.Distance = INF; 229 Node.ParentNode = uint64_t(-1); 230 Node.ParentEdgeIndex = uint64_t(-1); 231 Node.Taken = false; 232 } 233 234 std::queue<uint64_t> Queue; 235 Queue.push(Source); 236 Nodes[Source].Distance = 0; 237 Nodes[Source].Taken = true; 238 while (!Queue.empty()) { 239 uint64_t Src = Queue.front(); 240 Queue.pop(); 241 Nodes[Src].Taken = false; 242 // Although the residual network contains edges with negative costs 243 // (in particular, backward edges), it can be shown that there are no 244 // negative-weight cycles and the following two invariants are maintained: 245 // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, 246 // where Dist is the length of the shortest path between two nodes. This 247 // allows to prune the search-space of the path-finding algorithm using 248 // the following early-stop criteria: 249 // -- If we find a path with zero-distance from Source to Target, stop the 250 // search, as the path is the shortest since Dist[Source, Target] >= 0; 251 // -- If we have Dist[Source, V] > Dist[Source, Target], then do not 252 // process node V, as it is guaranteed _not_ to be on a shortest path 253 // from Source to Target; it follows from inequalities 254 // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] 255 // >= Dist[Source, V] 256 if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0) 257 break; 258 if (Nodes[Src].Distance > Nodes[Target].Distance) 259 continue; 260 261 // Process adjacent edges 262 for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { 263 auto &Edge = Edges[Src][EdgeIdx]; 264 if (Edge.Flow < Edge.Capacity) { 265 uint64_t Dst = Edge.Dst; 266 int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; 267 if (Nodes[Dst].Distance > NewDistance) { 268 // Update the distance and the parent node/edge 269 Nodes[Dst].Distance = NewDistance; 270 Nodes[Dst].ParentNode = Src; 271 Nodes[Dst].ParentEdgeIndex = EdgeIdx; 272 // Add the node to the queue, if it is not there yet 273 if (!Nodes[Dst].Taken) { 274 Queue.push(Dst); 275 Nodes[Dst].Taken = true; 276 } 277 } 278 } 279 } 280 } 281 282 return Nodes[Target].Distance != INF; 283 } 284 285 /// Update the current flow along the augmenting path. 286 void augmentFlowAlongPath(uint64_t PathCapacity) { 287 assert(PathCapacity > 0 && "found an incorrect augmenting path"); 288 uint64_t Now = Target; 289 while (Now != Source) { 290 uint64_t Pred = Nodes[Now].ParentNode; 291 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 292 auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; 293 294 Edge.Flow += PathCapacity; 295 RevEdge.Flow -= PathCapacity; 296 297 Now = Pred; 298 } 299 } 300 301 /// Find an Augmenting DAG order using a modified version of DFS in which we 302 /// can visit a node multiple times. In the DFS search, when scanning each 303 /// edge out of a node, continue search at Edge.Dst endpoint if it has not 304 /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm 305 /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time. 306 /// It returns an Augmenting Order (Taken nodes in decreasing Finish time) 307 /// that starts with Source and ends with Target. 308 std::vector<uint64_t> findAugmentingDAG() { 309 // We use a stack based implemenation of DFS to avoid recursion. 310 // Defining DFS data structures: 311 // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that 312 // - we are currently visiting Nodes[NodeIdx] and 313 // - the next edge to scan is Edges[NodeIdx][EdgeIdx] 314 typedef std::pair<uint64_t, uint64_t> StackItemType; 315 std::stack<StackItemType> Stack; 316 std::vector<uint64_t> AugmentingOrder; 317 318 // Phase 0: Initialize Node attributes and Time for DFS run 319 for (auto &Node : Nodes) { 320 Node.Discovery = 0; 321 Node.Finish = 0; 322 Node.NumCalls = 0; 323 Node.Taken = false; 324 } 325 uint64_t Time = 0; 326 // Mark Target as Taken 327 // Taken attribute will be propagated backwards from Target towards Source 328 Nodes[Target].Taken = true; 329 330 // Phase 1: Start DFS traversal from Source 331 Stack.emplace(Source, 0); 332 Nodes[Source].Discovery = ++Time; 333 while (!Stack.empty()) { 334 auto NodeIdx = Stack.top().first; 335 auto EdgeIdx = Stack.top().second; 336 337 // If we haven't scanned all edges out of NodeIdx, continue scanning 338 if (EdgeIdx < Edges[NodeIdx].size()) { 339 auto &Edge = Edges[NodeIdx][EdgeIdx]; 340 auto &Dst = Nodes[Edge.Dst]; 341 Stack.top().second++; 342 343 if (Edge.OnShortestPath) { 344 // If we haven't seen Edge.Dst so far, continue DFS search there 345 if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) { 346 Dst.Discovery = ++Time; 347 Stack.emplace(Edge.Dst, 0); 348 Dst.NumCalls++; 349 } else if (Dst.Taken && Dst.Finish != 0) { 350 // Else, if Edge.Dst already have a path to Target, so that NodeIdx 351 Nodes[NodeIdx].Taken = true; 352 } 353 } 354 } else { 355 // If we are done scanning all edge out of NodeIdx 356 Stack.pop(); 357 // If we haven't found a path from NodeIdx to Target, forget about it 358 if (!Nodes[NodeIdx].Taken) { 359 Nodes[NodeIdx].Discovery = 0; 360 } else { 361 // If we have found a path from NodeIdx to Target, then finish NodeIdx 362 // and propagate Taken flag to DFS parent unless at the Source 363 Nodes[NodeIdx].Finish = ++Time; 364 // NodeIdx == Source if and only if the stack is empty 365 if (NodeIdx != Source) { 366 assert(!Stack.empty() && "empty stack while running dfs"); 367 Nodes[Stack.top().first].Taken = true; 368 } 369 AugmentingOrder.push_back(NodeIdx); 370 } 371 } 372 } 373 // Nodes are collected decreasing Finish time, so the order is reversed 374 std::reverse(AugmentingOrder.begin(), AugmentingOrder.end()); 375 376 // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges 377 for (size_t Src : AugmentingOrder) { 378 AugmentingEdges[Src].clear(); 379 for (auto &Edge : Edges[Src]) { 380 uint64_t Dst = Edge.Dst; 381 if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken && 382 Nodes[Dst].Finish < Nodes[Src].Finish) { 383 AugmentingEdges[Src].push_back(&Edge); 384 } 385 } 386 assert((Src == Target || !AugmentingEdges[Src].empty()) && 387 "incorrectly constructed augmenting edges"); 388 } 389 390 return AugmentingOrder; 391 } 392 393 /// Update the current flow along the given (acyclic) subgraph specified by 394 /// the vertex order, AugmentingOrder. The objective is to send as much flow 395 /// as possible while evenly distributing flow among successors of each node. 396 /// After the update at least one edge is saturated. 397 bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) { 398 // Phase 0: Initialization 399 for (uint64_t Src : AugmentingOrder) { 400 Nodes[Src].FracFlow = 0; 401 Nodes[Src].IntFlow = 0; 402 for (auto &Edge : AugmentingEdges[Src]) { 403 Edge->AugmentedFlow = 0; 404 } 405 } 406 407 // Phase 1: Send a unit of fractional flow along the DAG 408 uint64_t MaxFlowAmount = INF; 409 Nodes[Source].FracFlow = 1.0; 410 for (uint64_t Src : AugmentingOrder) { 411 assert((Src == Target || Nodes[Src].FracFlow > 0.0) && 412 "incorrectly computed fractional flow"); 413 // Distribute flow evenly among successors of Src 414 uint64_t Degree = AugmentingEdges[Src].size(); 415 for (auto &Edge : AugmentingEdges[Src]) { 416 double EdgeFlow = Nodes[Src].FracFlow / Degree; 417 Nodes[Edge->Dst].FracFlow += EdgeFlow; 418 if (Edge->Capacity == INF) 419 continue; 420 uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow; 421 MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow); 422 } 423 } 424 // Stop early if we cannot send any (integral) flow from Source to Target 425 if (MaxFlowAmount == 0) 426 return false; 427 428 // Phase 2: Send an integral flow of MaxFlowAmount 429 Nodes[Source].IntFlow = MaxFlowAmount; 430 for (uint64_t Src : AugmentingOrder) { 431 if (Src == Target) 432 break; 433 // Distribute flow evenly among successors of Src, rounding up to make 434 // sure all flow is sent 435 uint64_t Degree = AugmentingEdges[Src].size(); 436 // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree 437 uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree; 438 for (auto &Edge : AugmentingEdges[Src]) { 439 uint64_t Dst = Edge->Dst; 440 uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow); 441 EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow)); 442 Nodes[Dst].IntFlow += EdgeFlow; 443 Nodes[Src].IntFlow -= EdgeFlow; 444 Edge->AugmentedFlow += EdgeFlow; 445 } 446 } 447 assert(Nodes[Target].IntFlow <= MaxFlowAmount); 448 Nodes[Target].IntFlow = 0; 449 450 // Phase 3: Send excess flow back traversing the nodes backwards. 451 // Because of rounding, not all flow can be sent along the edges of Src. 452 // Hence, sending the remaining flow back to maintain flow conservation 453 for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) { 454 uint64_t Src = AugmentingOrder[Idx - 1]; 455 // Try to send excess flow back along each edge. 456 // Make sure we only send back flow we just augmented (AugmentedFlow). 457 for (auto &Edge : AugmentingEdges[Src]) { 458 uint64_t Dst = Edge->Dst; 459 if (Nodes[Dst].IntFlow == 0) 460 continue; 461 uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow); 462 Nodes[Dst].IntFlow -= EdgeFlow; 463 Nodes[Src].IntFlow += EdgeFlow; 464 Edge->AugmentedFlow -= EdgeFlow; 465 } 466 } 467 468 // Phase 4: Update flow values along all edges 469 bool HasSaturatedEdges = false; 470 for (uint64_t Src : AugmentingOrder) { 471 // Verify that we have sent all the excess flow from the node 472 assert(Src == Source || Nodes[Src].IntFlow == 0); 473 for (auto &Edge : AugmentingEdges[Src]) { 474 assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow); 475 // Update flow values along the edge and its reverse copy 476 auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex]; 477 Edge->Flow += Edge->AugmentedFlow; 478 RevEdge.Flow -= Edge->AugmentedFlow; 479 if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0) 480 HasSaturatedEdges = true; 481 } 482 } 483 484 // The augmentation is successful iff at least one edge becomes saturated 485 return HasSaturatedEdges; 486 } 487 488 /// Identify candidate (shortest) edges for augmentation. 489 void identifyShortestEdges(uint64_t PathCapacity) { 490 assert(PathCapacity > 0 && "found an incorrect augmenting DAG"); 491 // To make sure the augmentation DAG contains only edges with large residual 492 // capacity, we prune all edges whose capacity is below a fraction of 493 // the capacity of the augmented path. 494 // (All edges of the path itself are always in the DAG) 495 uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1)); 496 497 // Decide which edges are on a shortest path from Source to Target 498 for (size_t Src = 0; Src < Nodes.size(); Src++) { 499 // An edge cannot be augmenting if the endpoint has large distance 500 if (Nodes[Src].Distance > Nodes[Target].Distance) 501 continue; 502 503 for (auto &Edge : Edges[Src]) { 504 uint64_t Dst = Edge.Dst; 505 Edge.OnShortestPath = 506 Src != Target && Dst != Source && 507 Nodes[Dst].Distance <= Nodes[Target].Distance && 508 Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost && 509 Edge.Capacity > Edge.Flow && 510 uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity; 511 } 512 } 513 } 514 515 /// A node in a flow network. 516 struct Node { 517 /// The cost of the cheapest path from the source to the current node. 518 int64_t Distance; 519 /// The node preceding the current one in the path. 520 uint64_t ParentNode; 521 /// The index of the edge between ParentNode and the current node. 522 uint64_t ParentEdgeIndex; 523 /// An indicator of whether the current node is in a queue. 524 bool Taken; 525 526 /// Data fields utilized in DAG-augmentation: 527 /// Fractional flow. 528 double FracFlow; 529 /// Integral flow. 530 uint64_t IntFlow; 531 /// Discovery time. 532 uint64_t Discovery; 533 /// Finish time. 534 uint64_t Finish; 535 /// NumCalls. 536 uint64_t NumCalls; 537 }; 538 539 /// An edge in a flow network. 540 struct Edge { 541 /// The cost of the edge. 542 int64_t Cost; 543 /// The capacity of the edge. 544 int64_t Capacity; 545 /// The current flow on the edge. 546 int64_t Flow; 547 /// The destination node of the edge. 548 uint64_t Dst; 549 /// The index of the reverse edge between Dst and the current node. 550 uint64_t RevEdgeIndex; 551 552 /// Data fields utilized in DAG-augmentation: 553 /// Whether the edge is currently on a shortest path from Source to Target. 554 bool OnShortestPath; 555 /// Extra flow along the edge. 556 uint64_t AugmentedFlow; 557 }; 558 559 /// The set of network nodes. 560 std::vector<Node> Nodes; 561 /// The set of network edges. 562 std::vector<std::vector<Edge>> Edges; 563 /// Source node of the flow. 564 uint64_t Source; 565 /// Target (sink) node of the flow. 566 uint64_t Target; 567 /// Augmenting edges. 568 std::vector<std::vector<Edge *>> AugmentingEdges; 569 }; 570 571 constexpr int64_t MinCostMaxFlow::AuxCostUnlikely; 572 constexpr uint64_t MinCostMaxFlow::MinBaseDistance; 573 574 /// A post-processing adjustment of control flow. It applies two steps by 575 /// rerouting some flow and making it more realistic: 576 /// 577 /// - First, it removes all isolated components ("islands") with a positive flow 578 /// that are unreachable from the entry block. For every such component, we 579 /// find the shortest from the entry to an exit passing through the component, 580 /// and increase the flow by one unit along the path. 581 /// 582 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks 583 /// with no sampled counts. Then it rebalnces the flow that goes through such 584 /// a subgraph so that each branch is taken with probability 50%. 585 /// An unknown subgraph is such that for every two nodes u and v: 586 /// - u dominates v and u is not unknown; 587 /// - v post-dominates u; and 588 /// - all inner-nodes of all (u,v)-paths are unknown. 589 /// 590 class FlowAdjuster { 591 public: 592 FlowAdjuster(FlowFunction &Func) : Func(Func) { 593 assert(Func.Blocks[Func.Entry].isEntry() && 594 "incorrect index of the entry block"); 595 } 596 597 // Run the post-processing 598 void run() { 599 /// Adjust the flow to get rid of isolated components. 600 joinIsolatedComponents(); 601 602 /// Rebalance the flow inside unknown subgraphs. 603 rebalanceUnknownSubgraphs(); 604 } 605 606 private: 607 void joinIsolatedComponents() { 608 // Find blocks that are reachable from the source 609 auto Visited = BitVector(NumBlocks(), false); 610 findReachable(Func.Entry, Visited); 611 612 // Iterate over all non-reachable blocks and adjust their weights 613 for (uint64_t I = 0; I < NumBlocks(); I++) { 614 auto &Block = Func.Blocks[I]; 615 if (Block.Flow > 0 && !Visited[I]) { 616 // Find a path from the entry to an exit passing through the block I 617 auto Path = findShortestPath(I); 618 // Increase the flow along the path 619 assert(Path.size() > 0 && Path[0]->Source == Func.Entry && 620 "incorrectly computed path adjusting control flow"); 621 Func.Blocks[Func.Entry].Flow += 1; 622 for (auto &Jump : Path) { 623 Jump->Flow += 1; 624 Func.Blocks[Jump->Target].Flow += 1; 625 // Update reachability 626 findReachable(Jump->Target, Visited); 627 } 628 } 629 } 630 } 631 632 /// Run BFS from a given block along the jumps with a positive flow and mark 633 /// all reachable blocks. 634 void findReachable(uint64_t Src, BitVector &Visited) { 635 if (Visited[Src]) 636 return; 637 std::queue<uint64_t> Queue; 638 Queue.push(Src); 639 Visited[Src] = true; 640 while (!Queue.empty()) { 641 Src = Queue.front(); 642 Queue.pop(); 643 for (auto Jump : Func.Blocks[Src].SuccJumps) { 644 uint64_t Dst = Jump->Target; 645 if (Jump->Flow > 0 && !Visited[Dst]) { 646 Queue.push(Dst); 647 Visited[Dst] = true; 648 } 649 } 650 } 651 } 652 653 /// Find the shortest path from the entry block to an exit block passing 654 /// through a given block. 655 std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { 656 // A path from the entry block to BlockIdx 657 auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); 658 // A path from BlockIdx to an exit block 659 auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); 660 661 // Concatenate the two paths 662 std::vector<FlowJump *> Result; 663 Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); 664 Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); 665 return Result; 666 } 667 668 /// Apply the Dijkstra algorithm to find the shortest path from a given 669 /// Source to a given Target block. 670 /// If Target == -1, then the path ends at an exit block. 671 std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { 672 // Quit early, if possible 673 if (Source == Target) 674 return std::vector<FlowJump *>(); 675 if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) 676 return std::vector<FlowJump *>(); 677 678 // Initialize data structures 679 auto Distance = std::vector<int64_t>(NumBlocks(), INF); 680 auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); 681 Distance[Source] = 0; 682 std::set<std::pair<uint64_t, uint64_t>> Queue; 683 Queue.insert(std::make_pair(Distance[Source], Source)); 684 685 // Run the Dijkstra algorithm 686 while (!Queue.empty()) { 687 uint64_t Src = Queue.begin()->second; 688 Queue.erase(Queue.begin()); 689 // If we found a solution, quit early 690 if (Src == Target || 691 (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) 692 break; 693 694 for (auto Jump : Func.Blocks[Src].SuccJumps) { 695 uint64_t Dst = Jump->Target; 696 int64_t JumpDist = jumpDistance(Jump); 697 if (Distance[Dst] > Distance[Src] + JumpDist) { 698 Queue.erase(std::make_pair(Distance[Dst], Dst)); 699 700 Distance[Dst] = Distance[Src] + JumpDist; 701 Parent[Dst] = Jump; 702 703 Queue.insert(std::make_pair(Distance[Dst], Dst)); 704 } 705 } 706 } 707 // If Target is not provided, find the closest exit block 708 if (Target == AnyExitBlock) { 709 for (uint64_t I = 0; I < NumBlocks(); I++) { 710 if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { 711 if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { 712 Target = I; 713 } 714 } 715 } 716 } 717 assert(Parent[Target] != nullptr && "a path does not exist"); 718 719 // Extract the constructed path 720 std::vector<FlowJump *> Result; 721 uint64_t Now = Target; 722 while (Now != Source) { 723 assert(Now == Parent[Now]->Target && "incorrect parent jump"); 724 Result.push_back(Parent[Now]); 725 Now = Parent[Now]->Source; 726 } 727 // Reverse the path, since it is extracted from Target to Source 728 std::reverse(Result.begin(), Result.end()); 729 return Result; 730 } 731 732 /// A distance of a path for a given jump. 733 /// In order to incite the path to use blocks/jumps with large positive flow, 734 /// and avoid changing branch probability of outgoing edges drastically, 735 /// set the jump distance so as: 736 /// - to minimize the number of unlikely jumps used and subject to that, 737 /// - to minimize the number of Flow == 0 jumps used and subject to that, 738 /// - minimizes total multiplicative Flow increase for the remaining edges. 739 /// To capture this objective with integer distances, we round off fractional 740 /// parts to a multiple of 1 / BaseDistance. 741 int64_t jumpDistance(FlowJump *Jump) const { 742 uint64_t BaseDistance = 743 std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance), 744 std::min(Func.Blocks[Func.Entry].Flow, 745 MinCostMaxFlow::AuxCostUnlikely / NumBlocks())); 746 if (Jump->IsUnlikely) 747 return MinCostMaxFlow::AuxCostUnlikely; 748 if (Jump->Flow > 0) 749 return BaseDistance + BaseDistance / Jump->Flow; 750 return BaseDistance * NumBlocks(); 751 }; 752 753 uint64_t NumBlocks() const { return Func.Blocks.size(); } 754 755 /// Rebalance unknown subgraphs so that the flow is split evenly across the 756 /// outgoing branches of every block of the subgraph. The method iterates over 757 /// blocks with known weight and identifies unknown subgraphs rooted at the 758 /// blocks. Then it verifies if flow rebalancing is feasible and applies it. 759 void rebalanceUnknownSubgraphs() { 760 // Try to find unknown subgraphs from each block 761 for (uint64_t I = 0; I < Func.Blocks.size(); I++) { 762 auto SrcBlock = &Func.Blocks[I]; 763 // Verify if rebalancing rooted at SrcBlock is feasible 764 if (!canRebalanceAtRoot(SrcBlock)) 765 continue; 766 767 // Find an unknown subgraphs starting at SrcBlock. Along the way, 768 // fill in known destinations and intermediate unknown blocks. 769 std::vector<FlowBlock *> UnknownBlocks; 770 std::vector<FlowBlock *> KnownDstBlocks; 771 findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks); 772 773 // Verify if rebalancing of the subgraph is feasible. If the search is 774 // successful, find the unique destination block (which can be null) 775 FlowBlock *DstBlock = nullptr; 776 if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks, 777 DstBlock)) 778 continue; 779 780 // We cannot rebalance subgraphs containing cycles among unknown blocks 781 if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks)) 782 continue; 783 784 // Rebalance the flow 785 rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks); 786 } 787 } 788 789 /// Verify if rebalancing rooted at a given block is possible. 790 bool canRebalanceAtRoot(const FlowBlock *SrcBlock) { 791 // Do not attempt to find unknown subgraphs from an unknown or a 792 // zero-flow block 793 if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) 794 return false; 795 796 // Do not attempt to process subgraphs from a block w/o unknown sucessors 797 bool HasUnknownSuccs = false; 798 for (auto Jump : SrcBlock->SuccJumps) { 799 if (Func.Blocks[Jump->Target].UnknownWeight) { 800 HasUnknownSuccs = true; 801 break; 802 } 803 } 804 if (!HasUnknownSuccs) 805 return false; 806 807 return true; 808 } 809 810 /// Find an unknown subgraph starting at block SrcBlock. The method sets 811 /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks. 812 void findUnknownSubgraph(const FlowBlock *SrcBlock, 813 std::vector<FlowBlock *> &KnownDstBlocks, 814 std::vector<FlowBlock *> &UnknownBlocks) { 815 // Run BFS from SrcBlock and make sure all paths are going through unknown 816 // blocks and end at a known DstBlock 817 auto Visited = BitVector(NumBlocks(), false); 818 std::queue<uint64_t> Queue; 819 820 Queue.push(SrcBlock->Index); 821 Visited[SrcBlock->Index] = true; 822 while (!Queue.empty()) { 823 auto &Block = Func.Blocks[Queue.front()]; 824 Queue.pop(); 825 // Process blocks reachable from Block 826 for (auto Jump : Block.SuccJumps) { 827 // If Jump can be ignored, skip it 828 if (ignoreJump(SrcBlock, nullptr, Jump)) 829 continue; 830 831 uint64_t Dst = Jump->Target; 832 // If Dst has been visited, skip Jump 833 if (Visited[Dst]) 834 continue; 835 // Process block Dst 836 Visited[Dst] = true; 837 if (!Func.Blocks[Dst].UnknownWeight) { 838 KnownDstBlocks.push_back(&Func.Blocks[Dst]); 839 } else { 840 Queue.push(Dst); 841 UnknownBlocks.push_back(&Func.Blocks[Dst]); 842 } 843 } 844 } 845 } 846 847 /// Verify if rebalancing of the subgraph is feasible. If the checks are 848 /// successful, set the unique destination block, DstBlock (can be null). 849 bool canRebalanceSubgraph(const FlowBlock *SrcBlock, 850 const std::vector<FlowBlock *> &KnownDstBlocks, 851 const std::vector<FlowBlock *> &UnknownBlocks, 852 FlowBlock *&DstBlock) { 853 // If the list of unknown blocks is empty, we don't need rebalancing 854 if (UnknownBlocks.empty()) 855 return false; 856 857 // If there are multiple known sinks, we can't rebalance 858 if (KnownDstBlocks.size() > 1) 859 return false; 860 DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front(); 861 862 // Verify sinks of the subgraph 863 for (auto Block : UnknownBlocks) { 864 if (Block->SuccJumps.empty()) { 865 // If there are multiple (known and unknown) sinks, we can't rebalance 866 if (DstBlock != nullptr) 867 return false; 868 continue; 869 } 870 size_t NumIgnoredJumps = 0; 871 for (auto Jump : Block->SuccJumps) { 872 if (ignoreJump(SrcBlock, DstBlock, Jump)) 873 NumIgnoredJumps++; 874 } 875 // If there is a non-sink block in UnknownBlocks with all jumps ignored, 876 // then we can't rebalance 877 if (NumIgnoredJumps == Block->SuccJumps.size()) 878 return false; 879 } 880 881 return true; 882 } 883 884 /// Decide whether the Jump is ignored while processing an unknown subgraphs 885 /// rooted at basic block SrcBlock with the destination block, DstBlock. 886 bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 887 const FlowJump *Jump) { 888 // Ignore unlikely jumps with zero flow 889 if (Jump->IsUnlikely && Jump->Flow == 0) 890 return true; 891 892 auto JumpSource = &Func.Blocks[Jump->Source]; 893 auto JumpTarget = &Func.Blocks[Jump->Target]; 894 895 // Do not ignore jumps coming into DstBlock 896 if (DstBlock != nullptr && JumpTarget == DstBlock) 897 return false; 898 899 // Ignore jumps out of SrcBlock to known blocks 900 if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock) 901 return true; 902 903 // Ignore jumps to known blocks with zero flow 904 if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0) 905 return true; 906 907 return false; 908 } 909 910 /// Verify if the given unknown subgraph is acyclic, and if yes, reorder 911 /// UnknownBlocks in the topological order (so that all jumps are "forward"). 912 bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 913 std::vector<FlowBlock *> &UnknownBlocks) { 914 // Extract local in-degrees in the considered subgraph 915 auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0); 916 auto fillInDegree = [&](const FlowBlock *Block) { 917 for (auto Jump : Block->SuccJumps) { 918 if (ignoreJump(SrcBlock, DstBlock, Jump)) 919 continue; 920 LocalInDegree[Jump->Target]++; 921 } 922 }; 923 fillInDegree(SrcBlock); 924 for (auto Block : UnknownBlocks) { 925 fillInDegree(Block); 926 } 927 // A loop containing SrcBlock 928 if (LocalInDegree[SrcBlock->Index] > 0) 929 return false; 930 931 std::vector<FlowBlock *> AcyclicOrder; 932 std::queue<uint64_t> Queue; 933 Queue.push(SrcBlock->Index); 934 while (!Queue.empty()) { 935 FlowBlock *Block = &Func.Blocks[Queue.front()]; 936 Queue.pop(); 937 // Stop propagation once we reach DstBlock, if any 938 if (DstBlock != nullptr && Block == DstBlock) 939 break; 940 941 // Keep an acyclic order of unknown blocks 942 if (Block->UnknownWeight && Block != SrcBlock) 943 AcyclicOrder.push_back(Block); 944 945 // Add to the queue all successors with zero local in-degree 946 for (auto Jump : Block->SuccJumps) { 947 if (ignoreJump(SrcBlock, DstBlock, Jump)) 948 continue; 949 uint64_t Dst = Jump->Target; 950 LocalInDegree[Dst]--; 951 if (LocalInDegree[Dst] == 0) { 952 Queue.push(Dst); 953 } 954 } 955 } 956 957 // If there is a cycle in the subgraph, AcyclicOrder contains only a subset 958 // of all blocks 959 if (UnknownBlocks.size() != AcyclicOrder.size()) 960 return false; 961 UnknownBlocks = AcyclicOrder; 962 return true; 963 } 964 965 /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and 966 /// having UnknownBlocks intermediate blocks. 967 void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock, 968 const FlowBlock *DstBlock, 969 const std::vector<FlowBlock *> &UnknownBlocks) { 970 assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); 971 972 // Ditribute flow from the source block 973 uint64_t BlockFlow = 0; 974 // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps 975 for (auto Jump : SrcBlock->SuccJumps) { 976 if (ignoreJump(SrcBlock, DstBlock, Jump)) 977 continue; 978 BlockFlow += Jump->Flow; 979 } 980 rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow); 981 982 // Ditribute flow from the remaining blocks 983 for (auto Block : UnknownBlocks) { 984 assert(Block->UnknownWeight && "incorrect unknown subgraph"); 985 uint64_t BlockFlow = 0; 986 // Block's flow is the sum of incoming flows 987 for (auto Jump : Block->PredJumps) { 988 BlockFlow += Jump->Flow; 989 } 990 Block->Flow = BlockFlow; 991 rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow); 992 } 993 } 994 995 /// Redistribute flow for a block in a subgraph rooted at SrcBlock, 996 /// and ending at DstBlock. 997 void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 998 const FlowBlock *Block, uint64_t BlockFlow) { 999 // Process all successor jumps and update corresponding flow values 1000 size_t BlockDegree = 0; 1001 for (auto Jump : Block->SuccJumps) { 1002 if (ignoreJump(SrcBlock, DstBlock, Jump)) 1003 continue; 1004 BlockDegree++; 1005 } 1006 // If all successor jumps of the block are ignored, skip it 1007 if (DstBlock == nullptr && BlockDegree == 0) 1008 return; 1009 assert(BlockDegree > 0 && "all outgoing jumps are ignored"); 1010 1011 // Each of the Block's successors gets the following amount of flow. 1012 // Rounding the value up so that all flow is propagated 1013 uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree; 1014 for (auto Jump : Block->SuccJumps) { 1015 if (ignoreJump(SrcBlock, DstBlock, Jump)) 1016 continue; 1017 uint64_t Flow = std::min(SuccFlow, BlockFlow); 1018 Jump->Flow = Flow; 1019 BlockFlow -= Flow; 1020 } 1021 assert(BlockFlow == 0 && "not all flow is propagated"); 1022 } 1023 1024 /// A constant indicating an arbitrary exit block of a function. 1025 static constexpr uint64_t AnyExitBlock = uint64_t(-1); 1026 1027 /// The function. 1028 FlowFunction &Func; 1029 }; 1030 1031 /// Initializing flow network for a given function. 1032 /// 1033 /// Every block is split into three nodes that are responsible for (i) an 1034 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or 1035 /// reduction of the block weight. 1036 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { 1037 uint64_t NumBlocks = Func.Blocks.size(); 1038 assert(NumBlocks > 1 && "Too few blocks in a function"); 1039 LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); 1040 1041 // Pre-process data: make sure the entry weight is at least 1 1042 if (Func.Blocks[Func.Entry].Weight == 0) { 1043 Func.Blocks[Func.Entry].Weight = 1; 1044 } 1045 // Introducing dummy source/sink pairs to allow flow circulation. 1046 // The nodes corresponding to blocks of Func have indicies in the range 1047 // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 1048 uint64_t S = 3 * NumBlocks; 1049 uint64_t T = S + 1; 1050 uint64_t S1 = S + 2; 1051 uint64_t T1 = S + 3; 1052 1053 Network.initialize(3 * NumBlocks + 4, S1, T1); 1054 1055 // Create three nodes for every block of the function 1056 for (uint64_t B = 0; B < NumBlocks; B++) { 1057 auto &Block = Func.Blocks[B]; 1058 assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && 1059 "non-zero weight of a block w/o weight except for an entry"); 1060 1061 // Split every block into two nodes 1062 uint64_t Bin = 3 * B; 1063 uint64_t Bout = 3 * B + 1; 1064 uint64_t Baux = 3 * B + 2; 1065 if (Block.Weight > 0) { 1066 Network.addEdge(S1, Bout, Block.Weight, 0); 1067 Network.addEdge(Bin, T1, Block.Weight, 0); 1068 } 1069 1070 // Edges from S and to T 1071 assert((!Block.isEntry() || !Block.isExit()) && 1072 "a block cannot be an entry and an exit"); 1073 if (Block.isEntry()) { 1074 Network.addEdge(S, Bin, 0); 1075 } else if (Block.isExit()) { 1076 Network.addEdge(Bout, T, 0); 1077 } 1078 1079 // An auxiliary node to allow increase/reduction of block counts: 1080 // We assume that decreasing block counts is more expensive than increasing, 1081 // and thus, setting separate costs here. In the future we may want to tune 1082 // the relative costs so as to maximize the quality of generated profiles. 1083 int64_t AuxCostInc = SampleProfileProfiCostInc; 1084 int64_t AuxCostDec = SampleProfileProfiCostDec; 1085 if (Block.UnknownWeight) { 1086 // Do not penalize changing weights of blocks w/o known profile count 1087 AuxCostInc = 0; 1088 AuxCostDec = 0; 1089 } else { 1090 // Increasing the count for "cold" blocks with zero initial count is more 1091 // expensive than for "hot" ones 1092 if (Block.Weight == 0) { 1093 AuxCostInc = SampleProfileProfiCostIncZero; 1094 } 1095 // Modifying the count of the entry block is expensive 1096 if (Block.isEntry()) { 1097 AuxCostInc = SampleProfileProfiCostIncEntry; 1098 AuxCostDec = SampleProfileProfiCostDecEntry; 1099 } 1100 } 1101 // For blocks with self-edges, do not penalize a reduction of the count, 1102 // as all of the increase can be attributed to the self-edge 1103 if (Block.HasSelfEdge) { 1104 AuxCostDec = 0; 1105 } 1106 1107 Network.addEdge(Bin, Baux, AuxCostInc); 1108 Network.addEdge(Baux, Bout, AuxCostInc); 1109 if (Block.Weight > 0) { 1110 Network.addEdge(Bout, Baux, AuxCostDec); 1111 Network.addEdge(Baux, Bin, AuxCostDec); 1112 } 1113 } 1114 1115 // Creating edges for every jump 1116 for (auto &Jump : Func.Jumps) { 1117 uint64_t Src = Jump.Source; 1118 uint64_t Dst = Jump.Target; 1119 if (Src != Dst) { 1120 uint64_t SrcOut = 3 * Src + 1; 1121 uint64_t DstIn = 3 * Dst; 1122 uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; 1123 Network.addEdge(SrcOut, DstIn, Cost); 1124 } 1125 } 1126 1127 // Make sure we have a valid flow circulation 1128 Network.addEdge(T, S, 0); 1129 } 1130 1131 /// Extract resulting block and edge counts from the flow network. 1132 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { 1133 uint64_t NumBlocks = Func.Blocks.size(); 1134 1135 // Extract resulting block counts 1136 for (uint64_t Src = 0; Src < NumBlocks; Src++) { 1137 auto &Block = Func.Blocks[Src]; 1138 uint64_t SrcOut = 3 * Src + 1; 1139 int64_t Flow = 0; 1140 for (auto &Adj : Network.getFlow(SrcOut)) { 1141 uint64_t DstIn = Adj.first; 1142 int64_t DstFlow = Adj.second; 1143 bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); 1144 if (!IsAuxNode || Block.HasSelfEdge) { 1145 Flow += DstFlow; 1146 } 1147 } 1148 Block.Flow = Flow; 1149 assert(Flow >= 0 && "negative block flow"); 1150 } 1151 1152 // Extract resulting jump counts 1153 for (auto &Jump : Func.Jumps) { 1154 uint64_t Src = Jump.Source; 1155 uint64_t Dst = Jump.Target; 1156 int64_t Flow = 0; 1157 if (Src != Dst) { 1158 uint64_t SrcOut = 3 * Src + 1; 1159 uint64_t DstIn = 3 * Dst; 1160 Flow = Network.getFlow(SrcOut, DstIn); 1161 } else { 1162 uint64_t SrcOut = 3 * Src + 1; 1163 uint64_t SrcAux = 3 * Src + 2; 1164 int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); 1165 if (AuxFlow > 0) 1166 Flow = AuxFlow; 1167 } 1168 Jump.Flow = Flow; 1169 assert(Flow >= 0 && "negative jump flow"); 1170 } 1171 } 1172 1173 #ifndef NDEBUG 1174 /// Verify that the computed flow values satisfy flow conservation rules 1175 void verifyWeights(const FlowFunction &Func) { 1176 const uint64_t NumBlocks = Func.Blocks.size(); 1177 auto InFlow = std::vector<uint64_t>(NumBlocks, 0); 1178 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); 1179 for (auto &Jump : Func.Jumps) { 1180 InFlow[Jump.Target] += Jump.Flow; 1181 OutFlow[Jump.Source] += Jump.Flow; 1182 } 1183 1184 uint64_t TotalInFlow = 0; 1185 uint64_t TotalOutFlow = 0; 1186 for (uint64_t I = 0; I < NumBlocks; I++) { 1187 auto &Block = Func.Blocks[I]; 1188 if (Block.isEntry()) { 1189 TotalInFlow += Block.Flow; 1190 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 1191 } else if (Block.isExit()) { 1192 TotalOutFlow += Block.Flow; 1193 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 1194 } else { 1195 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 1196 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 1197 } 1198 } 1199 assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); 1200 1201 // Verify that there are no isolated flow components 1202 // One could modify FlowFunction to hold edges indexed by the sources, which 1203 // will avoid a creation of the object 1204 auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks); 1205 for (auto &Jump : Func.Jumps) { 1206 if (Jump.Flow > 0) { 1207 PositiveFlowEdges[Jump.Source].push_back(Jump.Target); 1208 } 1209 } 1210 1211 // Run BFS from the source along edges with positive flow 1212 std::queue<uint64_t> Queue; 1213 auto Visited = BitVector(NumBlocks, false); 1214 Queue.push(Func.Entry); 1215 Visited[Func.Entry] = true; 1216 while (!Queue.empty()) { 1217 uint64_t Src = Queue.front(); 1218 Queue.pop(); 1219 for (uint64_t Dst : PositiveFlowEdges[Src]) { 1220 if (!Visited[Dst]) { 1221 Queue.push(Dst); 1222 Visited[Dst] = true; 1223 } 1224 } 1225 } 1226 1227 // Verify that every block that has a positive flow is reached from the source 1228 // along edges with a positive flow 1229 for (uint64_t I = 0; I < NumBlocks; I++) { 1230 auto &Block = Func.Blocks[I]; 1231 assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); 1232 } 1233 } 1234 #endif 1235 1236 } // end of anonymous namespace 1237 1238 /// Apply the profile inference algorithm for a given flow function 1239 void llvm::applyFlowInference(FlowFunction &Func) { 1240 // Create and apply an inference network model 1241 auto InferenceNetwork = MinCostMaxFlow(); 1242 initializeNetwork(InferenceNetwork, Func); 1243 InferenceNetwork.run(); 1244 1245 // Extract flow values for every block and every edge 1246 extractWeights(InferenceNetwork, Func); 1247 1248 // Post-processing adjustments to the flow 1249 auto Adjuster = FlowAdjuster(Func); 1250 Adjuster.run(); 1251 1252 #ifndef NDEBUG 1253 // Verify the result 1254 verifyWeights(Func); 1255 #endif 1256 } 1257