1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a profile inference algorithm. Given an incomplete and 10 // possibly imprecise block counts, the algorithm reconstructs realistic block 11 // and edge counts that satisfy flow conservation rules, while minimally modify 12 // input block counts. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Transforms/Utils/SampleProfileInference.h" 17 #include "llvm/ADT/BitVector.h" 18 #include "llvm/Support/Debug.h" 19 #include <queue> 20 #include <set> 21 22 using namespace llvm; 23 #define DEBUG_TYPE "sample-profile-inference" 24 25 namespace { 26 27 /// A value indicating an infinite flow/capacity/weight of a block/edge. 28 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up 29 /// during the execution. 30 static constexpr int64_t INF = ((int64_t)1) << 50; 31 32 /// The minimum-cost maximum flow algorithm. 33 /// 34 /// The algorithm finds the maximum flow of minimum cost on a given (directed) 35 /// network using a modified version of the classical Moore-Bellman-Ford 36 /// approach. The algorithm applies a number of augmentation iterations in which 37 /// flow is sent along paths of positive capacity from the source to the sink. 38 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where 39 /// where m is the number of edges, n is the number of vertices, and v(f) is the 40 /// value of the maximum flow. However, the observed running time on typical 41 /// instances is sub-quadratic, that is, o(n^2). 42 /// 43 /// The input is a set of edges with specified costs and capacities, and a pair 44 /// of nodes (source and sink). The output is the flow along each edge of the 45 /// minimum total cost respecting the given edge capacities. 46 class MinCostMaxFlow { 47 public: 48 // Initialize algorithm's data structures for a network of a given size. 49 void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { 50 Source = SourceNode; 51 Target = SinkNode; 52 53 Nodes = std::vector<Node>(NodeCount); 54 Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); 55 } 56 57 // Run the algorithm. 58 int64_t run() { 59 // Find an augmenting path and update the flow along the path 60 size_t AugmentationIters = 0; 61 while (findAugmentingPath()) { 62 augmentFlowAlongPath(); 63 AugmentationIters++; 64 } 65 66 // Compute the total flow and its cost 67 int64_t TotalCost = 0; 68 int64_t TotalFlow = 0; 69 for (uint64_t Src = 0; Src < Nodes.size(); Src++) { 70 for (auto &Edge : Edges[Src]) { 71 if (Edge.Flow > 0) { 72 TotalCost += Edge.Cost * Edge.Flow; 73 if (Src == Source) 74 TotalFlow += Edge.Flow; 75 } 76 } 77 } 78 LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters 79 << " iterations with " << TotalFlow << " total flow" 80 << " of " << TotalCost << " cost\n"); 81 (void)TotalFlow; 82 return TotalCost; 83 } 84 85 /// Adding an edge to the network with a specified capacity and a cost. 86 /// Multiple edges between a pair of nodes are allowed but self-edges 87 /// are not supported. 88 void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { 89 assert(Capacity > 0 && "adding an edge of zero capacity"); 90 assert(Src != Dst && "loop edge are not supported"); 91 92 Edge SrcEdge; 93 SrcEdge.Dst = Dst; 94 SrcEdge.Cost = Cost; 95 SrcEdge.Capacity = Capacity; 96 SrcEdge.Flow = 0; 97 SrcEdge.RevEdgeIndex = Edges[Dst].size(); 98 99 Edge DstEdge; 100 DstEdge.Dst = Src; 101 DstEdge.Cost = -Cost; 102 DstEdge.Capacity = 0; 103 DstEdge.Flow = 0; 104 DstEdge.RevEdgeIndex = Edges[Src].size(); 105 106 Edges[Src].push_back(SrcEdge); 107 Edges[Dst].push_back(DstEdge); 108 } 109 110 /// Adding an edge to the network of infinite capacity and a given cost. 111 void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { 112 addEdge(Src, Dst, INF, Cost); 113 } 114 115 /// Get the total flow from a given source node. 116 /// Returns a list of pairs (target node, amount of flow to the target). 117 const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { 118 std::vector<std::pair<uint64_t, int64_t>> Flow; 119 for (auto &Edge : Edges[Src]) { 120 if (Edge.Flow > 0) 121 Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); 122 } 123 return Flow; 124 } 125 126 /// Get the total flow between a pair of nodes. 127 int64_t getFlow(uint64_t Src, uint64_t Dst) const { 128 int64_t Flow = 0; 129 for (auto &Edge : Edges[Src]) { 130 if (Edge.Dst == Dst) { 131 Flow += Edge.Flow; 132 } 133 } 134 return Flow; 135 } 136 137 /// A cost of increasing a block's count by one. 138 static constexpr int64_t AuxCostInc = 10; 139 /// A cost of decreasing a block's count by one. 140 static constexpr int64_t AuxCostDec = 20; 141 /// A cost of increasing a count of zero-weight block by one. 142 static constexpr int64_t AuxCostIncZero = 11; 143 /// A cost of increasing the entry block's count by one. 144 static constexpr int64_t AuxCostIncEntry = 40; 145 /// A cost of decreasing the entry block's count by one. 146 static constexpr int64_t AuxCostDecEntry = 10; 147 /// A cost of taking an unlikely jump. 148 static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30; 149 150 private: 151 /// Check for existence of an augmenting path with a positive capacity. 152 bool findAugmentingPath() { 153 // Initialize data structures 154 for (auto &Node : Nodes) { 155 Node.Distance = INF; 156 Node.ParentNode = uint64_t(-1); 157 Node.ParentEdgeIndex = uint64_t(-1); 158 Node.Taken = false; 159 } 160 161 std::queue<uint64_t> Queue; 162 Queue.push(Source); 163 Nodes[Source].Distance = 0; 164 Nodes[Source].Taken = true; 165 while (!Queue.empty()) { 166 uint64_t Src = Queue.front(); 167 Queue.pop(); 168 Nodes[Src].Taken = false; 169 // Although the residual network contains edges with negative costs 170 // (in particular, backward edges), it can be shown that there are no 171 // negative-weight cycles and the following two invariants are maintained: 172 // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, 173 // where Dist is the length of the shortest path between two nodes. This 174 // allows to prune the search-space of the path-finding algorithm using 175 // the following early-stop criteria: 176 // -- If we find a path with zero-distance from Source to Target, stop the 177 // search, as the path is the shortest since Dist[Source, Target] >= 0; 178 // -- If we have Dist[Source, V] > Dist[Source, Target], then do not 179 // process node V, as it is guaranteed _not_ to be on a shortest path 180 // from Source to Target; it follows from inequalities 181 // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] 182 // >= Dist[Source, V] 183 if (Nodes[Target].Distance == 0) 184 break; 185 if (Nodes[Src].Distance > Nodes[Target].Distance) 186 continue; 187 188 // Process adjacent edges 189 for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { 190 auto &Edge = Edges[Src][EdgeIdx]; 191 if (Edge.Flow < Edge.Capacity) { 192 uint64_t Dst = Edge.Dst; 193 int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; 194 if (Nodes[Dst].Distance > NewDistance) { 195 // Update the distance and the parent node/edge 196 Nodes[Dst].Distance = NewDistance; 197 Nodes[Dst].ParentNode = Src; 198 Nodes[Dst].ParentEdgeIndex = EdgeIdx; 199 // Add the node to the queue, if it is not there yet 200 if (!Nodes[Dst].Taken) { 201 Queue.push(Dst); 202 Nodes[Dst].Taken = true; 203 } 204 } 205 } 206 } 207 } 208 209 return Nodes[Target].Distance != INF; 210 } 211 212 /// Update the current flow along the augmenting path. 213 void augmentFlowAlongPath() { 214 // Find path capacity 215 int64_t PathCapacity = INF; 216 uint64_t Now = Target; 217 while (Now != Source) { 218 uint64_t Pred = Nodes[Now].ParentNode; 219 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 220 PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow); 221 Now = Pred; 222 } 223 224 assert(PathCapacity > 0 && "found an incorrect augmenting path"); 225 226 // Update the flow along the path 227 Now = Target; 228 while (Now != Source) { 229 uint64_t Pred = Nodes[Now].ParentNode; 230 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 231 auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; 232 233 Edge.Flow += PathCapacity; 234 RevEdge.Flow -= PathCapacity; 235 236 Now = Pred; 237 } 238 } 239 240 /// A node in a flow network. 241 struct Node { 242 /// The cost of the cheapest path from the source to the current node. 243 int64_t Distance; 244 /// The node preceding the current one in the path. 245 uint64_t ParentNode; 246 /// The index of the edge between ParentNode and the current node. 247 uint64_t ParentEdgeIndex; 248 /// An indicator of whether the current node is in a queue. 249 bool Taken; 250 }; 251 /// An edge in a flow network. 252 struct Edge { 253 /// The cost of the edge. 254 int64_t Cost; 255 /// The capacity of the edge. 256 int64_t Capacity; 257 /// The current flow on the edge. 258 int64_t Flow; 259 /// The destination node of the edge. 260 uint64_t Dst; 261 /// The index of the reverse edge between Dst and the current node. 262 uint64_t RevEdgeIndex; 263 }; 264 265 /// The set of network nodes. 266 std::vector<Node> Nodes; 267 /// The set of network edges. 268 std::vector<std::vector<Edge>> Edges; 269 /// Source node of the flow. 270 uint64_t Source; 271 /// Target (sink) node of the flow. 272 uint64_t Target; 273 }; 274 275 /// A post-processing adjustment of control flow. It applies two steps by 276 /// rerouting some flow and making it more realistic: 277 /// 278 /// - First, it removes all isolated components ("islands") with a positive flow 279 /// that are unreachable from the entry block. For every such component, we 280 /// find the shortest from the entry to an exit passing through the component, 281 /// and increase the flow by one unit along the path. 282 /// 283 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks 284 /// with no sampled counts. Then it rebalnces the flow that goes through such 285 /// a subgraph so that each branch is taken with probability 50%. 286 /// An unknown subgraph is such that for every two nodes u and v: 287 /// - u dominates v and u is not unknown; 288 /// - v post-dominates u; and 289 /// - all inner-nodes of all (u,v)-paths are unknown. 290 /// 291 class FlowAdjuster { 292 public: 293 FlowAdjuster(FlowFunction &Func) : Func(Func) { 294 assert(Func.Blocks[Func.Entry].isEntry() && 295 "incorrect index of the entry block"); 296 } 297 298 // Run the post-processing 299 void run() { 300 /// Adjust the flow to get rid of isolated components. 301 joinIsolatedComponents(); 302 303 /// Rebalance the flow inside unknown subgraphs. 304 rebalanceUnknownSubgraphs(); 305 } 306 307 private: 308 void joinIsolatedComponents() { 309 // Find blocks that are reachable from the source 310 auto Visited = BitVector(NumBlocks(), false); 311 findReachable(Func.Entry, Visited); 312 313 // Iterate over all non-reachable blocks and adjust their weights 314 for (uint64_t I = 0; I < NumBlocks(); I++) { 315 auto &Block = Func.Blocks[I]; 316 if (Block.Flow > 0 && !Visited[I]) { 317 // Find a path from the entry to an exit passing through the block I 318 auto Path = findShortestPath(I); 319 // Increase the flow along the path 320 assert(Path.size() > 0 && Path[0]->Source == Func.Entry && 321 "incorrectly computed path adjusting control flow"); 322 Func.Blocks[Func.Entry].Flow += 1; 323 for (auto &Jump : Path) { 324 Jump->Flow += 1; 325 Func.Blocks[Jump->Target].Flow += 1; 326 // Update reachability 327 findReachable(Jump->Target, Visited); 328 } 329 } 330 } 331 } 332 333 /// Run BFS from a given block along the jumps with a positive flow and mark 334 /// all reachable blocks. 335 void findReachable(uint64_t Src, BitVector &Visited) { 336 if (Visited[Src]) 337 return; 338 std::queue<uint64_t> Queue; 339 Queue.push(Src); 340 Visited[Src] = true; 341 while (!Queue.empty()) { 342 Src = Queue.front(); 343 Queue.pop(); 344 for (auto Jump : Func.Blocks[Src].SuccJumps) { 345 uint64_t Dst = Jump->Target; 346 if (Jump->Flow > 0 && !Visited[Dst]) { 347 Queue.push(Dst); 348 Visited[Dst] = true; 349 } 350 } 351 } 352 } 353 354 /// Find the shortest path from the entry block to an exit block passing 355 /// through a given block. 356 std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { 357 // A path from the entry block to BlockIdx 358 auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); 359 // A path from BlockIdx to an exit block 360 auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); 361 362 // Concatenate the two paths 363 std::vector<FlowJump *> Result; 364 Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); 365 Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); 366 return Result; 367 } 368 369 /// Apply the Dijkstra algorithm to find the shortest path from a given 370 /// Source to a given Target block. 371 /// If Target == -1, then the path ends at an exit block. 372 std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { 373 // Quit early, if possible 374 if (Source == Target) 375 return std::vector<FlowJump *>(); 376 if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) 377 return std::vector<FlowJump *>(); 378 379 // Initialize data structures 380 auto Distance = std::vector<int64_t>(NumBlocks(), INF); 381 auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); 382 Distance[Source] = 0; 383 std::set<std::pair<uint64_t, uint64_t>> Queue; 384 Queue.insert(std::make_pair(Distance[Source], Source)); 385 386 // Run the Dijkstra algorithm 387 while (!Queue.empty()) { 388 uint64_t Src = Queue.begin()->second; 389 Queue.erase(Queue.begin()); 390 // If we found a solution, quit early 391 if (Src == Target || 392 (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) 393 break; 394 395 for (auto Jump : Func.Blocks[Src].SuccJumps) { 396 uint64_t Dst = Jump->Target; 397 int64_t JumpDist = jumpDistance(Jump); 398 if (Distance[Dst] > Distance[Src] + JumpDist) { 399 Queue.erase(std::make_pair(Distance[Dst], Dst)); 400 401 Distance[Dst] = Distance[Src] + JumpDist; 402 Parent[Dst] = Jump; 403 404 Queue.insert(std::make_pair(Distance[Dst], Dst)); 405 } 406 } 407 } 408 // If Target is not provided, find the closest exit block 409 if (Target == AnyExitBlock) { 410 for (uint64_t I = 0; I < NumBlocks(); I++) { 411 if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { 412 if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { 413 Target = I; 414 } 415 } 416 } 417 } 418 assert(Parent[Target] != nullptr && "a path does not exist"); 419 420 // Extract the constructed path 421 std::vector<FlowJump *> Result; 422 uint64_t Now = Target; 423 while (Now != Source) { 424 assert(Now == Parent[Now]->Target && "incorrect parent jump"); 425 Result.push_back(Parent[Now]); 426 Now = Parent[Now]->Source; 427 } 428 // Reverse the path, since it is extracted from Target to Source 429 std::reverse(Result.begin(), Result.end()); 430 return Result; 431 } 432 433 /// A distance of a path for a given jump. 434 /// In order to incite the path to use blocks/jumps with large positive flow, 435 /// and avoid changing branch probability of outgoing edges drastically, 436 /// set the distance as follows: 437 /// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0) 438 /// if Block.Weight > 0, then distance = 1 439 /// otherwise distance >> 1 440 int64_t jumpDistance(FlowJump *Jump) const { 441 int64_t BaseDistance = 100; 442 if (Jump->IsUnlikely) 443 return MinCostMaxFlow::AuxCostUnlikely; 444 if (Jump->Flow > 0) 445 return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0); 446 if (Func.Blocks[Jump->Target].Weight > 0) 447 return BaseDistance; 448 return BaseDistance * (NumBlocks() + 1); 449 }; 450 451 uint64_t NumBlocks() const { return Func.Blocks.size(); } 452 453 /// Rebalance unknown subgraphs so that the flow is split evenly across the 454 /// outgoing branches of every block of the subgraph. The method iterates over 455 /// blocks with known weight and identifies unknown subgraphs rooted at the 456 /// blocks. Then it verifies if flow rebalancing is feasible and applies it. 457 void rebalanceUnknownSubgraphs() { 458 // Try to find unknown subgraphs from each block 459 for (uint64_t I = 0; I < Func.Blocks.size(); I++) { 460 auto SrcBlock = &Func.Blocks[I]; 461 // Verify if rebalancing rooted at SrcBlock is feasible 462 if (!canRebalanceAtRoot(SrcBlock)) 463 continue; 464 465 // Find an unknown subgraphs starting at SrcBlock. Along the way, 466 // fill in known destinations and intermediate unknown blocks. 467 std::vector<FlowBlock *> UnknownBlocks; 468 std::vector<FlowBlock *> KnownDstBlocks; 469 findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks); 470 471 // Verify if rebalancing of the subgraph is feasible. If the search is 472 // successful, find the unique destination block (which can be null) 473 FlowBlock *DstBlock = nullptr; 474 if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks, 475 DstBlock)) 476 continue; 477 478 // We cannot rebalance subgraphs containing cycles among unknown blocks 479 if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks)) 480 continue; 481 482 // Rebalance the flow 483 rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks); 484 } 485 } 486 487 /// Verify if rebalancing rooted at a given block is possible. 488 bool canRebalanceAtRoot(const FlowBlock *SrcBlock) { 489 // Do not attempt to find unknown subgraphs from an unknown or a 490 // zero-flow block 491 if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) 492 return false; 493 494 // Do not attempt to process subgraphs from a block w/o unknown sucessors 495 bool HasUnknownSuccs = false; 496 for (auto Jump : SrcBlock->SuccJumps) { 497 if (Func.Blocks[Jump->Target].UnknownWeight) { 498 HasUnknownSuccs = true; 499 break; 500 } 501 } 502 if (!HasUnknownSuccs) 503 return false; 504 505 return true; 506 } 507 508 /// Find an unknown subgraph starting at block SrcBlock. The method sets 509 /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks. 510 void findUnknownSubgraph(const FlowBlock *SrcBlock, 511 std::vector<FlowBlock *> &KnownDstBlocks, 512 std::vector<FlowBlock *> &UnknownBlocks) { 513 // Run BFS from SrcBlock and make sure all paths are going through unknown 514 // blocks and end at a non-unknown DstBlock 515 auto Visited = BitVector(NumBlocks(), false); 516 std::queue<uint64_t> Queue; 517 518 Queue.push(SrcBlock->Index); 519 Visited[SrcBlock->Index] = true; 520 while (!Queue.empty()) { 521 auto &Block = Func.Blocks[Queue.front()]; 522 Queue.pop(); 523 // Process blocks reachable from Block 524 for (auto Jump : Block.SuccJumps) { 525 // If Jump can be ignored, skip it 526 if (ignoreJump(SrcBlock, nullptr, Jump)) 527 continue; 528 529 uint64_t Dst = Jump->Target; 530 // If Dst has been visited, skip Jump 531 if (Visited[Dst]) 532 continue; 533 // Process block Dst 534 Visited[Dst] = true; 535 if (!Func.Blocks[Dst].UnknownWeight) { 536 KnownDstBlocks.push_back(&Func.Blocks[Dst]); 537 } else { 538 Queue.push(Dst); 539 UnknownBlocks.push_back(&Func.Blocks[Dst]); 540 } 541 } 542 } 543 } 544 545 /// Verify if rebalancing of the subgraph is feasible. If the checks are 546 /// successful, set the unique destination block, DstBlock (can be null). 547 bool canRebalanceSubgraph(const FlowBlock *SrcBlock, 548 const std::vector<FlowBlock *> &KnownDstBlocks, 549 const std::vector<FlowBlock *> &UnknownBlocks, 550 FlowBlock *&DstBlock) { 551 // If the list of unknown blocks is empty, we don't need rebalancing 552 if (UnknownBlocks.empty()) 553 return false; 554 555 // If there are multiple known sinks, we can't rebalance 556 if (KnownDstBlocks.size() > 1) 557 return false; 558 DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front(); 559 560 // Verify sinks of the subgraph 561 for (auto Block : UnknownBlocks) { 562 if (Block->SuccJumps.empty()) { 563 // If there are multiple (known and unknown) sinks, we can't rebalance 564 if (DstBlock != nullptr) 565 return false; 566 continue; 567 } 568 size_t NumIgnoredJumps = 0; 569 for (auto Jump : Block->SuccJumps) { 570 if (ignoreJump(SrcBlock, DstBlock, Jump)) 571 NumIgnoredJumps++; 572 } 573 // If there is a non-sink block in UnknownBlocks with all jumps ignored, 574 // then we can't rebalance 575 if (NumIgnoredJumps == Block->SuccJumps.size()) 576 return false; 577 } 578 579 return true; 580 } 581 582 /// Decide whether the Jump is ignored while processing an unknown subgraphs 583 /// rooted at basic block SrcBlock with the destination block, DstBlock. 584 bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 585 const FlowJump *Jump) { 586 // Ignore unlikely jumps with zero flow 587 if (Jump->IsUnlikely && Jump->Flow == 0) 588 return true; 589 590 auto JumpSource = &Func.Blocks[Jump->Source]; 591 auto JumpTarget = &Func.Blocks[Jump->Target]; 592 593 // Do not ignore jumps coming into DstBlock 594 if (DstBlock != nullptr && JumpTarget == DstBlock) 595 return false; 596 597 // Ignore jumps out of SrcBlock to known blocks 598 if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock) 599 return true; 600 601 // Ignore jumps to known blocks with zero flow 602 if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0) 603 return true; 604 605 return false; 606 } 607 608 /// Verify if the given unknown subgraph is acyclic, and if yes, reorder 609 /// UnknownBlocks in the topological order (so that all jumps are "forward"). 610 bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 611 std::vector<FlowBlock *> &UnknownBlocks) { 612 // Extract local in-degrees in the considered subgraph 613 auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0); 614 auto fillInDegree = [&](const FlowBlock *Block) { 615 for (auto Jump : Block->SuccJumps) { 616 if (ignoreJump(SrcBlock, DstBlock, Jump)) 617 continue; 618 LocalInDegree[Jump->Target]++; 619 } 620 }; 621 fillInDegree(SrcBlock); 622 for (auto Block : UnknownBlocks) { 623 fillInDegree(Block); 624 } 625 // A loop containing SrcBlock 626 if (LocalInDegree[SrcBlock->Index] > 0) 627 return false; 628 629 std::vector<FlowBlock *> AcyclicOrder; 630 std::queue<uint64_t> Queue; 631 Queue.push(SrcBlock->Index); 632 while (!Queue.empty()) { 633 FlowBlock *Block = &Func.Blocks[Queue.front()]; 634 Queue.pop(); 635 // Stop propagation once we reach DstBlock, if any 636 if (DstBlock != nullptr && Block == DstBlock) 637 break; 638 639 // Keep an acyclic order of unknown blocks 640 if (Block->UnknownWeight && Block != SrcBlock) 641 AcyclicOrder.push_back(Block); 642 643 // Add to the queue all successors with zero local in-degree 644 for (auto Jump : Block->SuccJumps) { 645 if (ignoreJump(SrcBlock, DstBlock, Jump)) 646 continue; 647 uint64_t Dst = Jump->Target; 648 LocalInDegree[Dst]--; 649 if (LocalInDegree[Dst] == 0) { 650 Queue.push(Dst); 651 } 652 } 653 } 654 655 // If there is a cycle in the subgraph, AcyclicOrder contains only a subset 656 // of all blocks 657 if (UnknownBlocks.size() != AcyclicOrder.size()) 658 return false; 659 UnknownBlocks = AcyclicOrder; 660 return true; 661 } 662 663 /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and 664 /// having UnknownBlocks intermediate blocks. 665 void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock, 666 const FlowBlock *DstBlock, 667 const std::vector<FlowBlock *> &UnknownBlocks) { 668 assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); 669 670 // Ditribute flow from the source block 671 uint64_t BlockFlow = 0; 672 // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps 673 for (auto Jump : SrcBlock->SuccJumps) { 674 if (ignoreJump(SrcBlock, DstBlock, Jump)) 675 continue; 676 BlockFlow += Jump->Flow; 677 } 678 rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow); 679 680 // Ditribute flow from the remaining blocks 681 for (auto Block : UnknownBlocks) { 682 assert(Block->UnknownWeight && "incorrect unknown subgraph"); 683 uint64_t BlockFlow = 0; 684 // Block's flow is the sum of incoming flows 685 for (auto Jump : Block->PredJumps) { 686 BlockFlow += Jump->Flow; 687 } 688 Block->Flow = BlockFlow; 689 rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow); 690 } 691 } 692 693 /// Redistribute flow for a block in a subgraph rooted at SrcBlock, 694 /// and ending at DstBlock. 695 void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 696 const FlowBlock *Block, uint64_t BlockFlow) { 697 // Process all successor jumps and update corresponding flow values 698 size_t BlockDegree = 0; 699 for (auto Jump : Block->SuccJumps) { 700 if (ignoreJump(SrcBlock, DstBlock, Jump)) 701 continue; 702 BlockDegree++; 703 } 704 // If all successor jumps of the block are ignored, skip it 705 if (DstBlock == nullptr && BlockDegree == 0) 706 return; 707 assert(BlockDegree > 0 && "all outgoing jumps are ignored"); 708 709 // Each of the Block's successors gets the following amount of flow. 710 // Rounding the value up so that all flow is propagated 711 uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree; 712 for (auto Jump : Block->SuccJumps) { 713 if (ignoreJump(SrcBlock, DstBlock, Jump)) 714 continue; 715 uint64_t Flow = std::min(SuccFlow, BlockFlow); 716 Jump->Flow = Flow; 717 BlockFlow -= Flow; 718 } 719 assert(BlockFlow == 0 && "not all flow is propagated"); 720 } 721 722 /// A constant indicating an arbitrary exit block of a function. 723 static constexpr uint64_t AnyExitBlock = uint64_t(-1); 724 725 /// The function. 726 FlowFunction &Func; 727 }; 728 729 /// Initializing flow network for a given function. 730 /// 731 /// Every block is split into three nodes that are responsible for (i) an 732 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or 733 /// reduction of the block weight. 734 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { 735 uint64_t NumBlocks = Func.Blocks.size(); 736 assert(NumBlocks > 1 && "Too few blocks in a function"); 737 LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); 738 739 // Pre-process data: make sure the entry weight is at least 1 740 if (Func.Blocks[Func.Entry].Weight == 0) { 741 Func.Blocks[Func.Entry].Weight = 1; 742 } 743 // Introducing dummy source/sink pairs to allow flow circulation. 744 // The nodes corresponding to blocks of Func have indicies in the range 745 // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 746 uint64_t S = 3 * NumBlocks; 747 uint64_t T = S + 1; 748 uint64_t S1 = S + 2; 749 uint64_t T1 = S + 3; 750 751 Network.initialize(3 * NumBlocks + 4, S1, T1); 752 753 // Create three nodes for every block of the function 754 for (uint64_t B = 0; B < NumBlocks; B++) { 755 auto &Block = Func.Blocks[B]; 756 assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && 757 "non-zero weight of a block w/o weight except for an entry"); 758 759 // Split every block into two nodes 760 uint64_t Bin = 3 * B; 761 uint64_t Bout = 3 * B + 1; 762 uint64_t Baux = 3 * B + 2; 763 if (Block.Weight > 0) { 764 Network.addEdge(S1, Bout, Block.Weight, 0); 765 Network.addEdge(Bin, T1, Block.Weight, 0); 766 } 767 768 // Edges from S and to T 769 assert((!Block.isEntry() || !Block.isExit()) && 770 "a block cannot be an entry and an exit"); 771 if (Block.isEntry()) { 772 Network.addEdge(S, Bin, 0); 773 } else if (Block.isExit()) { 774 Network.addEdge(Bout, T, 0); 775 } 776 777 // An auxiliary node to allow increase/reduction of block counts: 778 // We assume that decreasing block counts is more expensive than increasing, 779 // and thus, setting separate costs here. In the future we may want to tune 780 // the relative costs so as to maximize the quality of generated profiles. 781 int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc; 782 int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec; 783 if (Block.UnknownWeight) { 784 // Do not penalize changing weights of blocks w/o known profile count 785 AuxCostInc = 0; 786 AuxCostDec = 0; 787 } else { 788 // Increasing the count for "cold" blocks with zero initial count is more 789 // expensive than for "hot" ones 790 if (Block.Weight == 0) { 791 AuxCostInc = MinCostMaxFlow::AuxCostIncZero; 792 } 793 // Modifying the count of the entry block is expensive 794 if (Block.isEntry()) { 795 AuxCostInc = MinCostMaxFlow::AuxCostIncEntry; 796 AuxCostDec = MinCostMaxFlow::AuxCostDecEntry; 797 } 798 } 799 // For blocks with self-edges, do not penalize a reduction of the count, 800 // as all of the increase can be attributed to the self-edge 801 if (Block.HasSelfEdge) { 802 AuxCostDec = 0; 803 } 804 805 Network.addEdge(Bin, Baux, AuxCostInc); 806 Network.addEdge(Baux, Bout, AuxCostInc); 807 if (Block.Weight > 0) { 808 Network.addEdge(Bout, Baux, AuxCostDec); 809 Network.addEdge(Baux, Bin, AuxCostDec); 810 } 811 } 812 813 // Creating edges for every jump 814 for (auto &Jump : Func.Jumps) { 815 uint64_t Src = Jump.Source; 816 uint64_t Dst = Jump.Target; 817 if (Src != Dst) { 818 uint64_t SrcOut = 3 * Src + 1; 819 uint64_t DstIn = 3 * Dst; 820 uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; 821 Network.addEdge(SrcOut, DstIn, Cost); 822 } 823 } 824 825 // Make sure we have a valid flow circulation 826 Network.addEdge(T, S, 0); 827 } 828 829 /// Extract resulting block and edge counts from the flow network. 830 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { 831 uint64_t NumBlocks = Func.Blocks.size(); 832 833 // Extract resulting block counts 834 for (uint64_t Src = 0; Src < NumBlocks; Src++) { 835 auto &Block = Func.Blocks[Src]; 836 uint64_t SrcOut = 3 * Src + 1; 837 int64_t Flow = 0; 838 for (auto &Adj : Network.getFlow(SrcOut)) { 839 uint64_t DstIn = Adj.first; 840 int64_t DstFlow = Adj.second; 841 bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); 842 if (!IsAuxNode || Block.HasSelfEdge) { 843 Flow += DstFlow; 844 } 845 } 846 Block.Flow = Flow; 847 assert(Flow >= 0 && "negative block flow"); 848 } 849 850 // Extract resulting jump counts 851 for (auto &Jump : Func.Jumps) { 852 uint64_t Src = Jump.Source; 853 uint64_t Dst = Jump.Target; 854 int64_t Flow = 0; 855 if (Src != Dst) { 856 uint64_t SrcOut = 3 * Src + 1; 857 uint64_t DstIn = 3 * Dst; 858 Flow = Network.getFlow(SrcOut, DstIn); 859 } else { 860 uint64_t SrcOut = 3 * Src + 1; 861 uint64_t SrcAux = 3 * Src + 2; 862 int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); 863 if (AuxFlow > 0) 864 Flow = AuxFlow; 865 } 866 Jump.Flow = Flow; 867 assert(Flow >= 0 && "negative jump flow"); 868 } 869 } 870 871 #ifndef NDEBUG 872 /// Verify that the computed flow values satisfy flow conservation rules 873 void verifyWeights(const FlowFunction &Func) { 874 const uint64_t NumBlocks = Func.Blocks.size(); 875 auto InFlow = std::vector<uint64_t>(NumBlocks, 0); 876 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); 877 for (auto &Jump : Func.Jumps) { 878 InFlow[Jump.Target] += Jump.Flow; 879 OutFlow[Jump.Source] += Jump.Flow; 880 } 881 882 uint64_t TotalInFlow = 0; 883 uint64_t TotalOutFlow = 0; 884 for (uint64_t I = 0; I < NumBlocks; I++) { 885 auto &Block = Func.Blocks[I]; 886 if (Block.isEntry()) { 887 TotalInFlow += Block.Flow; 888 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 889 } else if (Block.isExit()) { 890 TotalOutFlow += Block.Flow; 891 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 892 } else { 893 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 894 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 895 } 896 } 897 assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); 898 899 // Verify that there are no isolated flow components 900 // One could modify FlowFunction to hold edges indexed by the sources, which 901 // will avoid a creation of the object 902 auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks); 903 for (auto &Jump : Func.Jumps) { 904 if (Jump.Flow > 0) { 905 PositiveFlowEdges[Jump.Source].push_back(Jump.Target); 906 } 907 } 908 909 // Run BFS from the source along edges with positive flow 910 std::queue<uint64_t> Queue; 911 auto Visited = BitVector(NumBlocks, false); 912 Queue.push(Func.Entry); 913 Visited[Func.Entry] = true; 914 while (!Queue.empty()) { 915 uint64_t Src = Queue.front(); 916 Queue.pop(); 917 for (uint64_t Dst : PositiveFlowEdges[Src]) { 918 if (!Visited[Dst]) { 919 Queue.push(Dst); 920 Visited[Dst] = true; 921 } 922 } 923 } 924 925 // Verify that every block that has a positive flow is reached from the source 926 // along edges with a positive flow 927 for (uint64_t I = 0; I < NumBlocks; I++) { 928 auto &Block = Func.Blocks[I]; 929 assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); 930 } 931 } 932 #endif 933 934 } // end of anonymous namespace 935 936 /// Apply the profile inference algorithm for a given flow function 937 void llvm::applyFlowInference(FlowFunction &Func) { 938 // Create and apply an inference network model 939 auto InferenceNetwork = MinCostMaxFlow(); 940 initializeNetwork(InferenceNetwork, Func); 941 InferenceNetwork.run(); 942 943 // Extract flow values for every block and every edge 944 extractWeights(InferenceNetwork, Func); 945 946 // Post-processing adjustments to the flow 947 auto Adjuster = FlowAdjuster(Func); 948 Adjuster.run(); 949 950 #ifndef NDEBUG 951 // Verify the result 952 verifyWeights(Func); 953 #endif 954 } 955