xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp (revision 5956d97f4b3204318ceb6aa9c77bd0bc6ea87a41)
1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a profile inference algorithm. Given an incomplete and
10 // possibly imprecise block counts, the algorithm reconstructs realistic block
11 // and edge counts that satisfy flow conservation rules, while minimally modify
12 // input block counts.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Utils/SampleProfileInference.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/Support/Debug.h"
19 #include <queue>
20 #include <set>
21 
22 using namespace llvm;
23 #define DEBUG_TYPE "sample-profile-inference"
24 
25 namespace {
26 
27 /// A value indicating an infinite flow/capacity/weight of a block/edge.
28 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
29 /// during the execution.
30 static constexpr int64_t INF = ((int64_t)1) << 50;
31 
32 /// The minimum-cost maximum flow algorithm.
33 ///
34 /// The algorithm finds the maximum flow of minimum cost on a given (directed)
35 /// network using a modified version of the classical Moore-Bellman-Ford
36 /// approach. The algorithm applies a number of augmentation iterations in which
37 /// flow is sent along paths of positive capacity from the source to the sink.
38 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
39 /// where m is the number of edges, n is the number of vertices, and v(f) is the
40 /// value of the maximum flow. However, the observed running time on typical
41 /// instances is sub-quadratic, that is, o(n^2).
42 ///
43 /// The input is a set of edges with specified costs and capacities, and a pair
44 /// of nodes (source and sink). The output is the flow along each edge of the
45 /// minimum total cost respecting the given edge capacities.
46 class MinCostMaxFlow {
47 public:
48   // Initialize algorithm's data structures for a network of a given size.
49   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
50     Source = SourceNode;
51     Target = SinkNode;
52 
53     Nodes = std::vector<Node>(NodeCount);
54     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
55   }
56 
57   // Run the algorithm.
58   int64_t run() {
59     // Find an augmenting path and update the flow along the path
60     size_t AugmentationIters = 0;
61     while (findAugmentingPath()) {
62       augmentFlowAlongPath();
63       AugmentationIters++;
64     }
65 
66     // Compute the total flow and its cost
67     int64_t TotalCost = 0;
68     int64_t TotalFlow = 0;
69     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
70       for (auto &Edge : Edges[Src]) {
71         if (Edge.Flow > 0) {
72           TotalCost += Edge.Cost * Edge.Flow;
73           if (Src == Source)
74             TotalFlow += Edge.Flow;
75         }
76       }
77     }
78     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
79                       << " iterations with " << TotalFlow << " total flow"
80                       << " of " << TotalCost << " cost\n");
81     (void)TotalFlow;
82     return TotalCost;
83   }
84 
85   /// Adding an edge to the network with a specified capacity and a cost.
86   /// Multiple edges between a pair of nodes are allowed but self-edges
87   /// are not supported.
88   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
89     assert(Capacity > 0 && "adding an edge of zero capacity");
90     assert(Src != Dst && "loop edge are not supported");
91 
92     Edge SrcEdge;
93     SrcEdge.Dst = Dst;
94     SrcEdge.Cost = Cost;
95     SrcEdge.Capacity = Capacity;
96     SrcEdge.Flow = 0;
97     SrcEdge.RevEdgeIndex = Edges[Dst].size();
98 
99     Edge DstEdge;
100     DstEdge.Dst = Src;
101     DstEdge.Cost = -Cost;
102     DstEdge.Capacity = 0;
103     DstEdge.Flow = 0;
104     DstEdge.RevEdgeIndex = Edges[Src].size();
105 
106     Edges[Src].push_back(SrcEdge);
107     Edges[Dst].push_back(DstEdge);
108   }
109 
110   /// Adding an edge to the network of infinite capacity and a given cost.
111   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
112     addEdge(Src, Dst, INF, Cost);
113   }
114 
115   /// Get the total flow from a given source node.
116   /// Returns a list of pairs (target node, amount of flow to the target).
117   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
118     std::vector<std::pair<uint64_t, int64_t>> Flow;
119     for (auto &Edge : Edges[Src]) {
120       if (Edge.Flow > 0)
121         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
122     }
123     return Flow;
124   }
125 
126   /// Get the total flow between a pair of nodes.
127   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
128     int64_t Flow = 0;
129     for (auto &Edge : Edges[Src]) {
130       if (Edge.Dst == Dst) {
131         Flow += Edge.Flow;
132       }
133     }
134     return Flow;
135   }
136 
137   /// A cost of increasing a block's count by one.
138   static constexpr int64_t AuxCostInc = 10;
139   /// A cost of decreasing a block's count by one.
140   static constexpr int64_t AuxCostDec = 20;
141   /// A cost of increasing a count of zero-weight block by one.
142   static constexpr int64_t AuxCostIncZero = 11;
143   /// A cost of increasing the entry block's count by one.
144   static constexpr int64_t AuxCostIncEntry = 40;
145   /// A cost of decreasing the entry block's count by one.
146   static constexpr int64_t AuxCostDecEntry = 10;
147   /// A cost of taking an unlikely jump.
148   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
149 
150 private:
151   /// Check for existence of an augmenting path with a positive capacity.
152   bool findAugmentingPath() {
153     // Initialize data structures
154     for (auto &Node : Nodes) {
155       Node.Distance = INF;
156       Node.ParentNode = uint64_t(-1);
157       Node.ParentEdgeIndex = uint64_t(-1);
158       Node.Taken = false;
159     }
160 
161     std::queue<uint64_t> Queue;
162     Queue.push(Source);
163     Nodes[Source].Distance = 0;
164     Nodes[Source].Taken = true;
165     while (!Queue.empty()) {
166       uint64_t Src = Queue.front();
167       Queue.pop();
168       Nodes[Src].Taken = false;
169       // Although the residual network contains edges with negative costs
170       // (in particular, backward edges), it can be shown that there are no
171       // negative-weight cycles and the following two invariants are maintained:
172       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
173       // where Dist is the length of the shortest path between two nodes. This
174       // allows to prune the search-space of the path-finding algorithm using
175       // the following early-stop criteria:
176       // -- If we find a path with zero-distance from Source to Target, stop the
177       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
178       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
179       //    process node V, as it is guaranteed _not_ to be on a shortest path
180       //    from Source to Target; it follows from inequalities
181       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
182       //                         >= Dist[Source, V]
183       if (Nodes[Target].Distance == 0)
184         break;
185       if (Nodes[Src].Distance > Nodes[Target].Distance)
186         continue;
187 
188       // Process adjacent edges
189       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
190         auto &Edge = Edges[Src][EdgeIdx];
191         if (Edge.Flow < Edge.Capacity) {
192           uint64_t Dst = Edge.Dst;
193           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
194           if (Nodes[Dst].Distance > NewDistance) {
195             // Update the distance and the parent node/edge
196             Nodes[Dst].Distance = NewDistance;
197             Nodes[Dst].ParentNode = Src;
198             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
199             // Add the node to the queue, if it is not there yet
200             if (!Nodes[Dst].Taken) {
201               Queue.push(Dst);
202               Nodes[Dst].Taken = true;
203             }
204           }
205         }
206       }
207     }
208 
209     return Nodes[Target].Distance != INF;
210   }
211 
212   /// Update the current flow along the augmenting path.
213   void augmentFlowAlongPath() {
214     // Find path capacity
215     int64_t PathCapacity = INF;
216     uint64_t Now = Target;
217     while (Now != Source) {
218       uint64_t Pred = Nodes[Now].ParentNode;
219       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
220       PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
221       Now = Pred;
222     }
223 
224     assert(PathCapacity > 0 && "found an incorrect augmenting path");
225 
226     // Update the flow along the path
227     Now = Target;
228     while (Now != Source) {
229       uint64_t Pred = Nodes[Now].ParentNode;
230       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
231       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
232 
233       Edge.Flow += PathCapacity;
234       RevEdge.Flow -= PathCapacity;
235 
236       Now = Pred;
237     }
238   }
239 
240   /// A node in a flow network.
241   struct Node {
242     /// The cost of the cheapest path from the source to the current node.
243     int64_t Distance;
244     /// The node preceding the current one in the path.
245     uint64_t ParentNode;
246     /// The index of the edge between ParentNode and the current node.
247     uint64_t ParentEdgeIndex;
248     /// An indicator of whether the current node is in a queue.
249     bool Taken;
250   };
251   /// An edge in a flow network.
252   struct Edge {
253     /// The cost of the edge.
254     int64_t Cost;
255     /// The capacity of the edge.
256     int64_t Capacity;
257     /// The current flow on the edge.
258     int64_t Flow;
259     /// The destination node of the edge.
260     uint64_t Dst;
261     /// The index of the reverse edge between Dst and the current node.
262     uint64_t RevEdgeIndex;
263   };
264 
265   /// The set of network nodes.
266   std::vector<Node> Nodes;
267   /// The set of network edges.
268   std::vector<std::vector<Edge>> Edges;
269   /// Source node of the flow.
270   uint64_t Source;
271   /// Target (sink) node of the flow.
272   uint64_t Target;
273 };
274 
275 /// A post-processing adjustment of control flow. It applies two steps by
276 /// rerouting some flow and making it more realistic:
277 ///
278 /// - First, it removes all isolated components ("islands") with a positive flow
279 ///   that are unreachable from the entry block. For every such component, we
280 ///   find the shortest from the entry to an exit passing through the component,
281 ///   and increase the flow by one unit along the path.
282 ///
283 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
284 ///   with no sampled counts. Then it rebalnces the flow that goes through such
285 ///   a subgraph so that each branch is taken with probability 50%.
286 ///   An unknown subgraph is such that for every two nodes u and v:
287 ///     - u dominates v and u is not unknown;
288 ///     - v post-dominates u; and
289 ///     - all inner-nodes of all (u,v)-paths are unknown.
290 ///
291 class FlowAdjuster {
292 public:
293   FlowAdjuster(FlowFunction &Func) : Func(Func) {
294     assert(Func.Blocks[Func.Entry].isEntry() &&
295            "incorrect index of the entry block");
296   }
297 
298   // Run the post-processing
299   void run() {
300     /// Adjust the flow to get rid of isolated components.
301     joinIsolatedComponents();
302 
303     /// Rebalance the flow inside unknown subgraphs.
304     rebalanceUnknownSubgraphs();
305   }
306 
307 private:
308   void joinIsolatedComponents() {
309     // Find blocks that are reachable from the source
310     auto Visited = BitVector(NumBlocks(), false);
311     findReachable(Func.Entry, Visited);
312 
313     // Iterate over all non-reachable blocks and adjust their weights
314     for (uint64_t I = 0; I < NumBlocks(); I++) {
315       auto &Block = Func.Blocks[I];
316       if (Block.Flow > 0 && !Visited[I]) {
317         // Find a path from the entry to an exit passing through the block I
318         auto Path = findShortestPath(I);
319         // Increase the flow along the path
320         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
321                "incorrectly computed path adjusting control flow");
322         Func.Blocks[Func.Entry].Flow += 1;
323         for (auto &Jump : Path) {
324           Jump->Flow += 1;
325           Func.Blocks[Jump->Target].Flow += 1;
326           // Update reachability
327           findReachable(Jump->Target, Visited);
328         }
329       }
330     }
331   }
332 
333   /// Run BFS from a given block along the jumps with a positive flow and mark
334   /// all reachable blocks.
335   void findReachable(uint64_t Src, BitVector &Visited) {
336     if (Visited[Src])
337       return;
338     std::queue<uint64_t> Queue;
339     Queue.push(Src);
340     Visited[Src] = true;
341     while (!Queue.empty()) {
342       Src = Queue.front();
343       Queue.pop();
344       for (auto Jump : Func.Blocks[Src].SuccJumps) {
345         uint64_t Dst = Jump->Target;
346         if (Jump->Flow > 0 && !Visited[Dst]) {
347           Queue.push(Dst);
348           Visited[Dst] = true;
349         }
350       }
351     }
352   }
353 
354   /// Find the shortest path from the entry block to an exit block passing
355   /// through a given block.
356   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
357     // A path from the entry block to BlockIdx
358     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
359     // A path from BlockIdx to an exit block
360     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
361 
362     // Concatenate the two paths
363     std::vector<FlowJump *> Result;
364     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
365     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
366     return Result;
367   }
368 
369   /// Apply the Dijkstra algorithm to find the shortest path from a given
370   /// Source to a given Target block.
371   /// If Target == -1, then the path ends at an exit block.
372   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
373     // Quit early, if possible
374     if (Source == Target)
375       return std::vector<FlowJump *>();
376     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
377       return std::vector<FlowJump *>();
378 
379     // Initialize data structures
380     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
381     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
382     Distance[Source] = 0;
383     std::set<std::pair<uint64_t, uint64_t>> Queue;
384     Queue.insert(std::make_pair(Distance[Source], Source));
385 
386     // Run the Dijkstra algorithm
387     while (!Queue.empty()) {
388       uint64_t Src = Queue.begin()->second;
389       Queue.erase(Queue.begin());
390       // If we found a solution, quit early
391       if (Src == Target ||
392           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
393         break;
394 
395       for (auto Jump : Func.Blocks[Src].SuccJumps) {
396         uint64_t Dst = Jump->Target;
397         int64_t JumpDist = jumpDistance(Jump);
398         if (Distance[Dst] > Distance[Src] + JumpDist) {
399           Queue.erase(std::make_pair(Distance[Dst], Dst));
400 
401           Distance[Dst] = Distance[Src] + JumpDist;
402           Parent[Dst] = Jump;
403 
404           Queue.insert(std::make_pair(Distance[Dst], Dst));
405         }
406       }
407     }
408     // If Target is not provided, find the closest exit block
409     if (Target == AnyExitBlock) {
410       for (uint64_t I = 0; I < NumBlocks(); I++) {
411         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
412           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
413             Target = I;
414           }
415         }
416       }
417     }
418     assert(Parent[Target] != nullptr && "a path does not exist");
419 
420     // Extract the constructed path
421     std::vector<FlowJump *> Result;
422     uint64_t Now = Target;
423     while (Now != Source) {
424       assert(Now == Parent[Now]->Target && "incorrect parent jump");
425       Result.push_back(Parent[Now]);
426       Now = Parent[Now]->Source;
427     }
428     // Reverse the path, since it is extracted from Target to Source
429     std::reverse(Result.begin(), Result.end());
430     return Result;
431   }
432 
433   /// A distance of a path for a given jump.
434   /// In order to incite the path to use blocks/jumps with large positive flow,
435   /// and avoid changing branch probability of outgoing edges drastically,
436   /// set the distance as follows:
437   ///   if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0)
438   ///   if Block.Weight > 0, then distance = 1
439   ///   otherwise distance >> 1
440   int64_t jumpDistance(FlowJump *Jump) const {
441     int64_t BaseDistance = 100;
442     if (Jump->IsUnlikely)
443       return MinCostMaxFlow::AuxCostUnlikely;
444     if (Jump->Flow > 0)
445       return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
446     if (Func.Blocks[Jump->Target].Weight > 0)
447       return BaseDistance;
448     return BaseDistance * (NumBlocks() + 1);
449   };
450 
451   uint64_t NumBlocks() const { return Func.Blocks.size(); }
452 
453   /// Rebalance unknown subgraphs so that the flow is split evenly across the
454   /// outgoing branches of every block of the subgraph. The method iterates over
455   /// blocks with known weight and identifies unknown subgraphs rooted at the
456   /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
457   void rebalanceUnknownSubgraphs() {
458     // Try to find unknown subgraphs from each block
459     for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
460       auto SrcBlock = &Func.Blocks[I];
461       // Verify if rebalancing rooted at SrcBlock is feasible
462       if (!canRebalanceAtRoot(SrcBlock))
463         continue;
464 
465       // Find an unknown subgraphs starting at SrcBlock. Along the way,
466       // fill in known destinations and intermediate unknown blocks.
467       std::vector<FlowBlock *> UnknownBlocks;
468       std::vector<FlowBlock *> KnownDstBlocks;
469       findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
470 
471       // Verify if rebalancing of the subgraph is feasible. If the search is
472       // successful, find the unique destination block (which can be null)
473       FlowBlock *DstBlock = nullptr;
474       if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
475                                 DstBlock))
476         continue;
477 
478       // We cannot rebalance subgraphs containing cycles among unknown blocks
479       if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
480         continue;
481 
482       // Rebalance the flow
483       rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
484     }
485   }
486 
487   /// Verify if rebalancing rooted at a given block is possible.
488   bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
489     // Do not attempt to find unknown subgraphs from an unknown or a
490     // zero-flow block
491     if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
492       return false;
493 
494     // Do not attempt to process subgraphs from a block w/o unknown sucessors
495     bool HasUnknownSuccs = false;
496     for (auto Jump : SrcBlock->SuccJumps) {
497       if (Func.Blocks[Jump->Target].UnknownWeight) {
498         HasUnknownSuccs = true;
499         break;
500       }
501     }
502     if (!HasUnknownSuccs)
503       return false;
504 
505     return true;
506   }
507 
508   /// Find an unknown subgraph starting at block SrcBlock. The method sets
509   /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
510   void findUnknownSubgraph(const FlowBlock *SrcBlock,
511                            std::vector<FlowBlock *> &KnownDstBlocks,
512                            std::vector<FlowBlock *> &UnknownBlocks) {
513     // Run BFS from SrcBlock and make sure all paths are going through unknown
514     // blocks and end at a non-unknown DstBlock
515     auto Visited = BitVector(NumBlocks(), false);
516     std::queue<uint64_t> Queue;
517 
518     Queue.push(SrcBlock->Index);
519     Visited[SrcBlock->Index] = true;
520     while (!Queue.empty()) {
521       auto &Block = Func.Blocks[Queue.front()];
522       Queue.pop();
523       // Process blocks reachable from Block
524       for (auto Jump : Block.SuccJumps) {
525         // If Jump can be ignored, skip it
526         if (ignoreJump(SrcBlock, nullptr, Jump))
527           continue;
528 
529         uint64_t Dst = Jump->Target;
530         // If Dst has been visited, skip Jump
531         if (Visited[Dst])
532           continue;
533         // Process block Dst
534         Visited[Dst] = true;
535         if (!Func.Blocks[Dst].UnknownWeight) {
536           KnownDstBlocks.push_back(&Func.Blocks[Dst]);
537         } else {
538           Queue.push(Dst);
539           UnknownBlocks.push_back(&Func.Blocks[Dst]);
540         }
541       }
542     }
543   }
544 
545   /// Verify if rebalancing of the subgraph is feasible. If the checks are
546   /// successful, set the unique destination block, DstBlock (can be null).
547   bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
548                             const std::vector<FlowBlock *> &KnownDstBlocks,
549                             const std::vector<FlowBlock *> &UnknownBlocks,
550                             FlowBlock *&DstBlock) {
551     // If the list of unknown blocks is empty, we don't need rebalancing
552     if (UnknownBlocks.empty())
553       return false;
554 
555     // If there are multiple known sinks, we can't rebalance
556     if (KnownDstBlocks.size() > 1)
557       return false;
558     DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
559 
560     // Verify sinks of the subgraph
561     for (auto Block : UnknownBlocks) {
562       if (Block->SuccJumps.empty()) {
563         // If there are multiple (known and unknown) sinks, we can't rebalance
564         if (DstBlock != nullptr)
565           return false;
566         continue;
567       }
568       size_t NumIgnoredJumps = 0;
569       for (auto Jump : Block->SuccJumps) {
570         if (ignoreJump(SrcBlock, DstBlock, Jump))
571           NumIgnoredJumps++;
572       }
573       // If there is a non-sink block in UnknownBlocks with all jumps ignored,
574       // then we can't rebalance
575       if (NumIgnoredJumps == Block->SuccJumps.size())
576         return false;
577     }
578 
579     return true;
580   }
581 
582   /// Decide whether the Jump is ignored while processing an unknown subgraphs
583   /// rooted at basic block SrcBlock with the destination block, DstBlock.
584   bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
585                   const FlowJump *Jump) {
586     // Ignore unlikely jumps with zero flow
587     if (Jump->IsUnlikely && Jump->Flow == 0)
588       return true;
589 
590     auto JumpSource = &Func.Blocks[Jump->Source];
591     auto JumpTarget = &Func.Blocks[Jump->Target];
592 
593     // Do not ignore jumps coming into DstBlock
594     if (DstBlock != nullptr && JumpTarget == DstBlock)
595       return false;
596 
597     // Ignore jumps out of SrcBlock to known blocks
598     if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
599       return true;
600 
601     // Ignore jumps to known blocks with zero flow
602     if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
603       return true;
604 
605     return false;
606   }
607 
608   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
609   /// UnknownBlocks in the topological order (so that all jumps are "forward").
610   bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
611                          std::vector<FlowBlock *> &UnknownBlocks) {
612     // Extract local in-degrees in the considered subgraph
613     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
614     auto fillInDegree = [&](const FlowBlock *Block) {
615       for (auto Jump : Block->SuccJumps) {
616         if (ignoreJump(SrcBlock, DstBlock, Jump))
617           continue;
618         LocalInDegree[Jump->Target]++;
619       }
620     };
621     fillInDegree(SrcBlock);
622     for (auto Block : UnknownBlocks) {
623       fillInDegree(Block);
624     }
625     // A loop containing SrcBlock
626     if (LocalInDegree[SrcBlock->Index] > 0)
627       return false;
628 
629     std::vector<FlowBlock *> AcyclicOrder;
630     std::queue<uint64_t> Queue;
631     Queue.push(SrcBlock->Index);
632     while (!Queue.empty()) {
633       FlowBlock *Block = &Func.Blocks[Queue.front()];
634       Queue.pop();
635       // Stop propagation once we reach DstBlock, if any
636       if (DstBlock != nullptr && Block == DstBlock)
637         break;
638 
639       // Keep an acyclic order of unknown blocks
640       if (Block->UnknownWeight && Block != SrcBlock)
641         AcyclicOrder.push_back(Block);
642 
643       // Add to the queue all successors with zero local in-degree
644       for (auto Jump : Block->SuccJumps) {
645         if (ignoreJump(SrcBlock, DstBlock, Jump))
646           continue;
647         uint64_t Dst = Jump->Target;
648         LocalInDegree[Dst]--;
649         if (LocalInDegree[Dst] == 0) {
650           Queue.push(Dst);
651         }
652       }
653     }
654 
655     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
656     // of all blocks
657     if (UnknownBlocks.size() != AcyclicOrder.size())
658       return false;
659     UnknownBlocks = AcyclicOrder;
660     return true;
661   }
662 
663   /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
664   /// having UnknownBlocks intermediate blocks.
665   void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
666                                 const FlowBlock *DstBlock,
667                                 const std::vector<FlowBlock *> &UnknownBlocks) {
668     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
669 
670     // Ditribute flow from the source block
671     uint64_t BlockFlow = 0;
672     // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
673     for (auto Jump : SrcBlock->SuccJumps) {
674       if (ignoreJump(SrcBlock, DstBlock, Jump))
675         continue;
676       BlockFlow += Jump->Flow;
677     }
678     rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
679 
680     // Ditribute flow from the remaining blocks
681     for (auto Block : UnknownBlocks) {
682       assert(Block->UnknownWeight && "incorrect unknown subgraph");
683       uint64_t BlockFlow = 0;
684       // Block's flow is the sum of incoming flows
685       for (auto Jump : Block->PredJumps) {
686         BlockFlow += Jump->Flow;
687       }
688       Block->Flow = BlockFlow;
689       rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
690     }
691   }
692 
693   /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
694   /// and ending at DstBlock.
695   void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
696                       const FlowBlock *Block, uint64_t BlockFlow) {
697     // Process all successor jumps and update corresponding flow values
698     size_t BlockDegree = 0;
699     for (auto Jump : Block->SuccJumps) {
700       if (ignoreJump(SrcBlock, DstBlock, Jump))
701         continue;
702       BlockDegree++;
703     }
704     // If all successor jumps of the block are ignored, skip it
705     if (DstBlock == nullptr && BlockDegree == 0)
706       return;
707     assert(BlockDegree > 0 && "all outgoing jumps are ignored");
708 
709     // Each of the Block's successors gets the following amount of flow.
710     // Rounding the value up so that all flow is propagated
711     uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
712     for (auto Jump : Block->SuccJumps) {
713       if (ignoreJump(SrcBlock, DstBlock, Jump))
714         continue;
715       uint64_t Flow = std::min(SuccFlow, BlockFlow);
716       Jump->Flow = Flow;
717       BlockFlow -= Flow;
718     }
719     assert(BlockFlow == 0 && "not all flow is propagated");
720   }
721 
722   /// A constant indicating an arbitrary exit block of a function.
723   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
724 
725   /// The function.
726   FlowFunction &Func;
727 };
728 
729 /// Initializing flow network for a given function.
730 ///
731 /// Every block is split into three nodes that are responsible for (i) an
732 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
733 /// reduction of the block weight.
734 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
735   uint64_t NumBlocks = Func.Blocks.size();
736   assert(NumBlocks > 1 && "Too few blocks in a function");
737   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
738 
739   // Pre-process data: make sure the entry weight is at least 1
740   if (Func.Blocks[Func.Entry].Weight == 0) {
741     Func.Blocks[Func.Entry].Weight = 1;
742   }
743   // Introducing dummy source/sink pairs to allow flow circulation.
744   // The nodes corresponding to blocks of Func have indicies in the range
745   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
746   uint64_t S = 3 * NumBlocks;
747   uint64_t T = S + 1;
748   uint64_t S1 = S + 2;
749   uint64_t T1 = S + 3;
750 
751   Network.initialize(3 * NumBlocks + 4, S1, T1);
752 
753   // Create three nodes for every block of the function
754   for (uint64_t B = 0; B < NumBlocks; B++) {
755     auto &Block = Func.Blocks[B];
756     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
757            "non-zero weight of a block w/o weight except for an entry");
758 
759     // Split every block into two nodes
760     uint64_t Bin = 3 * B;
761     uint64_t Bout = 3 * B + 1;
762     uint64_t Baux = 3 * B + 2;
763     if (Block.Weight > 0) {
764       Network.addEdge(S1, Bout, Block.Weight, 0);
765       Network.addEdge(Bin, T1, Block.Weight, 0);
766     }
767 
768     // Edges from S and to T
769     assert((!Block.isEntry() || !Block.isExit()) &&
770            "a block cannot be an entry and an exit");
771     if (Block.isEntry()) {
772       Network.addEdge(S, Bin, 0);
773     } else if (Block.isExit()) {
774       Network.addEdge(Bout, T, 0);
775     }
776 
777     // An auxiliary node to allow increase/reduction of block counts:
778     // We assume that decreasing block counts is more expensive than increasing,
779     // and thus, setting separate costs here. In the future we may want to tune
780     // the relative costs so as to maximize the quality of generated profiles.
781     int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
782     int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
783     if (Block.UnknownWeight) {
784       // Do not penalize changing weights of blocks w/o known profile count
785       AuxCostInc = 0;
786       AuxCostDec = 0;
787     } else {
788       // Increasing the count for "cold" blocks with zero initial count is more
789       // expensive than for "hot" ones
790       if (Block.Weight == 0) {
791         AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
792       }
793       // Modifying the count of the entry block is expensive
794       if (Block.isEntry()) {
795         AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
796         AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
797       }
798     }
799     // For blocks with self-edges, do not penalize a reduction of the count,
800     // as all of the increase can be attributed to the self-edge
801     if (Block.HasSelfEdge) {
802       AuxCostDec = 0;
803     }
804 
805     Network.addEdge(Bin, Baux, AuxCostInc);
806     Network.addEdge(Baux, Bout, AuxCostInc);
807     if (Block.Weight > 0) {
808       Network.addEdge(Bout, Baux, AuxCostDec);
809       Network.addEdge(Baux, Bin, AuxCostDec);
810     }
811   }
812 
813   // Creating edges for every jump
814   for (auto &Jump : Func.Jumps) {
815     uint64_t Src = Jump.Source;
816     uint64_t Dst = Jump.Target;
817     if (Src != Dst) {
818       uint64_t SrcOut = 3 * Src + 1;
819       uint64_t DstIn = 3 * Dst;
820       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
821       Network.addEdge(SrcOut, DstIn, Cost);
822     }
823   }
824 
825   // Make sure we have a valid flow circulation
826   Network.addEdge(T, S, 0);
827 }
828 
829 /// Extract resulting block and edge counts from the flow network.
830 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
831   uint64_t NumBlocks = Func.Blocks.size();
832 
833   // Extract resulting block counts
834   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
835     auto &Block = Func.Blocks[Src];
836     uint64_t SrcOut = 3 * Src + 1;
837     int64_t Flow = 0;
838     for (auto &Adj : Network.getFlow(SrcOut)) {
839       uint64_t DstIn = Adj.first;
840       int64_t DstFlow = Adj.second;
841       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
842       if (!IsAuxNode || Block.HasSelfEdge) {
843         Flow += DstFlow;
844       }
845     }
846     Block.Flow = Flow;
847     assert(Flow >= 0 && "negative block flow");
848   }
849 
850   // Extract resulting jump counts
851   for (auto &Jump : Func.Jumps) {
852     uint64_t Src = Jump.Source;
853     uint64_t Dst = Jump.Target;
854     int64_t Flow = 0;
855     if (Src != Dst) {
856       uint64_t SrcOut = 3 * Src + 1;
857       uint64_t DstIn = 3 * Dst;
858       Flow = Network.getFlow(SrcOut, DstIn);
859     } else {
860       uint64_t SrcOut = 3 * Src + 1;
861       uint64_t SrcAux = 3 * Src + 2;
862       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
863       if (AuxFlow > 0)
864         Flow = AuxFlow;
865     }
866     Jump.Flow = Flow;
867     assert(Flow >= 0 && "negative jump flow");
868   }
869 }
870 
871 #ifndef NDEBUG
872 /// Verify that the computed flow values satisfy flow conservation rules
873 void verifyWeights(const FlowFunction &Func) {
874   const uint64_t NumBlocks = Func.Blocks.size();
875   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
876   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
877   for (auto &Jump : Func.Jumps) {
878     InFlow[Jump.Target] += Jump.Flow;
879     OutFlow[Jump.Source] += Jump.Flow;
880   }
881 
882   uint64_t TotalInFlow = 0;
883   uint64_t TotalOutFlow = 0;
884   for (uint64_t I = 0; I < NumBlocks; I++) {
885     auto &Block = Func.Blocks[I];
886     if (Block.isEntry()) {
887       TotalInFlow += Block.Flow;
888       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
889     } else if (Block.isExit()) {
890       TotalOutFlow += Block.Flow;
891       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
892     } else {
893       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
894       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
895     }
896   }
897   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
898 
899   // Verify that there are no isolated flow components
900   // One could modify FlowFunction to hold edges indexed by the sources, which
901   // will avoid a creation of the object
902   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
903   for (auto &Jump : Func.Jumps) {
904     if (Jump.Flow > 0) {
905       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
906     }
907   }
908 
909   // Run BFS from the source along edges with positive flow
910   std::queue<uint64_t> Queue;
911   auto Visited = BitVector(NumBlocks, false);
912   Queue.push(Func.Entry);
913   Visited[Func.Entry] = true;
914   while (!Queue.empty()) {
915     uint64_t Src = Queue.front();
916     Queue.pop();
917     for (uint64_t Dst : PositiveFlowEdges[Src]) {
918       if (!Visited[Dst]) {
919         Queue.push(Dst);
920         Visited[Dst] = true;
921       }
922     }
923   }
924 
925   // Verify that every block that has a positive flow is reached from the source
926   // along edges with a positive flow
927   for (uint64_t I = 0; I < NumBlocks; I++) {
928     auto &Block = Func.Blocks[I];
929     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
930   }
931 }
932 #endif
933 
934 } // end of anonymous namespace
935 
936 /// Apply the profile inference algorithm for a given flow function
937 void llvm::applyFlowInference(FlowFunction &Func) {
938   // Create and apply an inference network model
939   auto InferenceNetwork = MinCostMaxFlow();
940   initializeNetwork(InferenceNetwork, Func);
941   InferenceNetwork.run();
942 
943   // Extract flow values for every block and every edge
944   extractWeights(InferenceNetwork, Func);
945 
946   // Post-processing adjustments to the flow
947   auto Adjuster = FlowAdjuster(Func);
948   Adjuster.run();
949 
950 #ifndef NDEBUG
951   // Verify the result
952   verifyWeights(Func);
953 #endif
954 }
955