xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp (revision 43e29d03f416d7dda52112a29600a7c82ee1a91e)
1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a profile inference algorithm. Given an incomplete and
10 // possibly imprecise block counts, the algorithm reconstructs realistic block
11 // and edge counts that satisfy flow conservation rules, while minimally modify
12 // input block counts.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Utils/SampleProfileInference.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include <queue>
21 #include <set>
22 #include <stack>
23 
24 using namespace llvm;
25 #define DEBUG_TYPE "sample-profile-inference"
26 
27 namespace {
28 
29 static cl::opt<bool> SampleProfileEvenFlowDistribution(
30     "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
31     cl::desc("Try to evenly distribute flow when there are multiple equally "
32              "likely options."));
33 
34 static cl::opt<bool> SampleProfileRebalanceUnknown(
35     "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
36     cl::desc("Evenly re-distribute flow among unknown subgraphs."));
37 
38 static cl::opt<bool> SampleProfileJoinIslands(
39     "sample-profile-join-islands", cl::init(true), cl::Hidden,
40     cl::desc("Join isolated components having positive flow."));
41 
42 static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
43     "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
44     cl::desc("The cost of increasing a block's count by one."));
45 
46 static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
47     "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
48     cl::desc("The cost of decreasing a block's count by one."));
49 
50 static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
51     "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
52     cl::desc("The cost of increasing the entry block's count by one."));
53 
54 static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
55     "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
56     cl::desc("The cost of decreasing the entry block's count by one."));
57 
58 static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
59     "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
60     cl::desc("The cost of increasing a count of zero-weight block by one."));
61 
62 static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
63     "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
64     cl::desc("The cost of increasing an unknown block's count by one."));
65 
66 /// A value indicating an infinite flow/capacity/weight of a block/edge.
67 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
68 /// during the execution.
69 static constexpr int64_t INF = ((int64_t)1) << 50;
70 
71 /// The minimum-cost maximum flow algorithm.
72 ///
73 /// The algorithm finds the maximum flow of minimum cost on a given (directed)
74 /// network using a modified version of the classical Moore-Bellman-Ford
75 /// approach. The algorithm applies a number of augmentation iterations in which
76 /// flow is sent along paths of positive capacity from the source to the sink.
77 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
78 /// where m is the number of edges, n is the number of vertices, and v(f) is the
79 /// value of the maximum flow. However, the observed running time on typical
80 /// instances is sub-quadratic, that is, o(n^2).
81 ///
82 /// The input is a set of edges with specified costs and capacities, and a pair
83 /// of nodes (source and sink). The output is the flow along each edge of the
84 /// minimum total cost respecting the given edge capacities.
85 class MinCostMaxFlow {
86 public:
87   MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}
88 
89   // Initialize algorithm's data structures for a network of a given size.
90   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
91     Source = SourceNode;
92     Target = SinkNode;
93 
94     Nodes = std::vector<Node>(NodeCount);
95     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
96     if (Params.EvenFlowDistribution)
97       AugmentingEdges =
98           std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
99   }
100 
101   // Run the algorithm.
102   int64_t run() {
103     LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");
104 
105     // Iteratively find an augmentation path/dag in the network and send the
106     // flow along its edges
107     size_t AugmentationIters = applyFlowAugmentation();
108 
109     // Compute the total flow and its cost
110     int64_t TotalCost = 0;
111     int64_t TotalFlow = 0;
112     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
113       for (auto &Edge : Edges[Src]) {
114         if (Edge.Flow > 0) {
115           TotalCost += Edge.Cost * Edge.Flow;
116           if (Src == Source)
117             TotalFlow += Edge.Flow;
118         }
119       }
120     }
121     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
122                       << " iterations with " << TotalFlow << " total flow"
123                       << " of " << TotalCost << " cost\n");
124     (void)TotalFlow;
125     (void)AugmentationIters;
126     return TotalCost;
127   }
128 
129   /// Adding an edge to the network with a specified capacity and a cost.
130   /// Multiple edges between a pair of nodes are allowed but self-edges
131   /// are not supported.
132   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
133     assert(Capacity > 0 && "adding an edge of zero capacity");
134     assert(Src != Dst && "loop edge are not supported");
135 
136     Edge SrcEdge;
137     SrcEdge.Dst = Dst;
138     SrcEdge.Cost = Cost;
139     SrcEdge.Capacity = Capacity;
140     SrcEdge.Flow = 0;
141     SrcEdge.RevEdgeIndex = Edges[Dst].size();
142 
143     Edge DstEdge;
144     DstEdge.Dst = Src;
145     DstEdge.Cost = -Cost;
146     DstEdge.Capacity = 0;
147     DstEdge.Flow = 0;
148     DstEdge.RevEdgeIndex = Edges[Src].size();
149 
150     Edges[Src].push_back(SrcEdge);
151     Edges[Dst].push_back(DstEdge);
152   }
153 
154   /// Adding an edge to the network of infinite capacity and a given cost.
155   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
156     addEdge(Src, Dst, INF, Cost);
157   }
158 
159   /// Get the total flow from a given source node.
160   /// Returns a list of pairs (target node, amount of flow to the target).
161   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
162     std::vector<std::pair<uint64_t, int64_t>> Flow;
163     for (const auto &Edge : Edges[Src]) {
164       if (Edge.Flow > 0)
165         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
166     }
167     return Flow;
168   }
169 
170   /// Get the total flow between a pair of nodes.
171   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
172     int64_t Flow = 0;
173     for (const auto &Edge : Edges[Src]) {
174       if (Edge.Dst == Dst) {
175         Flow += Edge.Flow;
176       }
177     }
178     return Flow;
179   }
180 
181 private:
182   /// Iteratively find an augmentation path/dag in the network and send the
183   /// flow along its edges. The method returns the number of applied iterations.
184   size_t applyFlowAugmentation() {
185     size_t AugmentationIters = 0;
186     while (findAugmentingPath()) {
187       uint64_t PathCapacity = computeAugmentingPathCapacity();
188       while (PathCapacity > 0) {
189         bool Progress = false;
190         if (Params.EvenFlowDistribution) {
191           // Identify node/edge candidates for augmentation
192           identifyShortestEdges(PathCapacity);
193 
194           // Find an augmenting DAG
195           auto AugmentingOrder = findAugmentingDAG();
196 
197           // Apply the DAG augmentation
198           Progress = augmentFlowAlongDAG(AugmentingOrder);
199           PathCapacity = computeAugmentingPathCapacity();
200         }
201 
202         if (!Progress) {
203           augmentFlowAlongPath(PathCapacity);
204           PathCapacity = 0;
205         }
206 
207         AugmentationIters++;
208       }
209     }
210     return AugmentationIters;
211   }
212 
213   /// Compute the capacity of the cannonical augmenting path. If the path is
214   /// saturated (that is, no flow can be sent along the path), then return 0.
215   uint64_t computeAugmentingPathCapacity() {
216     uint64_t PathCapacity = INF;
217     uint64_t Now = Target;
218     while (Now != Source) {
219       uint64_t Pred = Nodes[Now].ParentNode;
220       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
221 
222       assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
223       uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
224       PathCapacity = std::min(PathCapacity, EdgeCapacity);
225 
226       Now = Pred;
227     }
228     return PathCapacity;
229   }
230 
231   /// Check for existence of an augmenting path with a positive capacity.
232   bool findAugmentingPath() {
233     // Initialize data structures
234     for (auto &Node : Nodes) {
235       Node.Distance = INF;
236       Node.ParentNode = uint64_t(-1);
237       Node.ParentEdgeIndex = uint64_t(-1);
238       Node.Taken = false;
239     }
240 
241     std::queue<uint64_t> Queue;
242     Queue.push(Source);
243     Nodes[Source].Distance = 0;
244     Nodes[Source].Taken = true;
245     while (!Queue.empty()) {
246       uint64_t Src = Queue.front();
247       Queue.pop();
248       Nodes[Src].Taken = false;
249       // Although the residual network contains edges with negative costs
250       // (in particular, backward edges), it can be shown that there are no
251       // negative-weight cycles and the following two invariants are maintained:
252       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
253       // where Dist is the length of the shortest path between two nodes. This
254       // allows to prune the search-space of the path-finding algorithm using
255       // the following early-stop criteria:
256       // -- If we find a path with zero-distance from Source to Target, stop the
257       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
258       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
259       //    process node V, as it is guaranteed _not_ to be on a shortest path
260       //    from Source to Target; it follows from inequalities
261       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
262       //                         >= Dist[Source, V]
263       if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
264         break;
265       if (Nodes[Src].Distance > Nodes[Target].Distance)
266         continue;
267 
268       // Process adjacent edges
269       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
270         auto &Edge = Edges[Src][EdgeIdx];
271         if (Edge.Flow < Edge.Capacity) {
272           uint64_t Dst = Edge.Dst;
273           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
274           if (Nodes[Dst].Distance > NewDistance) {
275             // Update the distance and the parent node/edge
276             Nodes[Dst].Distance = NewDistance;
277             Nodes[Dst].ParentNode = Src;
278             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
279             // Add the node to the queue, if it is not there yet
280             if (!Nodes[Dst].Taken) {
281               Queue.push(Dst);
282               Nodes[Dst].Taken = true;
283             }
284           }
285         }
286       }
287     }
288 
289     return Nodes[Target].Distance != INF;
290   }
291 
292   /// Update the current flow along the augmenting path.
293   void augmentFlowAlongPath(uint64_t PathCapacity) {
294     assert(PathCapacity > 0 && "found an incorrect augmenting path");
295     uint64_t Now = Target;
296     while (Now != Source) {
297       uint64_t Pred = Nodes[Now].ParentNode;
298       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
299       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
300 
301       Edge.Flow += PathCapacity;
302       RevEdge.Flow -= PathCapacity;
303 
304       Now = Pred;
305     }
306   }
307 
308   /// Find an Augmenting DAG order using a modified version of DFS in which we
309   /// can visit a node multiple times. In the DFS search, when scanning each
310   /// edge out of a node, continue search at Edge.Dst endpoint if it has not
311   /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
312   /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
313   /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
314   /// that starts with Source and ends with Target.
315   std::vector<uint64_t> findAugmentingDAG() {
316     // We use a stack based implemenation of DFS to avoid recursion.
317     // Defining DFS data structures:
318     // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
319     //  - we are currently visiting Nodes[NodeIdx] and
320     //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
321     typedef std::pair<uint64_t, uint64_t> StackItemType;
322     std::stack<StackItemType> Stack;
323     std::vector<uint64_t> AugmentingOrder;
324 
325     // Phase 0: Initialize Node attributes and Time for DFS run
326     for (auto &Node : Nodes) {
327       Node.Discovery = 0;
328       Node.Finish = 0;
329       Node.NumCalls = 0;
330       Node.Taken = false;
331     }
332     uint64_t Time = 0;
333     // Mark Target as Taken
334     // Taken attribute will be propagated backwards from Target towards Source
335     Nodes[Target].Taken = true;
336 
337     // Phase 1: Start DFS traversal from Source
338     Stack.emplace(Source, 0);
339     Nodes[Source].Discovery = ++Time;
340     while (!Stack.empty()) {
341       auto NodeIdx = Stack.top().first;
342       auto EdgeIdx = Stack.top().second;
343 
344       // If we haven't scanned all edges out of NodeIdx, continue scanning
345       if (EdgeIdx < Edges[NodeIdx].size()) {
346         auto &Edge = Edges[NodeIdx][EdgeIdx];
347         auto &Dst = Nodes[Edge.Dst];
348         Stack.top().second++;
349 
350         if (Edge.OnShortestPath) {
351           // If we haven't seen Edge.Dst so far, continue DFS search there
352           if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
353             Dst.Discovery = ++Time;
354             Stack.emplace(Edge.Dst, 0);
355             Dst.NumCalls++;
356           } else if (Dst.Taken && Dst.Finish != 0) {
357             // Else, if Edge.Dst already have a path to Target, so that NodeIdx
358             Nodes[NodeIdx].Taken = true;
359           }
360         }
361       } else {
362         // If we are done scanning all edge out of NodeIdx
363         Stack.pop();
364         // If we haven't found a path from NodeIdx to Target, forget about it
365         if (!Nodes[NodeIdx].Taken) {
366           Nodes[NodeIdx].Discovery = 0;
367         } else {
368           // If we have found a path from NodeIdx to Target, then finish NodeIdx
369           // and propagate Taken flag to DFS parent unless at the Source
370           Nodes[NodeIdx].Finish = ++Time;
371           // NodeIdx == Source if and only if the stack is empty
372           if (NodeIdx != Source) {
373             assert(!Stack.empty() && "empty stack while running dfs");
374             Nodes[Stack.top().first].Taken = true;
375           }
376           AugmentingOrder.push_back(NodeIdx);
377         }
378       }
379     }
380     // Nodes are collected decreasing Finish time, so the order is reversed
381     std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
382 
383     // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
384     for (size_t Src : AugmentingOrder) {
385       AugmentingEdges[Src].clear();
386       for (auto &Edge : Edges[Src]) {
387         uint64_t Dst = Edge.Dst;
388         if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
389             Nodes[Dst].Finish < Nodes[Src].Finish) {
390           AugmentingEdges[Src].push_back(&Edge);
391         }
392       }
393       assert((Src == Target || !AugmentingEdges[Src].empty()) &&
394              "incorrectly constructed augmenting edges");
395     }
396 
397     return AugmentingOrder;
398   }
399 
400   /// Update the current flow along the given (acyclic) subgraph specified by
401   /// the vertex order, AugmentingOrder. The objective is to send as much flow
402   /// as possible while evenly distributing flow among successors of each node.
403   /// After the update at least one edge is saturated.
404   bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
405     // Phase 0: Initialization
406     for (uint64_t Src : AugmentingOrder) {
407       Nodes[Src].FracFlow = 0;
408       Nodes[Src].IntFlow = 0;
409       for (auto &Edge : AugmentingEdges[Src]) {
410         Edge->AugmentedFlow = 0;
411       }
412     }
413 
414     // Phase 1: Send a unit of fractional flow along the DAG
415     uint64_t MaxFlowAmount = INF;
416     Nodes[Source].FracFlow = 1.0;
417     for (uint64_t Src : AugmentingOrder) {
418       assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
419              "incorrectly computed fractional flow");
420       // Distribute flow evenly among successors of Src
421       uint64_t Degree = AugmentingEdges[Src].size();
422       for (auto &Edge : AugmentingEdges[Src]) {
423         double EdgeFlow = Nodes[Src].FracFlow / Degree;
424         Nodes[Edge->Dst].FracFlow += EdgeFlow;
425         if (Edge->Capacity == INF)
426           continue;
427         uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
428         MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
429       }
430     }
431     // Stop early if we cannot send any (integral) flow from Source to Target
432     if (MaxFlowAmount == 0)
433       return false;
434 
435     // Phase 2: Send an integral flow of MaxFlowAmount
436     Nodes[Source].IntFlow = MaxFlowAmount;
437     for (uint64_t Src : AugmentingOrder) {
438       if (Src == Target)
439         break;
440       // Distribute flow evenly among successors of Src, rounding up to make
441       // sure all flow is sent
442       uint64_t Degree = AugmentingEdges[Src].size();
443       // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
444       uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
445       for (auto &Edge : AugmentingEdges[Src]) {
446         uint64_t Dst = Edge->Dst;
447         uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
448         EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
449         Nodes[Dst].IntFlow += EdgeFlow;
450         Nodes[Src].IntFlow -= EdgeFlow;
451         Edge->AugmentedFlow += EdgeFlow;
452       }
453     }
454     assert(Nodes[Target].IntFlow <= MaxFlowAmount);
455     Nodes[Target].IntFlow = 0;
456 
457     // Phase 3: Send excess flow back traversing the nodes backwards.
458     // Because of rounding, not all flow can be sent along the edges of Src.
459     // Hence, sending the remaining flow back to maintain flow conservation
460     for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
461       uint64_t Src = AugmentingOrder[Idx - 1];
462       // Try to send excess flow back along each edge.
463       // Make sure we only send back flow we just augmented (AugmentedFlow).
464       for (auto &Edge : AugmentingEdges[Src]) {
465         uint64_t Dst = Edge->Dst;
466         if (Nodes[Dst].IntFlow == 0)
467           continue;
468         uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
469         Nodes[Dst].IntFlow -= EdgeFlow;
470         Nodes[Src].IntFlow += EdgeFlow;
471         Edge->AugmentedFlow -= EdgeFlow;
472       }
473     }
474 
475     // Phase 4: Update flow values along all edges
476     bool HasSaturatedEdges = false;
477     for (uint64_t Src : AugmentingOrder) {
478       // Verify that we have sent all the excess flow from the node
479       assert(Src == Source || Nodes[Src].IntFlow == 0);
480       for (auto &Edge : AugmentingEdges[Src]) {
481         assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
482         // Update flow values along the edge and its reverse copy
483         auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
484         Edge->Flow += Edge->AugmentedFlow;
485         RevEdge.Flow -= Edge->AugmentedFlow;
486         if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
487           HasSaturatedEdges = true;
488       }
489     }
490 
491     // The augmentation is successful iff at least one edge becomes saturated
492     return HasSaturatedEdges;
493   }
494 
495   /// Identify candidate (shortest) edges for augmentation.
496   void identifyShortestEdges(uint64_t PathCapacity) {
497     assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
498     // To make sure the augmentation DAG contains only edges with large residual
499     // capacity, we prune all edges whose capacity is below a fraction of
500     // the capacity of the augmented path.
501     // (All edges of the path itself are always in the DAG)
502     uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
503 
504     // Decide which edges are on a shortest path from Source to Target
505     for (size_t Src = 0; Src < Nodes.size(); Src++) {
506       // An edge cannot be augmenting if the endpoint has large distance
507       if (Nodes[Src].Distance > Nodes[Target].Distance)
508         continue;
509 
510       for (auto &Edge : Edges[Src]) {
511         uint64_t Dst = Edge.Dst;
512         Edge.OnShortestPath =
513             Src != Target && Dst != Source &&
514             Nodes[Dst].Distance <= Nodes[Target].Distance &&
515             Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
516             Edge.Capacity > Edge.Flow &&
517             uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
518       }
519     }
520   }
521 
522   /// Maximum number of DFS iterations for DAG finding.
523   static constexpr uint64_t MaxDfsCalls = 10;
524 
525   /// A node in a flow network.
526   struct Node {
527     /// The cost of the cheapest path from the source to the current node.
528     int64_t Distance;
529     /// The node preceding the current one in the path.
530     uint64_t ParentNode;
531     /// The index of the edge between ParentNode and the current node.
532     uint64_t ParentEdgeIndex;
533     /// An indicator of whether the current node is in a queue.
534     bool Taken;
535 
536     /// Data fields utilized in DAG-augmentation:
537     /// Fractional flow.
538     double FracFlow;
539     /// Integral flow.
540     uint64_t IntFlow;
541     /// Discovery time.
542     uint64_t Discovery;
543     /// Finish time.
544     uint64_t Finish;
545     /// NumCalls.
546     uint64_t NumCalls;
547   };
548 
549   /// An edge in a flow network.
550   struct Edge {
551     /// The cost of the edge.
552     int64_t Cost;
553     /// The capacity of the edge.
554     int64_t Capacity;
555     /// The current flow on the edge.
556     int64_t Flow;
557     /// The destination node of the edge.
558     uint64_t Dst;
559     /// The index of the reverse edge between Dst and the current node.
560     uint64_t RevEdgeIndex;
561 
562     /// Data fields utilized in DAG-augmentation:
563     /// Whether the edge is currently on a shortest path from Source to Target.
564     bool OnShortestPath;
565     /// Extra flow along the edge.
566     uint64_t AugmentedFlow;
567   };
568 
569   /// The set of network nodes.
570   std::vector<Node> Nodes;
571   /// The set of network edges.
572   std::vector<std::vector<Edge>> Edges;
573   /// Source node of the flow.
574   uint64_t Source;
575   /// Target (sink) node of the flow.
576   uint64_t Target;
577   /// Augmenting edges.
578   std::vector<std::vector<Edge *>> AugmentingEdges;
579   /// Params for flow computation.
580   const ProfiParams &Params;
581 };
582 
583 /// A post-processing adjustment of the control flow. It applies two steps by
584 /// rerouting some flow and making it more realistic:
585 ///
586 /// - First, it removes all isolated components ("islands") with a positive flow
587 ///   that are unreachable from the entry block. For every such component, we
588 ///   find the shortest from the entry to an exit passing through the component,
589 ///   and increase the flow by one unit along the path.
590 ///
591 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
592 ///   with no sampled counts. Then it rebalnces the flow that goes through such
593 ///   a subgraph so that each branch is taken with probability 50%.
594 ///   An unknown subgraph is such that for every two nodes u and v:
595 ///     - u dominates v and u is not unknown;
596 ///     - v post-dominates u; and
597 ///     - all inner-nodes of all (u,v)-paths are unknown.
598 ///
599 class FlowAdjuster {
600 public:
601   FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
602       : Params(Params), Func(Func) {}
603 
604   /// Apply the post-processing.
605   void run() {
606     if (Params.JoinIslands) {
607       // Adjust the flow to get rid of isolated components
608       joinIsolatedComponents();
609     }
610 
611     if (Params.RebalanceUnknown) {
612       // Rebalance the flow inside unknown subgraphs
613       rebalanceUnknownSubgraphs();
614     }
615   }
616 
617 private:
618   void joinIsolatedComponents() {
619     // Find blocks that are reachable from the source
620     auto Visited = BitVector(NumBlocks(), false);
621     findReachable(Func.Entry, Visited);
622 
623     // Iterate over all non-reachable blocks and adjust their weights
624     for (uint64_t I = 0; I < NumBlocks(); I++) {
625       auto &Block = Func.Blocks[I];
626       if (Block.Flow > 0 && !Visited[I]) {
627         // Find a path from the entry to an exit passing through the block I
628         auto Path = findShortestPath(I);
629         // Increase the flow along the path
630         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
631                "incorrectly computed path adjusting control flow");
632         Func.Blocks[Func.Entry].Flow += 1;
633         for (auto &Jump : Path) {
634           Jump->Flow += 1;
635           Func.Blocks[Jump->Target].Flow += 1;
636           // Update reachability
637           findReachable(Jump->Target, Visited);
638         }
639       }
640     }
641   }
642 
643   /// Run BFS from a given block along the jumps with a positive flow and mark
644   /// all reachable blocks.
645   void findReachable(uint64_t Src, BitVector &Visited) {
646     if (Visited[Src])
647       return;
648     std::queue<uint64_t> Queue;
649     Queue.push(Src);
650     Visited[Src] = true;
651     while (!Queue.empty()) {
652       Src = Queue.front();
653       Queue.pop();
654       for (auto *Jump : Func.Blocks[Src].SuccJumps) {
655         uint64_t Dst = Jump->Target;
656         if (Jump->Flow > 0 && !Visited[Dst]) {
657           Queue.push(Dst);
658           Visited[Dst] = true;
659         }
660       }
661     }
662   }
663 
664   /// Find the shortest path from the entry block to an exit block passing
665   /// through a given block.
666   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
667     // A path from the entry block to BlockIdx
668     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
669     // A path from BlockIdx to an exit block
670     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
671 
672     // Concatenate the two paths
673     std::vector<FlowJump *> Result;
674     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
675     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
676     return Result;
677   }
678 
679   /// Apply the Dijkstra algorithm to find the shortest path from a given
680   /// Source to a given Target block.
681   /// If Target == -1, then the path ends at an exit block.
682   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
683     // Quit early, if possible
684     if (Source == Target)
685       return std::vector<FlowJump *>();
686     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
687       return std::vector<FlowJump *>();
688 
689     // Initialize data structures
690     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
691     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
692     Distance[Source] = 0;
693     std::set<std::pair<uint64_t, uint64_t>> Queue;
694     Queue.insert(std::make_pair(Distance[Source], Source));
695 
696     // Run the Dijkstra algorithm
697     while (!Queue.empty()) {
698       uint64_t Src = Queue.begin()->second;
699       Queue.erase(Queue.begin());
700       // If we found a solution, quit early
701       if (Src == Target ||
702           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
703         break;
704 
705       for (auto *Jump : Func.Blocks[Src].SuccJumps) {
706         uint64_t Dst = Jump->Target;
707         int64_t JumpDist = jumpDistance(Jump);
708         if (Distance[Dst] > Distance[Src] + JumpDist) {
709           Queue.erase(std::make_pair(Distance[Dst], Dst));
710 
711           Distance[Dst] = Distance[Src] + JumpDist;
712           Parent[Dst] = Jump;
713 
714           Queue.insert(std::make_pair(Distance[Dst], Dst));
715         }
716       }
717     }
718     // If Target is not provided, find the closest exit block
719     if (Target == AnyExitBlock) {
720       for (uint64_t I = 0; I < NumBlocks(); I++) {
721         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
722           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
723             Target = I;
724           }
725         }
726       }
727     }
728     assert(Parent[Target] != nullptr && "a path does not exist");
729 
730     // Extract the constructed path
731     std::vector<FlowJump *> Result;
732     uint64_t Now = Target;
733     while (Now != Source) {
734       assert(Now == Parent[Now]->Target && "incorrect parent jump");
735       Result.push_back(Parent[Now]);
736       Now = Parent[Now]->Source;
737     }
738     // Reverse the path, since it is extracted from Target to Source
739     std::reverse(Result.begin(), Result.end());
740     return Result;
741   }
742 
743   /// A distance of a path for a given jump.
744   /// In order to incite the path to use blocks/jumps with large positive flow,
745   /// and avoid changing branch probability of outgoing edges drastically,
746   /// set the jump distance so as:
747   ///   - to minimize the number of unlikely jumps used and subject to that,
748   ///   - to minimize the number of Flow == 0 jumps used and subject to that,
749   ///   - minimizes total multiplicative Flow increase for the remaining edges.
750   /// To capture this objective with integer distances, we round off fractional
751   /// parts to a multiple of 1 / BaseDistance.
752   int64_t jumpDistance(FlowJump *Jump) const {
753     if (Jump->IsUnlikely)
754       return Params.CostUnlikely;
755     uint64_t BaseDistance =
756         std::max(FlowAdjuster::MinBaseDistance,
757                  std::min(Func.Blocks[Func.Entry].Flow,
758                           Params.CostUnlikely / (2 * (NumBlocks() + 1))));
759     if (Jump->Flow > 0)
760       return BaseDistance + BaseDistance / Jump->Flow;
761     return 2 * BaseDistance * (NumBlocks() + 1);
762   };
763 
764   uint64_t NumBlocks() const { return Func.Blocks.size(); }
765 
766   /// Rebalance unknown subgraphs so that the flow is split evenly across the
767   /// outgoing branches of every block of the subgraph. The method iterates over
768   /// blocks with known weight and identifies unknown subgraphs rooted at the
769   /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
770   void rebalanceUnknownSubgraphs() {
771     // Try to find unknown subgraphs from each block
772     for (const FlowBlock &SrcBlock : Func.Blocks) {
773       // Verify if rebalancing rooted at SrcBlock is feasible
774       if (!canRebalanceAtRoot(&SrcBlock))
775         continue;
776 
777       // Find an unknown subgraphs starting at SrcBlock. Along the way,
778       // fill in known destinations and intermediate unknown blocks.
779       std::vector<FlowBlock *> UnknownBlocks;
780       std::vector<FlowBlock *> KnownDstBlocks;
781       findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);
782 
783       // Verify if rebalancing of the subgraph is feasible. If the search is
784       // successful, find the unique destination block (which can be null)
785       FlowBlock *DstBlock = nullptr;
786       if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
787                                 DstBlock))
788         continue;
789 
790       // We cannot rebalance subgraphs containing cycles among unknown blocks
791       if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
792         continue;
793 
794       // Rebalance the flow
795       rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
796     }
797   }
798 
799   /// Verify if rebalancing rooted at a given block is possible.
800   bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
801     // Do not attempt to find unknown subgraphs from an unknown or a
802     // zero-flow block
803     if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
804       return false;
805 
806     // Do not attempt to process subgraphs from a block w/o unknown sucessors
807     bool HasUnknownSuccs = false;
808     for (auto *Jump : SrcBlock->SuccJumps) {
809       if (Func.Blocks[Jump->Target].HasUnknownWeight) {
810         HasUnknownSuccs = true;
811         break;
812       }
813     }
814     if (!HasUnknownSuccs)
815       return false;
816 
817     return true;
818   }
819 
820   /// Find an unknown subgraph starting at block SrcBlock. The method sets
821   /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
822   void findUnknownSubgraph(const FlowBlock *SrcBlock,
823                            std::vector<FlowBlock *> &KnownDstBlocks,
824                            std::vector<FlowBlock *> &UnknownBlocks) {
825     // Run BFS from SrcBlock and make sure all paths are going through unknown
826     // blocks and end at a known DstBlock
827     auto Visited = BitVector(NumBlocks(), false);
828     std::queue<uint64_t> Queue;
829 
830     Queue.push(SrcBlock->Index);
831     Visited[SrcBlock->Index] = true;
832     while (!Queue.empty()) {
833       auto &Block = Func.Blocks[Queue.front()];
834       Queue.pop();
835       // Process blocks reachable from Block
836       for (auto *Jump : Block.SuccJumps) {
837         // If Jump can be ignored, skip it
838         if (ignoreJump(SrcBlock, nullptr, Jump))
839           continue;
840 
841         uint64_t Dst = Jump->Target;
842         // If Dst has been visited, skip Jump
843         if (Visited[Dst])
844           continue;
845         // Process block Dst
846         Visited[Dst] = true;
847         if (!Func.Blocks[Dst].HasUnknownWeight) {
848           KnownDstBlocks.push_back(&Func.Blocks[Dst]);
849         } else {
850           Queue.push(Dst);
851           UnknownBlocks.push_back(&Func.Blocks[Dst]);
852         }
853       }
854     }
855   }
856 
857   /// Verify if rebalancing of the subgraph is feasible. If the checks are
858   /// successful, set the unique destination block, DstBlock (can be null).
859   bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
860                             const std::vector<FlowBlock *> &KnownDstBlocks,
861                             const std::vector<FlowBlock *> &UnknownBlocks,
862                             FlowBlock *&DstBlock) {
863     // If the list of unknown blocks is empty, we don't need rebalancing
864     if (UnknownBlocks.empty())
865       return false;
866 
867     // If there are multiple known sinks, we can't rebalance
868     if (KnownDstBlocks.size() > 1)
869       return false;
870     DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
871 
872     // Verify sinks of the subgraph
873     for (auto *Block : UnknownBlocks) {
874       if (Block->SuccJumps.empty()) {
875         // If there are multiple (known and unknown) sinks, we can't rebalance
876         if (DstBlock != nullptr)
877           return false;
878         continue;
879       }
880       size_t NumIgnoredJumps = 0;
881       for (auto *Jump : Block->SuccJumps) {
882         if (ignoreJump(SrcBlock, DstBlock, Jump))
883           NumIgnoredJumps++;
884       }
885       // If there is a non-sink block in UnknownBlocks with all jumps ignored,
886       // then we can't rebalance
887       if (NumIgnoredJumps == Block->SuccJumps.size())
888         return false;
889     }
890 
891     return true;
892   }
893 
894   /// Decide whether the Jump is ignored while processing an unknown subgraphs
895   /// rooted at basic block SrcBlock with the destination block, DstBlock.
896   bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
897                   const FlowJump *Jump) {
898     // Ignore unlikely jumps with zero flow
899     if (Jump->IsUnlikely && Jump->Flow == 0)
900       return true;
901 
902     auto JumpSource = &Func.Blocks[Jump->Source];
903     auto JumpTarget = &Func.Blocks[Jump->Target];
904 
905     // Do not ignore jumps coming into DstBlock
906     if (DstBlock != nullptr && JumpTarget == DstBlock)
907       return false;
908 
909     // Ignore jumps out of SrcBlock to known blocks
910     if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
911       return true;
912 
913     // Ignore jumps to known blocks with zero flow
914     if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
915       return true;
916 
917     return false;
918   }
919 
920   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
921   /// UnknownBlocks in the topological order (so that all jumps are "forward").
922   bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
923                          std::vector<FlowBlock *> &UnknownBlocks) {
924     // Extract local in-degrees in the considered subgraph
925     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
926     auto fillInDegree = [&](const FlowBlock *Block) {
927       for (auto *Jump : Block->SuccJumps) {
928         if (ignoreJump(SrcBlock, DstBlock, Jump))
929           continue;
930         LocalInDegree[Jump->Target]++;
931       }
932     };
933     fillInDegree(SrcBlock);
934     for (auto *Block : UnknownBlocks) {
935       fillInDegree(Block);
936     }
937     // A loop containing SrcBlock
938     if (LocalInDegree[SrcBlock->Index] > 0)
939       return false;
940 
941     std::vector<FlowBlock *> AcyclicOrder;
942     std::queue<uint64_t> Queue;
943     Queue.push(SrcBlock->Index);
944     while (!Queue.empty()) {
945       FlowBlock *Block = &Func.Blocks[Queue.front()];
946       Queue.pop();
947       // Stop propagation once we reach DstBlock, if any
948       if (DstBlock != nullptr && Block == DstBlock)
949         break;
950 
951       // Keep an acyclic order of unknown blocks
952       if (Block->HasUnknownWeight && Block != SrcBlock)
953         AcyclicOrder.push_back(Block);
954 
955       // Add to the queue all successors with zero local in-degree
956       for (auto *Jump : Block->SuccJumps) {
957         if (ignoreJump(SrcBlock, DstBlock, Jump))
958           continue;
959         uint64_t Dst = Jump->Target;
960         LocalInDegree[Dst]--;
961         if (LocalInDegree[Dst] == 0) {
962           Queue.push(Dst);
963         }
964       }
965     }
966 
967     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
968     // of all blocks
969     if (UnknownBlocks.size() != AcyclicOrder.size())
970       return false;
971     UnknownBlocks = AcyclicOrder;
972     return true;
973   }
974 
975   /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
976   /// having UnknownBlocks intermediate blocks.
977   void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
978                                 const FlowBlock *DstBlock,
979                                 const std::vector<FlowBlock *> &UnknownBlocks) {
980     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
981 
982     // Ditribute flow from the source block
983     uint64_t BlockFlow = 0;
984     // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
985     for (auto *Jump : SrcBlock->SuccJumps) {
986       if (ignoreJump(SrcBlock, DstBlock, Jump))
987         continue;
988       BlockFlow += Jump->Flow;
989     }
990     rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
991 
992     // Ditribute flow from the remaining blocks
993     for (auto *Block : UnknownBlocks) {
994       assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
995       uint64_t BlockFlow = 0;
996       // Block's flow is the sum of incoming flows
997       for (auto *Jump : Block->PredJumps) {
998         BlockFlow += Jump->Flow;
999       }
1000       Block->Flow = BlockFlow;
1001       rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
1002     }
1003   }
1004 
1005   /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
1006   /// and ending at DstBlock.
1007   void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
1008                       const FlowBlock *Block, uint64_t BlockFlow) {
1009     // Process all successor jumps and update corresponding flow values
1010     size_t BlockDegree = 0;
1011     for (auto *Jump : Block->SuccJumps) {
1012       if (ignoreJump(SrcBlock, DstBlock, Jump))
1013         continue;
1014       BlockDegree++;
1015     }
1016     // If all successor jumps of the block are ignored, skip it
1017     if (DstBlock == nullptr && BlockDegree == 0)
1018       return;
1019     assert(BlockDegree > 0 && "all outgoing jumps are ignored");
1020 
1021     // Each of the Block's successors gets the following amount of flow.
1022     // Rounding the value up so that all flow is propagated
1023     uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1024     for (auto *Jump : Block->SuccJumps) {
1025       if (ignoreJump(SrcBlock, DstBlock, Jump))
1026         continue;
1027       uint64_t Flow = std::min(SuccFlow, BlockFlow);
1028       Jump->Flow = Flow;
1029       BlockFlow -= Flow;
1030     }
1031     assert(BlockFlow == 0 && "not all flow is propagated");
1032   }
1033 
1034   /// A constant indicating an arbitrary exit block of a function.
1035   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
1036   /// Minimum BaseDistance for the jump distance values in island joining.
1037   static constexpr uint64_t MinBaseDistance = 10000;
1038 
1039   /// Params for flow computation.
1040   const ProfiParams &Params;
1041   /// The function.
1042   FlowFunction &Func;
1043 };
1044 
1045 std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
1046                                              const FlowBlock &Block);
1047 std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
1048                                             const FlowJump &Jump);
1049 
1050 /// Initializing flow network for a given function.
1051 ///
1052 /// Every block is split into two nodes that are responsible for (i) an
1053 /// incoming flow, (ii) an outgoing flow; they penalize an increase or a
1054 /// reduction of the block weight.
1055 void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
1056                        FlowFunction &Func) {
1057   uint64_t NumBlocks = Func.Blocks.size();
1058   assert(NumBlocks > 1 && "Too few blocks in a function");
1059   uint64_t NumJumps = Func.Jumps.size();
1060   assert(NumJumps > 0 && "Too few jumps in a function");
1061 
1062   // Introducing dummy source/sink pairs to allow flow circulation.
1063   // The nodes corresponding to blocks of the function have indicies in
1064   // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
1065   // next four values.
1066   uint64_t S = 2 * NumBlocks;
1067   uint64_t T = S + 1;
1068   uint64_t S1 = S + 2;
1069   uint64_t T1 = S + 3;
1070 
1071   Network.initialize(2 * NumBlocks + 4, S1, T1);
1072 
1073   // Initialize nodes of the flow network
1074   for (uint64_t B = 0; B < NumBlocks; B++) {
1075     auto &Block = Func.Blocks[B];
1076 
1077     // Split every block into two auxiliary nodes to allow
1078     // increase/reduction of the block count.
1079     uint64_t Bin = 2 * B;
1080     uint64_t Bout = 2 * B + 1;
1081 
1082     // Edges from S and to T
1083     if (Block.isEntry()) {
1084       Network.addEdge(S, Bin, 0);
1085     } else if (Block.isExit()) {
1086       Network.addEdge(Bout, T, 0);
1087     }
1088 
1089     // Assign costs for increasing/decreasing the block counts
1090     auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);
1091 
1092     // Add the corresponding edges to the network
1093     Network.addEdge(Bin, Bout, AuxCostInc);
1094     if (Block.Weight > 0) {
1095       Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
1096       Network.addEdge(S1, Bout, Block.Weight, 0);
1097       Network.addEdge(Bin, T1, Block.Weight, 0);
1098     }
1099   }
1100 
1101   // Initialize edges of the flow network
1102   for (uint64_t J = 0; J < NumJumps; J++) {
1103     auto &Jump = Func.Jumps[J];
1104 
1105     // Get the endpoints corresponding to the jump
1106     uint64_t Jin = 2 * Jump.Source + 1;
1107     uint64_t Jout = 2 * Jump.Target;
1108 
1109     // Assign costs for increasing/decreasing the jump counts
1110     auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);
1111 
1112     // Add the corresponding edges to the network
1113     Network.addEdge(Jin, Jout, AuxCostInc);
1114     if (Jump.Weight > 0) {
1115       Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
1116       Network.addEdge(S1, Jout, Jump.Weight, 0);
1117       Network.addEdge(Jin, T1, Jump.Weight, 0);
1118     }
1119   }
1120 
1121   // Make sure we have a valid flow circulation
1122   Network.addEdge(T, S, 0);
1123 }
1124 
1125 /// Assign costs for increasing/decreasing the block counts.
1126 std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
1127                                              const FlowBlock &Block) {
1128   // Modifying the weight of an unlikely block is expensive
1129   if (Block.IsUnlikely)
1130     return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
1131 
1132   // Assign default values for the costs
1133   int64_t CostInc = Params.CostBlockInc;
1134   int64_t CostDec = Params.CostBlockDec;
1135   // Update the costs depending on the block metadata
1136   if (Block.HasUnknownWeight) {
1137     CostInc = Params.CostBlockUnknownInc;
1138     CostDec = 0;
1139   } else {
1140     // Increasing the count for "cold" blocks with zero initial count is more
1141     // expensive than for "hot" ones
1142     if (Block.Weight == 0)
1143       CostInc = Params.CostBlockZeroInc;
1144     // Modifying the count of the entry block is expensive
1145     if (Block.isEntry()) {
1146       CostInc = Params.CostBlockEntryInc;
1147       CostDec = Params.CostBlockEntryDec;
1148     }
1149   }
1150   return std::make_pair(CostInc, CostDec);
1151 }
1152 
1153 /// Assign costs for increasing/decreasing the jump counts.
1154 std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
1155                                             const FlowJump &Jump) {
1156   // Modifying the weight of an unlikely jump is expensive
1157   if (Jump.IsUnlikely)
1158     return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
1159 
1160   // Assign default values for the costs
1161   int64_t CostInc = Params.CostJumpInc;
1162   int64_t CostDec = Params.CostJumpDec;
1163   // Update the costs depending on the block metadata
1164   if (Jump.Source + 1 == Jump.Target) {
1165     // Adjusting the fall-through branch
1166     CostInc = Params.CostJumpFTInc;
1167     CostDec = Params.CostJumpFTDec;
1168   }
1169   if (Jump.HasUnknownWeight) {
1170     // The cost is different for fall-through and non-fall-through branches
1171     if (Jump.Source + 1 == Jump.Target)
1172       CostInc = Params.CostJumpUnknownFTInc;
1173     else
1174       CostInc = Params.CostJumpUnknownInc;
1175     CostDec = 0;
1176   } else {
1177     assert(Jump.Weight > 0 && "found zero-weight jump with a positive weight");
1178   }
1179   return std::make_pair(CostInc, CostDec);
1180 }
1181 
1182 /// Extract resulting block and edge counts from the flow network.
1183 void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
1184                     FlowFunction &Func) {
1185   uint64_t NumBlocks = Func.Blocks.size();
1186   uint64_t NumJumps = Func.Jumps.size();
1187 
1188   // Extract resulting jump counts
1189   for (uint64_t J = 0; J < NumJumps; J++) {
1190     auto &Jump = Func.Jumps[J];
1191     uint64_t SrcOut = 2 * Jump.Source + 1;
1192     uint64_t DstIn = 2 * Jump.Target;
1193 
1194     int64_t Flow = 0;
1195     int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
1196     if (Jump.Source != Jump.Target)
1197       Flow = int64_t(Jump.Weight) + AuxFlow;
1198     else
1199       Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);
1200 
1201     Jump.Flow = Flow;
1202     assert(Flow >= 0 && "negative jump flow");
1203   }
1204 
1205   // Extract resulting block counts
1206   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1207   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1208   for (auto &Jump : Func.Jumps) {
1209     InFlow[Jump.Target] += Jump.Flow;
1210     OutFlow[Jump.Source] += Jump.Flow;
1211   }
1212   for (uint64_t B = 0; B < NumBlocks; B++) {
1213     auto &Block = Func.Blocks[B];
1214     Block.Flow = std::max(OutFlow[B], InFlow[B]);
1215   }
1216 }
1217 
1218 #ifndef NDEBUG
1219 /// Verify that the provided block/jump weights are as expected.
1220 void verifyInput(const FlowFunction &Func) {
1221   // Verify the entry block
1222   assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
1223   for (size_t I = 1; I < Func.Blocks.size(); I++) {
1224     assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
1225   }
1226   // Verify CFG jumps
1227   for (auto &Block : Func.Blocks) {
1228     assert((!Block.isEntry() || !Block.isExit()) &&
1229            "a block cannot be an entry and an exit");
1230   }
1231   // Verify input block weights
1232   for (auto &Block : Func.Blocks) {
1233     assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
1234            "non-zero weight of a block w/o weight except for an entry");
1235   }
1236   // Verify input jump weights
1237   for (auto &Jump : Func.Jumps) {
1238     assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
1239            "non-zero weight of a jump w/o weight");
1240   }
1241 }
1242 
1243 /// Verify that the computed flow values satisfy flow conservation rules.
1244 void verifyOutput(const FlowFunction &Func) {
1245   const uint64_t NumBlocks = Func.Blocks.size();
1246   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1247   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1248   for (const auto &Jump : Func.Jumps) {
1249     InFlow[Jump.Target] += Jump.Flow;
1250     OutFlow[Jump.Source] += Jump.Flow;
1251   }
1252 
1253   uint64_t TotalInFlow = 0;
1254   uint64_t TotalOutFlow = 0;
1255   for (uint64_t I = 0; I < NumBlocks; I++) {
1256     auto &Block = Func.Blocks[I];
1257     if (Block.isEntry()) {
1258       TotalInFlow += Block.Flow;
1259       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1260     } else if (Block.isExit()) {
1261       TotalOutFlow += Block.Flow;
1262       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1263     } else {
1264       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1265       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1266     }
1267   }
1268   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
1269 
1270   // Verify that there are no isolated flow components
1271   // One could modify FlowFunction to hold edges indexed by the sources, which
1272   // will avoid a creation of the object
1273   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
1274   for (const auto &Jump : Func.Jumps) {
1275     if (Jump.Flow > 0) {
1276       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
1277     }
1278   }
1279 
1280   // Run BFS from the source along edges with positive flow
1281   std::queue<uint64_t> Queue;
1282   auto Visited = BitVector(NumBlocks, false);
1283   Queue.push(Func.Entry);
1284   Visited[Func.Entry] = true;
1285   while (!Queue.empty()) {
1286     uint64_t Src = Queue.front();
1287     Queue.pop();
1288     for (uint64_t Dst : PositiveFlowEdges[Src]) {
1289       if (!Visited[Dst]) {
1290         Queue.push(Dst);
1291         Visited[Dst] = true;
1292       }
1293     }
1294   }
1295 
1296   // Verify that every block that has a positive flow is reached from the source
1297   // along edges with a positive flow
1298   for (uint64_t I = 0; I < NumBlocks; I++) {
1299     auto &Block = Func.Blocks[I];
1300     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
1301   }
1302 }
1303 #endif
1304 
1305 } // end of anonymous namespace
1306 
1307 /// Apply the profile inference algorithm for a given function
1308 void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
1309 #ifndef NDEBUG
1310   // Verify the input data
1311   verifyInput(Func);
1312 #endif
1313 
1314   // Create and apply an inference network model
1315   auto InferenceNetwork = MinCostMaxFlow(Params);
1316   initializeNetwork(Params, InferenceNetwork, Func);
1317   InferenceNetwork.run();
1318 
1319   // Extract flow values for every block and every edge
1320   extractWeights(Params, InferenceNetwork, Func);
1321 
1322   // Post-processing adjustments to the flow
1323   auto Adjuster = FlowAdjuster(Params, Func);
1324   Adjuster.run();
1325 
1326 #ifndef NDEBUG
1327   // Verify the result
1328   verifyOutput(Func);
1329 #endif
1330 }
1331 
1332 /// Apply the profile inference algorithm for a given flow function
1333 void llvm::applyFlowInference(FlowFunction &Func) {
1334   ProfiParams Params;
1335   // Set the params from the command-line flags.
1336   Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
1337   Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
1338   Params.JoinIslands = SampleProfileJoinIslands;
1339   Params.CostBlockInc = SampleProfileProfiCostBlockInc;
1340   Params.CostBlockDec = SampleProfileProfiCostBlockDec;
1341   Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
1342   Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
1343   Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
1344   Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;
1345 
1346   applyFlowInference(Params, Func);
1347 }
1348