xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp (revision 044243fcc9b4c639cf5655e37b98478bcb312590)
1  //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file implements a profile inference algorithm. Given an incomplete and
10  // possibly imprecise block counts, the algorithm reconstructs realistic block
11  // and edge counts that satisfy flow conservation rules, while minimally modify
12  // input block counts.
13  //
14  //===----------------------------------------------------------------------===//
15  
16  #include "llvm/Transforms/Utils/SampleProfileInference.h"
17  #include "llvm/ADT/BitVector.h"
18  #include "llvm/Support/CommandLine.h"
19  #include "llvm/Support/Debug.h"
20  #include <queue>
21  #include <set>
22  #include <stack>
23  #include <unordered_set>
24  
25  using namespace llvm;
26  #define DEBUG_TYPE "sample-profile-inference"
27  
28  namespace {
29  
30  static cl::opt<bool> SampleProfileEvenFlowDistribution(
31      "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
32      cl::desc("Try to evenly distribute flow when there are multiple equally "
33               "likely options."));
34  
35  static cl::opt<bool> SampleProfileRebalanceUnknown(
36      "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
37      cl::desc("Evenly re-distribute flow among unknown subgraphs."));
38  
39  static cl::opt<bool> SampleProfileJoinIslands(
40      "sample-profile-join-islands", cl::init(true), cl::Hidden,
41      cl::desc("Join isolated components having positive flow."));
42  
43  static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
44      "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
45      cl::desc("The cost of increasing a block's count by one."));
46  
47  static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
48      "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
49      cl::desc("The cost of decreasing a block's count by one."));
50  
51  static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
52      "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
53      cl::desc("The cost of increasing the entry block's count by one."));
54  
55  static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
56      "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
57      cl::desc("The cost of decreasing the entry block's count by one."));
58  
59  static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
60      "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
61      cl::desc("The cost of increasing a count of zero-weight block by one."));
62  
63  static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
64      "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
65      cl::desc("The cost of increasing an unknown block's count by one."));
66  
67  /// A value indicating an infinite flow/capacity/weight of a block/edge.
68  /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
69  /// during the execution.
70  static constexpr int64_t INF = ((int64_t)1) << 50;
71  
72  /// The minimum-cost maximum flow algorithm.
73  ///
74  /// The algorithm finds the maximum flow of minimum cost on a given (directed)
75  /// network using a modified version of the classical Moore-Bellman-Ford
76  /// approach. The algorithm applies a number of augmentation iterations in which
77  /// flow is sent along paths of positive capacity from the source to the sink.
78  /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
79  /// where m is the number of edges, n is the number of vertices, and v(f) is the
80  /// value of the maximum flow. However, the observed running time on typical
81  /// instances is sub-quadratic, that is, o(n^2).
82  ///
83  /// The input is a set of edges with specified costs and capacities, and a pair
84  /// of nodes (source and sink). The output is the flow along each edge of the
85  /// minimum total cost respecting the given edge capacities.
86  class MinCostMaxFlow {
87  public:
88    MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}
89  
90    // Initialize algorithm's data structures for a network of a given size.
91    void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
92      Source = SourceNode;
93      Target = SinkNode;
94  
95      Nodes = std::vector<Node>(NodeCount);
96      Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
97      if (Params.EvenFlowDistribution)
98        AugmentingEdges =
99            std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
100    }
101  
102    // Run the algorithm.
103    int64_t run() {
104      LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");
105  
106      // Iteratively find an augmentation path/dag in the network and send the
107      // flow along its edges
108      size_t AugmentationIters = applyFlowAugmentation();
109  
110      // Compute the total flow and its cost
111      int64_t TotalCost = 0;
112      int64_t TotalFlow = 0;
113      for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
114        for (auto &Edge : Edges[Src]) {
115          if (Edge.Flow > 0) {
116            TotalCost += Edge.Cost * Edge.Flow;
117            if (Src == Source)
118              TotalFlow += Edge.Flow;
119          }
120        }
121      }
122      LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
123                        << " iterations with " << TotalFlow << " total flow"
124                        << " of " << TotalCost << " cost\n");
125      (void)TotalFlow;
126      (void)AugmentationIters;
127      return TotalCost;
128    }
129  
130    /// Adding an edge to the network with a specified capacity and a cost.
131    /// Multiple edges between a pair of nodes are allowed but self-edges
132    /// are not supported.
133    void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
134      assert(Capacity > 0 && "adding an edge of zero capacity");
135      assert(Src != Dst && "loop edge are not supported");
136  
137      Edge SrcEdge;
138      SrcEdge.Dst = Dst;
139      SrcEdge.Cost = Cost;
140      SrcEdge.Capacity = Capacity;
141      SrcEdge.Flow = 0;
142      SrcEdge.RevEdgeIndex = Edges[Dst].size();
143  
144      Edge DstEdge;
145      DstEdge.Dst = Src;
146      DstEdge.Cost = -Cost;
147      DstEdge.Capacity = 0;
148      DstEdge.Flow = 0;
149      DstEdge.RevEdgeIndex = Edges[Src].size();
150  
151      Edges[Src].push_back(SrcEdge);
152      Edges[Dst].push_back(DstEdge);
153    }
154  
155    /// Adding an edge to the network of infinite capacity and a given cost.
156    void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
157      addEdge(Src, Dst, INF, Cost);
158    }
159  
160    /// Get the total flow from a given source node.
161    /// Returns a list of pairs (target node, amount of flow to the target).
162    std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
163      std::vector<std::pair<uint64_t, int64_t>> Flow;
164      for (const auto &Edge : Edges[Src]) {
165        if (Edge.Flow > 0)
166          Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
167      }
168      return Flow;
169    }
170  
171    /// Get the total flow between a pair of nodes.
172    int64_t getFlow(uint64_t Src, uint64_t Dst) const {
173      int64_t Flow = 0;
174      for (const auto &Edge : Edges[Src]) {
175        if (Edge.Dst == Dst) {
176          Flow += Edge.Flow;
177        }
178      }
179      return Flow;
180    }
181  
182  private:
183    /// Iteratively find an augmentation path/dag in the network and send the
184    /// flow along its edges. The method returns the number of applied iterations.
185    size_t applyFlowAugmentation() {
186      size_t AugmentationIters = 0;
187      while (findAugmentingPath()) {
188        uint64_t PathCapacity = computeAugmentingPathCapacity();
189        while (PathCapacity > 0) {
190          bool Progress = false;
191          if (Params.EvenFlowDistribution) {
192            // Identify node/edge candidates for augmentation
193            identifyShortestEdges(PathCapacity);
194  
195            // Find an augmenting DAG
196            auto AugmentingOrder = findAugmentingDAG();
197  
198            // Apply the DAG augmentation
199            Progress = augmentFlowAlongDAG(AugmentingOrder);
200            PathCapacity = computeAugmentingPathCapacity();
201          }
202  
203          if (!Progress) {
204            augmentFlowAlongPath(PathCapacity);
205            PathCapacity = 0;
206          }
207  
208          AugmentationIters++;
209        }
210      }
211      return AugmentationIters;
212    }
213  
214    /// Compute the capacity of the cannonical augmenting path. If the path is
215    /// saturated (that is, no flow can be sent along the path), then return 0.
216    uint64_t computeAugmentingPathCapacity() {
217      uint64_t PathCapacity = INF;
218      uint64_t Now = Target;
219      while (Now != Source) {
220        uint64_t Pred = Nodes[Now].ParentNode;
221        auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
222  
223        assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
224        uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
225        PathCapacity = std::min(PathCapacity, EdgeCapacity);
226  
227        Now = Pred;
228      }
229      return PathCapacity;
230    }
231  
232    /// Check for existence of an augmenting path with a positive capacity.
233    bool findAugmentingPath() {
234      // Initialize data structures
235      for (auto &Node : Nodes) {
236        Node.Distance = INF;
237        Node.ParentNode = uint64_t(-1);
238        Node.ParentEdgeIndex = uint64_t(-1);
239        Node.Taken = false;
240      }
241  
242      std::queue<uint64_t> Queue;
243      Queue.push(Source);
244      Nodes[Source].Distance = 0;
245      Nodes[Source].Taken = true;
246      while (!Queue.empty()) {
247        uint64_t Src = Queue.front();
248        Queue.pop();
249        Nodes[Src].Taken = false;
250        // Although the residual network contains edges with negative costs
251        // (in particular, backward edges), it can be shown that there are no
252        // negative-weight cycles and the following two invariants are maintained:
253        // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
254        // where Dist is the length of the shortest path between two nodes. This
255        // allows to prune the search-space of the path-finding algorithm using
256        // the following early-stop criteria:
257        // -- If we find a path with zero-distance from Source to Target, stop the
258        //    search, as the path is the shortest since Dist[Source, Target] >= 0;
259        // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
260        //    process node V, as it is guaranteed _not_ to be on a shortest path
261        //    from Source to Target; it follows from inequalities
262        //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
263        //                         >= Dist[Source, V]
264        if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
265          break;
266        if (Nodes[Src].Distance > Nodes[Target].Distance)
267          continue;
268  
269        // Process adjacent edges
270        for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
271          auto &Edge = Edges[Src][EdgeIdx];
272          if (Edge.Flow < Edge.Capacity) {
273            uint64_t Dst = Edge.Dst;
274            int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
275            if (Nodes[Dst].Distance > NewDistance) {
276              // Update the distance and the parent node/edge
277              Nodes[Dst].Distance = NewDistance;
278              Nodes[Dst].ParentNode = Src;
279              Nodes[Dst].ParentEdgeIndex = EdgeIdx;
280              // Add the node to the queue, if it is not there yet
281              if (!Nodes[Dst].Taken) {
282                Queue.push(Dst);
283                Nodes[Dst].Taken = true;
284              }
285            }
286          }
287        }
288      }
289  
290      return Nodes[Target].Distance != INF;
291    }
292  
293    /// Update the current flow along the augmenting path.
294    void augmentFlowAlongPath(uint64_t PathCapacity) {
295      assert(PathCapacity > 0 && "found an incorrect augmenting path");
296      uint64_t Now = Target;
297      while (Now != Source) {
298        uint64_t Pred = Nodes[Now].ParentNode;
299        auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
300        auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
301  
302        Edge.Flow += PathCapacity;
303        RevEdge.Flow -= PathCapacity;
304  
305        Now = Pred;
306      }
307    }
308  
309    /// Find an Augmenting DAG order using a modified version of DFS in which we
310    /// can visit a node multiple times. In the DFS search, when scanning each
311    /// edge out of a node, continue search at Edge.Dst endpoint if it has not
312    /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
313    /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
314    /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
315    /// that starts with Source and ends with Target.
316    std::vector<uint64_t> findAugmentingDAG() {
317      // We use a stack based implemenation of DFS to avoid recursion.
318      // Defining DFS data structures:
319      // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
320      //  - we are currently visiting Nodes[NodeIdx] and
321      //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
322      typedef std::pair<uint64_t, uint64_t> StackItemType;
323      std::stack<StackItemType> Stack;
324      std::vector<uint64_t> AugmentingOrder;
325  
326      // Phase 0: Initialize Node attributes and Time for DFS run
327      for (auto &Node : Nodes) {
328        Node.Discovery = 0;
329        Node.Finish = 0;
330        Node.NumCalls = 0;
331        Node.Taken = false;
332      }
333      uint64_t Time = 0;
334      // Mark Target as Taken
335      // Taken attribute will be propagated backwards from Target towards Source
336      Nodes[Target].Taken = true;
337  
338      // Phase 1: Start DFS traversal from Source
339      Stack.emplace(Source, 0);
340      Nodes[Source].Discovery = ++Time;
341      while (!Stack.empty()) {
342        auto NodeIdx = Stack.top().first;
343        auto EdgeIdx = Stack.top().second;
344  
345        // If we haven't scanned all edges out of NodeIdx, continue scanning
346        if (EdgeIdx < Edges[NodeIdx].size()) {
347          auto &Edge = Edges[NodeIdx][EdgeIdx];
348          auto &Dst = Nodes[Edge.Dst];
349          Stack.top().second++;
350  
351          if (Edge.OnShortestPath) {
352            // If we haven't seen Edge.Dst so far, continue DFS search there
353            if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
354              Dst.Discovery = ++Time;
355              Stack.emplace(Edge.Dst, 0);
356              Dst.NumCalls++;
357            } else if (Dst.Taken && Dst.Finish != 0) {
358              // Else, if Edge.Dst already have a path to Target, so that NodeIdx
359              Nodes[NodeIdx].Taken = true;
360            }
361          }
362        } else {
363          // If we are done scanning all edge out of NodeIdx
364          Stack.pop();
365          // If we haven't found a path from NodeIdx to Target, forget about it
366          if (!Nodes[NodeIdx].Taken) {
367            Nodes[NodeIdx].Discovery = 0;
368          } else {
369            // If we have found a path from NodeIdx to Target, then finish NodeIdx
370            // and propagate Taken flag to DFS parent unless at the Source
371            Nodes[NodeIdx].Finish = ++Time;
372            // NodeIdx == Source if and only if the stack is empty
373            if (NodeIdx != Source) {
374              assert(!Stack.empty() && "empty stack while running dfs");
375              Nodes[Stack.top().first].Taken = true;
376            }
377            AugmentingOrder.push_back(NodeIdx);
378          }
379        }
380      }
381      // Nodes are collected decreasing Finish time, so the order is reversed
382      std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
383  
384      // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
385      for (size_t Src : AugmentingOrder) {
386        AugmentingEdges[Src].clear();
387        for (auto &Edge : Edges[Src]) {
388          uint64_t Dst = Edge.Dst;
389          if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
390              Nodes[Dst].Finish < Nodes[Src].Finish) {
391            AugmentingEdges[Src].push_back(&Edge);
392          }
393        }
394        assert((Src == Target || !AugmentingEdges[Src].empty()) &&
395               "incorrectly constructed augmenting edges");
396      }
397  
398      return AugmentingOrder;
399    }
400  
401    /// Update the current flow along the given (acyclic) subgraph specified by
402    /// the vertex order, AugmentingOrder. The objective is to send as much flow
403    /// as possible while evenly distributing flow among successors of each node.
404    /// After the update at least one edge is saturated.
405    bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
406      // Phase 0: Initialization
407      for (uint64_t Src : AugmentingOrder) {
408        Nodes[Src].FracFlow = 0;
409        Nodes[Src].IntFlow = 0;
410        for (auto &Edge : AugmentingEdges[Src]) {
411          Edge->AugmentedFlow = 0;
412        }
413      }
414  
415      // Phase 1: Send a unit of fractional flow along the DAG
416      uint64_t MaxFlowAmount = INF;
417      Nodes[Source].FracFlow = 1.0;
418      for (uint64_t Src : AugmentingOrder) {
419        assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
420               "incorrectly computed fractional flow");
421        // Distribute flow evenly among successors of Src
422        uint64_t Degree = AugmentingEdges[Src].size();
423        for (auto &Edge : AugmentingEdges[Src]) {
424          double EdgeFlow = Nodes[Src].FracFlow / Degree;
425          Nodes[Edge->Dst].FracFlow += EdgeFlow;
426          if (Edge->Capacity == INF)
427            continue;
428          uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
429          MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
430        }
431      }
432      // Stop early if we cannot send any (integral) flow from Source to Target
433      if (MaxFlowAmount == 0)
434        return false;
435  
436      // Phase 2: Send an integral flow of MaxFlowAmount
437      Nodes[Source].IntFlow = MaxFlowAmount;
438      for (uint64_t Src : AugmentingOrder) {
439        if (Src == Target)
440          break;
441        // Distribute flow evenly among successors of Src, rounding up to make
442        // sure all flow is sent
443        uint64_t Degree = AugmentingEdges[Src].size();
444        // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
445        uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
446        for (auto &Edge : AugmentingEdges[Src]) {
447          uint64_t Dst = Edge->Dst;
448          uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
449          EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
450          Nodes[Dst].IntFlow += EdgeFlow;
451          Nodes[Src].IntFlow -= EdgeFlow;
452          Edge->AugmentedFlow += EdgeFlow;
453        }
454      }
455      assert(Nodes[Target].IntFlow <= MaxFlowAmount);
456      Nodes[Target].IntFlow = 0;
457  
458      // Phase 3: Send excess flow back traversing the nodes backwards.
459      // Because of rounding, not all flow can be sent along the edges of Src.
460      // Hence, sending the remaining flow back to maintain flow conservation
461      for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
462        uint64_t Src = AugmentingOrder[Idx - 1];
463        // Try to send excess flow back along each edge.
464        // Make sure we only send back flow we just augmented (AugmentedFlow).
465        for (auto &Edge : AugmentingEdges[Src]) {
466          uint64_t Dst = Edge->Dst;
467          if (Nodes[Dst].IntFlow == 0)
468            continue;
469          uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
470          Nodes[Dst].IntFlow -= EdgeFlow;
471          Nodes[Src].IntFlow += EdgeFlow;
472          Edge->AugmentedFlow -= EdgeFlow;
473        }
474      }
475  
476      // Phase 4: Update flow values along all edges
477      bool HasSaturatedEdges = false;
478      for (uint64_t Src : AugmentingOrder) {
479        // Verify that we have sent all the excess flow from the node
480        assert(Src == Source || Nodes[Src].IntFlow == 0);
481        for (auto &Edge : AugmentingEdges[Src]) {
482          assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
483          // Update flow values along the edge and its reverse copy
484          auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
485          Edge->Flow += Edge->AugmentedFlow;
486          RevEdge.Flow -= Edge->AugmentedFlow;
487          if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
488            HasSaturatedEdges = true;
489        }
490      }
491  
492      // The augmentation is successful iff at least one edge becomes saturated
493      return HasSaturatedEdges;
494    }
495  
496    /// Identify candidate (shortest) edges for augmentation.
497    void identifyShortestEdges(uint64_t PathCapacity) {
498      assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
499      // To make sure the augmentation DAG contains only edges with large residual
500      // capacity, we prune all edges whose capacity is below a fraction of
501      // the capacity of the augmented path.
502      // (All edges of the path itself are always in the DAG)
503      uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
504  
505      // Decide which edges are on a shortest path from Source to Target
506      for (size_t Src = 0; Src < Nodes.size(); Src++) {
507        // An edge cannot be augmenting if the endpoint has large distance
508        if (Nodes[Src].Distance > Nodes[Target].Distance)
509          continue;
510  
511        for (auto &Edge : Edges[Src]) {
512          uint64_t Dst = Edge.Dst;
513          Edge.OnShortestPath =
514              Src != Target && Dst != Source &&
515              Nodes[Dst].Distance <= Nodes[Target].Distance &&
516              Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
517              Edge.Capacity > Edge.Flow &&
518              uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
519        }
520      }
521    }
522  
523    /// Maximum number of DFS iterations for DAG finding.
524    static constexpr uint64_t MaxDfsCalls = 10;
525  
526    /// A node in a flow network.
527    struct Node {
528      /// The cost of the cheapest path from the source to the current node.
529      int64_t Distance;
530      /// The node preceding the current one in the path.
531      uint64_t ParentNode;
532      /// The index of the edge between ParentNode and the current node.
533      uint64_t ParentEdgeIndex;
534      /// An indicator of whether the current node is in a queue.
535      bool Taken;
536  
537      /// Data fields utilized in DAG-augmentation:
538      /// Fractional flow.
539      double FracFlow;
540      /// Integral flow.
541      uint64_t IntFlow;
542      /// Discovery time.
543      uint64_t Discovery;
544      /// Finish time.
545      uint64_t Finish;
546      /// NumCalls.
547      uint64_t NumCalls;
548    };
549  
550    /// An edge in a flow network.
551    struct Edge {
552      /// The cost of the edge.
553      int64_t Cost;
554      /// The capacity of the edge.
555      int64_t Capacity;
556      /// The current flow on the edge.
557      int64_t Flow;
558      /// The destination node of the edge.
559      uint64_t Dst;
560      /// The index of the reverse edge between Dst and the current node.
561      uint64_t RevEdgeIndex;
562  
563      /// Data fields utilized in DAG-augmentation:
564      /// Whether the edge is currently on a shortest path from Source to Target.
565      bool OnShortestPath;
566      /// Extra flow along the edge.
567      uint64_t AugmentedFlow;
568    };
569  
570    /// The set of network nodes.
571    std::vector<Node> Nodes;
572    /// The set of network edges.
573    std::vector<std::vector<Edge>> Edges;
574    /// Source node of the flow.
575    uint64_t Source;
576    /// Target (sink) node of the flow.
577    uint64_t Target;
578    /// Augmenting edges.
579    std::vector<std::vector<Edge *>> AugmentingEdges;
580    /// Params for flow computation.
581    const ProfiParams &Params;
582  };
583  
584  /// A post-processing adjustment of the control flow. It applies two steps by
585  /// rerouting some flow and making it more realistic:
586  ///
587  /// - First, it removes all isolated components ("islands") with a positive flow
588  ///   that are unreachable from the entry block. For every such component, we
589  ///   find the shortest from the entry to an exit passing through the component,
590  ///   and increase the flow by one unit along the path.
591  ///
592  /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
593  ///   with no sampled counts. Then it rebalnces the flow that goes through such
594  ///   a subgraph so that each branch is taken with probability 50%.
595  ///   An unknown subgraph is such that for every two nodes u and v:
596  ///     - u dominates v and u is not unknown;
597  ///     - v post-dominates u; and
598  ///     - all inner-nodes of all (u,v)-paths are unknown.
599  ///
600  class FlowAdjuster {
601  public:
602    FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
603        : Params(Params), Func(Func) {}
604  
605    /// Apply the post-processing.
606    void run() {
607      if (Params.JoinIslands) {
608        // Adjust the flow to get rid of isolated components
609        joinIsolatedComponents();
610      }
611  
612      if (Params.RebalanceUnknown) {
613        // Rebalance the flow inside unknown subgraphs
614        rebalanceUnknownSubgraphs();
615      }
616    }
617  
618  private:
619    void joinIsolatedComponents() {
620      // Find blocks that are reachable from the source
621      auto Visited = BitVector(NumBlocks(), false);
622      findReachable(Func.Entry, Visited);
623  
624      // Iterate over all non-reachable blocks and adjust their weights
625      for (uint64_t I = 0; I < NumBlocks(); I++) {
626        auto &Block = Func.Blocks[I];
627        if (Block.Flow > 0 && !Visited[I]) {
628          // Find a path from the entry to an exit passing through the block I
629          auto Path = findShortestPath(I);
630          // Increase the flow along the path
631          assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
632                 "incorrectly computed path adjusting control flow");
633          Func.Blocks[Func.Entry].Flow += 1;
634          for (auto &Jump : Path) {
635            Jump->Flow += 1;
636            Func.Blocks[Jump->Target].Flow += 1;
637            // Update reachability
638            findReachable(Jump->Target, Visited);
639          }
640        }
641      }
642    }
643  
644    /// Run BFS from a given block along the jumps with a positive flow and mark
645    /// all reachable blocks.
646    void findReachable(uint64_t Src, BitVector &Visited) {
647      if (Visited[Src])
648        return;
649      std::queue<uint64_t> Queue;
650      Queue.push(Src);
651      Visited[Src] = true;
652      while (!Queue.empty()) {
653        Src = Queue.front();
654        Queue.pop();
655        for (auto *Jump : Func.Blocks[Src].SuccJumps) {
656          uint64_t Dst = Jump->Target;
657          if (Jump->Flow > 0 && !Visited[Dst]) {
658            Queue.push(Dst);
659            Visited[Dst] = true;
660          }
661        }
662      }
663    }
664  
665    /// Find the shortest path from the entry block to an exit block passing
666    /// through a given block.
667    std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
668      // A path from the entry block to BlockIdx
669      auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
670      // A path from BlockIdx to an exit block
671      auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
672  
673      // Concatenate the two paths
674      std::vector<FlowJump *> Result;
675      Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
676      Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
677      return Result;
678    }
679  
680    /// Apply the Dijkstra algorithm to find the shortest path from a given
681    /// Source to a given Target block.
682    /// If Target == -1, then the path ends at an exit block.
683    std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
684      // Quit early, if possible
685      if (Source == Target)
686        return std::vector<FlowJump *>();
687      if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
688        return std::vector<FlowJump *>();
689  
690      // Initialize data structures
691      auto Distance = std::vector<int64_t>(NumBlocks(), INF);
692      auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
693      Distance[Source] = 0;
694      std::set<std::pair<uint64_t, uint64_t>> Queue;
695      Queue.insert(std::make_pair(Distance[Source], Source));
696  
697      // Run the Dijkstra algorithm
698      while (!Queue.empty()) {
699        uint64_t Src = Queue.begin()->second;
700        Queue.erase(Queue.begin());
701        // If we found a solution, quit early
702        if (Src == Target ||
703            (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
704          break;
705  
706        for (auto *Jump : Func.Blocks[Src].SuccJumps) {
707          uint64_t Dst = Jump->Target;
708          int64_t JumpDist = jumpDistance(Jump);
709          if (Distance[Dst] > Distance[Src] + JumpDist) {
710            Queue.erase(std::make_pair(Distance[Dst], Dst));
711  
712            Distance[Dst] = Distance[Src] + JumpDist;
713            Parent[Dst] = Jump;
714  
715            Queue.insert(std::make_pair(Distance[Dst], Dst));
716          }
717        }
718      }
719      // If Target is not provided, find the closest exit block
720      if (Target == AnyExitBlock) {
721        for (uint64_t I = 0; I < NumBlocks(); I++) {
722          if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
723            if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
724              Target = I;
725            }
726          }
727        }
728      }
729      assert(Parent[Target] != nullptr && "a path does not exist");
730  
731      // Extract the constructed path
732      std::vector<FlowJump *> Result;
733      uint64_t Now = Target;
734      while (Now != Source) {
735        assert(Now == Parent[Now]->Target && "incorrect parent jump");
736        Result.push_back(Parent[Now]);
737        Now = Parent[Now]->Source;
738      }
739      // Reverse the path, since it is extracted from Target to Source
740      std::reverse(Result.begin(), Result.end());
741      return Result;
742    }
743  
744    /// A distance of a path for a given jump.
745    /// In order to incite the path to use blocks/jumps with large positive flow,
746    /// and avoid changing branch probability of outgoing edges drastically,
747    /// set the jump distance so as:
748    ///   - to minimize the number of unlikely jumps used and subject to that,
749    ///   - to minimize the number of Flow == 0 jumps used and subject to that,
750    ///   - minimizes total multiplicative Flow increase for the remaining edges.
751    /// To capture this objective with integer distances, we round off fractional
752    /// parts to a multiple of 1 / BaseDistance.
753    int64_t jumpDistance(FlowJump *Jump) const {
754      if (Jump->IsUnlikely)
755        return Params.CostUnlikely;
756      uint64_t BaseDistance =
757          std::max(FlowAdjuster::MinBaseDistance,
758                   std::min(Func.Blocks[Func.Entry].Flow,
759                            Params.CostUnlikely / (2 * (NumBlocks() + 1))));
760      if (Jump->Flow > 0)
761        return BaseDistance + BaseDistance / Jump->Flow;
762      return 2 * BaseDistance * (NumBlocks() + 1);
763    };
764  
765    uint64_t NumBlocks() const { return Func.Blocks.size(); }
766  
767    /// Rebalance unknown subgraphs so that the flow is split evenly across the
768    /// outgoing branches of every block of the subgraph. The method iterates over
769    /// blocks with known weight and identifies unknown subgraphs rooted at the
770    /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
771    void rebalanceUnknownSubgraphs() {
772      // Try to find unknown subgraphs from each block
773      for (const FlowBlock &SrcBlock : Func.Blocks) {
774        // Verify if rebalancing rooted at SrcBlock is feasible
775        if (!canRebalanceAtRoot(&SrcBlock))
776          continue;
777  
778        // Find an unknown subgraphs starting at SrcBlock. Along the way,
779        // fill in known destinations and intermediate unknown blocks.
780        std::vector<FlowBlock *> UnknownBlocks;
781        std::vector<FlowBlock *> KnownDstBlocks;
782        findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);
783  
784        // Verify if rebalancing of the subgraph is feasible. If the search is
785        // successful, find the unique destination block (which can be null)
786        FlowBlock *DstBlock = nullptr;
787        if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
788                                  DstBlock))
789          continue;
790  
791        // We cannot rebalance subgraphs containing cycles among unknown blocks
792        if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
793          continue;
794  
795        // Rebalance the flow
796        rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
797      }
798    }
799  
800    /// Verify if rebalancing rooted at a given block is possible.
801    bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
802      // Do not attempt to find unknown subgraphs from an unknown or a
803      // zero-flow block
804      if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
805        return false;
806  
807      // Do not attempt to process subgraphs from a block w/o unknown sucessors
808      bool HasUnknownSuccs = false;
809      for (auto *Jump : SrcBlock->SuccJumps) {
810        if (Func.Blocks[Jump->Target].HasUnknownWeight) {
811          HasUnknownSuccs = true;
812          break;
813        }
814      }
815      if (!HasUnknownSuccs)
816        return false;
817  
818      return true;
819    }
820  
821    /// Find an unknown subgraph starting at block SrcBlock. The method sets
822    /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
823    void findUnknownSubgraph(const FlowBlock *SrcBlock,
824                             std::vector<FlowBlock *> &KnownDstBlocks,
825                             std::vector<FlowBlock *> &UnknownBlocks) {
826      // Run BFS from SrcBlock and make sure all paths are going through unknown
827      // blocks and end at a known DstBlock
828      auto Visited = BitVector(NumBlocks(), false);
829      std::queue<uint64_t> Queue;
830  
831      Queue.push(SrcBlock->Index);
832      Visited[SrcBlock->Index] = true;
833      while (!Queue.empty()) {
834        auto &Block = Func.Blocks[Queue.front()];
835        Queue.pop();
836        // Process blocks reachable from Block
837        for (auto *Jump : Block.SuccJumps) {
838          // If Jump can be ignored, skip it
839          if (ignoreJump(SrcBlock, nullptr, Jump))
840            continue;
841  
842          uint64_t Dst = Jump->Target;
843          // If Dst has been visited, skip Jump
844          if (Visited[Dst])
845            continue;
846          // Process block Dst
847          Visited[Dst] = true;
848          if (!Func.Blocks[Dst].HasUnknownWeight) {
849            KnownDstBlocks.push_back(&Func.Blocks[Dst]);
850          } else {
851            Queue.push(Dst);
852            UnknownBlocks.push_back(&Func.Blocks[Dst]);
853          }
854        }
855      }
856    }
857  
858    /// Verify if rebalancing of the subgraph is feasible. If the checks are
859    /// successful, set the unique destination block, DstBlock (can be null).
860    bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
861                              const std::vector<FlowBlock *> &KnownDstBlocks,
862                              const std::vector<FlowBlock *> &UnknownBlocks,
863                              FlowBlock *&DstBlock) {
864      // If the list of unknown blocks is empty, we don't need rebalancing
865      if (UnknownBlocks.empty())
866        return false;
867  
868      // If there are multiple known sinks, we can't rebalance
869      if (KnownDstBlocks.size() > 1)
870        return false;
871      DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
872  
873      // Verify sinks of the subgraph
874      for (auto *Block : UnknownBlocks) {
875        if (Block->SuccJumps.empty()) {
876          // If there are multiple (known and unknown) sinks, we can't rebalance
877          if (DstBlock != nullptr)
878            return false;
879          continue;
880        }
881        size_t NumIgnoredJumps = 0;
882        for (auto *Jump : Block->SuccJumps) {
883          if (ignoreJump(SrcBlock, DstBlock, Jump))
884            NumIgnoredJumps++;
885        }
886        // If there is a non-sink block in UnknownBlocks with all jumps ignored,
887        // then we can't rebalance
888        if (NumIgnoredJumps == Block->SuccJumps.size())
889          return false;
890      }
891  
892      return true;
893    }
894  
895    /// Decide whether the Jump is ignored while processing an unknown subgraphs
896    /// rooted at basic block SrcBlock with the destination block, DstBlock.
897    bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
898                    const FlowJump *Jump) {
899      // Ignore unlikely jumps with zero flow
900      if (Jump->IsUnlikely && Jump->Flow == 0)
901        return true;
902  
903      auto JumpSource = &Func.Blocks[Jump->Source];
904      auto JumpTarget = &Func.Blocks[Jump->Target];
905  
906      // Do not ignore jumps coming into DstBlock
907      if (DstBlock != nullptr && JumpTarget == DstBlock)
908        return false;
909  
910      // Ignore jumps out of SrcBlock to known blocks
911      if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
912        return true;
913  
914      // Ignore jumps to known blocks with zero flow
915      if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
916        return true;
917  
918      return false;
919    }
920  
921    /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
922    /// UnknownBlocks in the topological order (so that all jumps are "forward").
923    bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
924                           std::vector<FlowBlock *> &UnknownBlocks) {
925      // Extract local in-degrees in the considered subgraph
926      auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
927      auto fillInDegree = [&](const FlowBlock *Block) {
928        for (auto *Jump : Block->SuccJumps) {
929          if (ignoreJump(SrcBlock, DstBlock, Jump))
930            continue;
931          LocalInDegree[Jump->Target]++;
932        }
933      };
934      fillInDegree(SrcBlock);
935      for (auto *Block : UnknownBlocks) {
936        fillInDegree(Block);
937      }
938      // A loop containing SrcBlock
939      if (LocalInDegree[SrcBlock->Index] > 0)
940        return false;
941  
942      std::vector<FlowBlock *> AcyclicOrder;
943      std::queue<uint64_t> Queue;
944      Queue.push(SrcBlock->Index);
945      while (!Queue.empty()) {
946        FlowBlock *Block = &Func.Blocks[Queue.front()];
947        Queue.pop();
948        // Stop propagation once we reach DstBlock, if any
949        if (DstBlock != nullptr && Block == DstBlock)
950          break;
951  
952        // Keep an acyclic order of unknown blocks
953        if (Block->HasUnknownWeight && Block != SrcBlock)
954          AcyclicOrder.push_back(Block);
955  
956        // Add to the queue all successors with zero local in-degree
957        for (auto *Jump : Block->SuccJumps) {
958          if (ignoreJump(SrcBlock, DstBlock, Jump))
959            continue;
960          uint64_t Dst = Jump->Target;
961          LocalInDegree[Dst]--;
962          if (LocalInDegree[Dst] == 0) {
963            Queue.push(Dst);
964          }
965        }
966      }
967  
968      // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
969      // of all blocks
970      if (UnknownBlocks.size() != AcyclicOrder.size())
971        return false;
972      UnknownBlocks = AcyclicOrder;
973      return true;
974    }
975  
976    /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
977    /// having UnknownBlocks intermediate blocks.
978    void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
979                                  const FlowBlock *DstBlock,
980                                  const std::vector<FlowBlock *> &UnknownBlocks) {
981      assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
982  
983      // Ditribute flow from the source block
984      uint64_t BlockFlow = 0;
985      // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
986      for (auto *Jump : SrcBlock->SuccJumps) {
987        if (ignoreJump(SrcBlock, DstBlock, Jump))
988          continue;
989        BlockFlow += Jump->Flow;
990      }
991      rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
992  
993      // Ditribute flow from the remaining blocks
994      for (auto *Block : UnknownBlocks) {
995        assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
996        uint64_t BlockFlow = 0;
997        // Block's flow is the sum of incoming flows
998        for (auto *Jump : Block->PredJumps) {
999          BlockFlow += Jump->Flow;
1000        }
1001        Block->Flow = BlockFlow;
1002        rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
1003      }
1004    }
1005  
1006    /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
1007    /// and ending at DstBlock.
1008    void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
1009                        const FlowBlock *Block, uint64_t BlockFlow) {
1010      // Process all successor jumps and update corresponding flow values
1011      size_t BlockDegree = 0;
1012      for (auto *Jump : Block->SuccJumps) {
1013        if (ignoreJump(SrcBlock, DstBlock, Jump))
1014          continue;
1015        BlockDegree++;
1016      }
1017      // If all successor jumps of the block are ignored, skip it
1018      if (DstBlock == nullptr && BlockDegree == 0)
1019        return;
1020      assert(BlockDegree > 0 && "all outgoing jumps are ignored");
1021  
1022      // Each of the Block's successors gets the following amount of flow.
1023      // Rounding the value up so that all flow is propagated
1024      uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1025      for (auto *Jump : Block->SuccJumps) {
1026        if (ignoreJump(SrcBlock, DstBlock, Jump))
1027          continue;
1028        uint64_t Flow = std::min(SuccFlow, BlockFlow);
1029        Jump->Flow = Flow;
1030        BlockFlow -= Flow;
1031      }
1032      assert(BlockFlow == 0 && "not all flow is propagated");
1033    }
1034  
1035    /// A constant indicating an arbitrary exit block of a function.
1036    static constexpr uint64_t AnyExitBlock = uint64_t(-1);
1037    /// Minimum BaseDistance for the jump distance values in island joining.
1038    static constexpr uint64_t MinBaseDistance = 10000;
1039  
1040    /// Params for flow computation.
1041    const ProfiParams &Params;
1042    /// The function.
1043    FlowFunction &Func;
1044  };
1045  
1046  std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
1047                                               const FlowBlock &Block);
1048  std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
1049                                              const FlowJump &Jump);
1050  
1051  /// Initializing flow network for a given function.
1052  ///
1053  /// Every block is split into two nodes that are responsible for (i) an
1054  /// incoming flow, (ii) an outgoing flow; they penalize an increase or a
1055  /// reduction of the block weight.
1056  void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
1057                         FlowFunction &Func) {
1058    uint64_t NumBlocks = Func.Blocks.size();
1059    assert(NumBlocks > 1 && "Too few blocks in a function");
1060    uint64_t NumJumps = Func.Jumps.size();
1061    assert(NumJumps > 0 && "Too few jumps in a function");
1062  
1063    // Introducing dummy source/sink pairs to allow flow circulation.
1064    // The nodes corresponding to blocks of the function have indicies in
1065    // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
1066    // next four values.
1067    uint64_t S = 2 * NumBlocks;
1068    uint64_t T = S + 1;
1069    uint64_t S1 = S + 2;
1070    uint64_t T1 = S + 3;
1071  
1072    Network.initialize(2 * NumBlocks + 4, S1, T1);
1073  
1074    // Initialize nodes of the flow network
1075    for (uint64_t B = 0; B < NumBlocks; B++) {
1076      auto &Block = Func.Blocks[B];
1077  
1078      // Split every block into two auxiliary nodes to allow
1079      // increase/reduction of the block count.
1080      uint64_t Bin = 2 * B;
1081      uint64_t Bout = 2 * B + 1;
1082  
1083      // Edges from S and to T
1084      if (Block.isEntry()) {
1085        Network.addEdge(S, Bin, 0);
1086      } else if (Block.isExit()) {
1087        Network.addEdge(Bout, T, 0);
1088      }
1089  
1090      // Assign costs for increasing/decreasing the block counts
1091      auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);
1092  
1093      // Add the corresponding edges to the network
1094      Network.addEdge(Bin, Bout, AuxCostInc);
1095      if (Block.Weight > 0) {
1096        Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
1097        Network.addEdge(S1, Bout, Block.Weight, 0);
1098        Network.addEdge(Bin, T1, Block.Weight, 0);
1099      }
1100    }
1101  
1102    // Initialize edges of the flow network
1103    for (uint64_t J = 0; J < NumJumps; J++) {
1104      auto &Jump = Func.Jumps[J];
1105  
1106      // Get the endpoints corresponding to the jump
1107      uint64_t Jin = 2 * Jump.Source + 1;
1108      uint64_t Jout = 2 * Jump.Target;
1109  
1110      // Assign costs for increasing/decreasing the jump counts
1111      auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);
1112  
1113      // Add the corresponding edges to the network
1114      Network.addEdge(Jin, Jout, AuxCostInc);
1115      if (Jump.Weight > 0) {
1116        Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
1117        Network.addEdge(S1, Jout, Jump.Weight, 0);
1118        Network.addEdge(Jin, T1, Jump.Weight, 0);
1119      }
1120    }
1121  
1122    // Make sure we have a valid flow circulation
1123    Network.addEdge(T, S, 0);
1124  }
1125  
1126  /// Assign costs for increasing/decreasing the block counts.
1127  std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
1128                                               const FlowBlock &Block) {
1129    // Modifying the weight of an unlikely block is expensive
1130    if (Block.IsUnlikely)
1131      return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
1132  
1133    // Assign default values for the costs
1134    int64_t CostInc = Params.CostBlockInc;
1135    int64_t CostDec = Params.CostBlockDec;
1136    // Update the costs depending on the block metadata
1137    if (Block.HasUnknownWeight) {
1138      CostInc = Params.CostBlockUnknownInc;
1139      CostDec = 0;
1140    } else {
1141      // Increasing the count for "cold" blocks with zero initial count is more
1142      // expensive than for "hot" ones
1143      if (Block.Weight == 0)
1144        CostInc = Params.CostBlockZeroInc;
1145      // Modifying the count of the entry block is expensive
1146      if (Block.isEntry()) {
1147        CostInc = Params.CostBlockEntryInc;
1148        CostDec = Params.CostBlockEntryDec;
1149      }
1150    }
1151    return std::make_pair(CostInc, CostDec);
1152  }
1153  
1154  /// Assign costs for increasing/decreasing the jump counts.
1155  std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
1156                                              const FlowJump &Jump) {
1157    // Modifying the weight of an unlikely jump is expensive
1158    if (Jump.IsUnlikely)
1159      return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
1160  
1161    // Assign default values for the costs
1162    int64_t CostInc = Params.CostJumpInc;
1163    int64_t CostDec = Params.CostJumpDec;
1164    // Update the costs depending on the block metadata
1165    if (Jump.Source + 1 == Jump.Target) {
1166      // Adjusting the fall-through branch
1167      CostInc = Params.CostJumpFTInc;
1168      CostDec = Params.CostJumpFTDec;
1169    }
1170    if (Jump.HasUnknownWeight) {
1171      // The cost is different for fall-through and non-fall-through branches
1172      if (Jump.Source + 1 == Jump.Target)
1173        CostInc = Params.CostJumpUnknownFTInc;
1174      else
1175        CostInc = Params.CostJumpUnknownInc;
1176      CostDec = 0;
1177    } else {
1178      assert(Jump.Weight > 0 && "found zero-weight jump with a positive weight");
1179    }
1180    return std::make_pair(CostInc, CostDec);
1181  }
1182  
1183  /// Extract resulting block and edge counts from the flow network.
1184  void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
1185                      FlowFunction &Func) {
1186    uint64_t NumBlocks = Func.Blocks.size();
1187    uint64_t NumJumps = Func.Jumps.size();
1188  
1189    // Extract resulting jump counts
1190    for (uint64_t J = 0; J < NumJumps; J++) {
1191      auto &Jump = Func.Jumps[J];
1192      uint64_t SrcOut = 2 * Jump.Source + 1;
1193      uint64_t DstIn = 2 * Jump.Target;
1194  
1195      int64_t Flow = 0;
1196      int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
1197      if (Jump.Source != Jump.Target)
1198        Flow = int64_t(Jump.Weight) + AuxFlow;
1199      else
1200        Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);
1201  
1202      Jump.Flow = Flow;
1203      assert(Flow >= 0 && "negative jump flow");
1204    }
1205  
1206    // Extract resulting block counts
1207    auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1208    auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1209    for (auto &Jump : Func.Jumps) {
1210      InFlow[Jump.Target] += Jump.Flow;
1211      OutFlow[Jump.Source] += Jump.Flow;
1212    }
1213    for (uint64_t B = 0; B < NumBlocks; B++) {
1214      auto &Block = Func.Blocks[B];
1215      Block.Flow = std::max(OutFlow[B], InFlow[B]);
1216    }
1217  }
1218  
1219  #ifndef NDEBUG
1220  /// Verify that the provided block/jump weights are as expected.
1221  void verifyInput(const FlowFunction &Func) {
1222    // Verify entry and exit blocks
1223    assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
1224    size_t NumExitBlocks = 0;
1225    for (size_t I = 1; I < Func.Blocks.size(); I++) {
1226      assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
1227      if (Func.Blocks[I].isExit())
1228        NumExitBlocks++;
1229    }
1230    assert(NumExitBlocks > 0 && "cannot find exit blocks");
1231  
1232    // Verify that there are no parallel edges
1233    for (auto &Block : Func.Blocks) {
1234      std::unordered_set<uint64_t> UniqueSuccs;
1235      for (auto &Jump : Block.SuccJumps) {
1236        auto It = UniqueSuccs.insert(Jump->Target);
1237        assert(It.second && "input CFG contains parallel edges");
1238      }
1239    }
1240    // Verify CFG jumps
1241    for (auto &Block : Func.Blocks) {
1242      assert((!Block.isEntry() || !Block.isExit()) &&
1243             "a block cannot be an entry and an exit");
1244    }
1245    // Verify input block weights
1246    for (auto &Block : Func.Blocks) {
1247      assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
1248             "non-zero weight of a block w/o weight except for an entry");
1249    }
1250    // Verify input jump weights
1251    for (auto &Jump : Func.Jumps) {
1252      assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
1253             "non-zero weight of a jump w/o weight");
1254    }
1255  }
1256  
1257  /// Verify that the computed flow values satisfy flow conservation rules.
1258  void verifyOutput(const FlowFunction &Func) {
1259    const uint64_t NumBlocks = Func.Blocks.size();
1260    auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1261    auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1262    for (const auto &Jump : Func.Jumps) {
1263      InFlow[Jump.Target] += Jump.Flow;
1264      OutFlow[Jump.Source] += Jump.Flow;
1265    }
1266  
1267    uint64_t TotalInFlow = 0;
1268    uint64_t TotalOutFlow = 0;
1269    for (uint64_t I = 0; I < NumBlocks; I++) {
1270      auto &Block = Func.Blocks[I];
1271      if (Block.isEntry()) {
1272        TotalInFlow += Block.Flow;
1273        assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1274      } else if (Block.isExit()) {
1275        TotalOutFlow += Block.Flow;
1276        assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1277      } else {
1278        assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1279        assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1280      }
1281    }
1282    assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
1283  
1284    // Verify that there are no isolated flow components
1285    // One could modify FlowFunction to hold edges indexed by the sources, which
1286    // will avoid a creation of the object
1287    auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
1288    for (const auto &Jump : Func.Jumps) {
1289      if (Jump.Flow > 0) {
1290        PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
1291      }
1292    }
1293  
1294    // Run BFS from the source along edges with positive flow
1295    std::queue<uint64_t> Queue;
1296    auto Visited = BitVector(NumBlocks, false);
1297    Queue.push(Func.Entry);
1298    Visited[Func.Entry] = true;
1299    while (!Queue.empty()) {
1300      uint64_t Src = Queue.front();
1301      Queue.pop();
1302      for (uint64_t Dst : PositiveFlowEdges[Src]) {
1303        if (!Visited[Dst]) {
1304          Queue.push(Dst);
1305          Visited[Dst] = true;
1306        }
1307      }
1308    }
1309  
1310    // Verify that every block that has a positive flow is reached from the source
1311    // along edges with a positive flow
1312    for (uint64_t I = 0; I < NumBlocks; I++) {
1313      auto &Block = Func.Blocks[I];
1314      assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
1315    }
1316  }
1317  #endif
1318  
1319  } // end of anonymous namespace
1320  
1321  /// Apply the profile inference algorithm for a given function and provided
1322  /// profi options
1323  void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
1324    // Check if the function has samples and assign initial flow values
1325    bool HasSamples = false;
1326    for (FlowBlock &Block : Func.Blocks) {
1327      if (Block.Weight > 0)
1328        HasSamples = true;
1329      Block.Flow = Block.Weight;
1330    }
1331    for (FlowJump &Jump : Func.Jumps) {
1332      if (Jump.Weight > 0)
1333        HasSamples = true;
1334      Jump.Flow = Jump.Weight;
1335    }
1336  
1337    // Quit early for functions with a single block or ones w/o samples
1338    if (Func.Blocks.size() <= 1 || !HasSamples)
1339      return;
1340  
1341  #ifndef NDEBUG
1342    // Verify the input data
1343    verifyInput(Func);
1344  #endif
1345  
1346    // Create and apply an inference network model
1347    auto InferenceNetwork = MinCostMaxFlow(Params);
1348    initializeNetwork(Params, InferenceNetwork, Func);
1349    InferenceNetwork.run();
1350  
1351    // Extract flow values for every block and every edge
1352    extractWeights(Params, InferenceNetwork, Func);
1353  
1354    // Post-processing adjustments to the flow
1355    auto Adjuster = FlowAdjuster(Params, Func);
1356    Adjuster.run();
1357  
1358  #ifndef NDEBUG
1359    // Verify the result
1360    verifyOutput(Func);
1361  #endif
1362  }
1363  
1364  /// Apply the profile inference algorithm for a given flow function
1365  void llvm::applyFlowInference(FlowFunction &Func) {
1366    ProfiParams Params;
1367    // Set the params from the command-line flags.
1368    Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
1369    Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
1370    Params.JoinIslands = SampleProfileJoinIslands;
1371    Params.CostBlockInc = SampleProfileProfiCostBlockInc;
1372    Params.CostBlockDec = SampleProfileProfiCostBlockDec;
1373    Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
1374    Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
1375    Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
1376    Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;
1377  
1378    applyFlowInference(Params, Func);
1379  }
1380