xref: /freebsd/contrib/llvm-project/llvm/lib/Support/BalancedPartitioning.cpp (revision cb14a3fe5122c879eae1fb480ed7ce82a699ddb6)
1 //===- BalancedPartitioning.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements BalancedPartitioning, a recursive balanced graph
10 // partitioning algorithm.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Support/BalancedPartitioning.h"
15 #include "llvm/Support/Debug.h"
16 #include "llvm/Support/Format.h"
17 #include "llvm/Support/FormatVariadic.h"
18 #include "llvm/Support/ThreadPool.h"
19 
20 using namespace llvm;
21 #define DEBUG_TYPE "balanced-partitioning"
22 
/// Print a debug representation of this node to \p OS in the form
/// {ID=<id> Utilities={<comma-separated utility node ids>} Bucket=<bucket>}.
/// The doubled braces in the format string are formatv escapes for literal
/// '{'; "$[,]" joins the utility-node range with commas.
void BPFunctionNode::dump(raw_ostream &OS) const {
  OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id,
                make_range(UtilityNodes.begin(), UtilityNodes.end()), Bucket);
}
27 
/// Submit \p F to the underlying thread pool while counting in-flight tasks,
/// so wait() can tell when no running task may still spawn more tasks.
/// NOTE: the lambda captures by value ([=]), so F is copied into the task.
template <typename Func>
void BalancedPartitioning::BPThreadPool::async(Func &&F) {
#if LLVM_ENABLE_THREADS
  // This new thread could spawn more threads, so mark it as active
  ++NumActiveThreads;
  TheThreadPool.async([=]() {
    // Run the task
    F();

    // This thread will no longer spawn new threads, so mark it as inactive
    if (--NumActiveThreads == 0) {
      // There are no more active threads, so mark as finished and notify
      {
        // Take the lock before flipping the flag so a concurrent wait()
        // cannot miss the notification between its predicate check and its
        // sleep.
        std::unique_lock<std::mutex> lock(mtx);
        assert(!IsFinishedSpawning);
        IsFinishedSpawning = true;
      }
      cv.notify_one();
    }
  });
#else
  llvm_unreachable("threads are disabled");
#endif
}
52 
/// Block until every task submitted via async() — including tasks spawned
/// transitively by other tasks — has completed.
void BalancedPartitioning::BPThreadPool::wait() {
#if LLVM_ENABLE_THREADS
  // TODO: We could remove the mutex and condition variable and use
  // std::atomic::wait() instead, but that isn't available until C++20
  {
    std::unique_lock<std::mutex> lock(mtx);
    // Sleep until the last active task sets IsFinishedSpawning in async().
    cv.wait(lock, [&]() { return IsFinishedSpawning; });
    assert(IsFinishedSpawning && NumActiveThreads == 0);
  }
  // Now we can call ThreadPool::wait() since all tasks have been submitted
  TheThreadPool.wait();
#else
  llvm_unreachable("threads are disabled");
#endif
}
68 
69 BalancedPartitioning::BalancedPartitioning(
70     const BalancedPartitioningConfig &Config)
71     : Config(Config) {
72   // Pre-computing log2 values
73   Log2Cache[0] = 0.0;
74   for (unsigned I = 1; I < LOG_CACHE_SIZE; I++)
75     Log2Cache[I] = std::log2(I);
76 }
77 
78 void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const {
79   LLVM_DEBUG(
80       dbgs() << format(
81           "Partitioning %d nodes using depth %d and %d iterations per split\n",
82           Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit));
83   std::optional<BPThreadPool> TP;
84 #if LLVM_ENABLE_THREADS
85   ThreadPool TheThreadPool;
86   if (Config.TaskSplitDepth > 1)
87     TP.emplace(TheThreadPool);
88 #endif
89 
90   // Record the input order
91   for (unsigned I = 0; I < Nodes.size(); I++)
92     Nodes[I].InputOrderIndex = I;
93 
94   auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end());
95   auto BisectTask = [=, &TP]() {
96     bisect(NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP);
97   };
98   if (TP) {
99     TP->async(std::move(BisectTask));
100     TP->wait();
101   } else {
102     BisectTask();
103   }
104 
105   llvm::stable_sort(NodesRange, [](const auto &L, const auto &R) {
106     return L.Bucket < R.Bucket;
107   });
108 
109   LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n");
110 }
111 
/// Recursively bisect \p Nodes, assigning bucket ids from an implicit binary
/// tree rooted at \p RootBucket. \p Offset is the first final bucket index
/// owned by this subrange; \p TP, when set, runs the two recursive halves as
/// concurrent pool tasks.
void BalancedPartitioning::bisect(const FunctionNodeRange Nodes,
                                  unsigned RecDepth, unsigned RootBucket,
                                  unsigned Offset,
                                  std::optional<BPThreadPool> &TP) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) {
    // We've reached the lowest level of the recursion tree. Fall back to the
    // original order and assign to buckets.
    llvm::stable_sort(Nodes, [](const auto &L, const auto &R) {
      return L.InputOrderIndex < R.InputOrderIndex;
    });
    for (auto &N : Nodes)
      N.Bucket = Offset++;
    return;
  }

  LLVM_DEBUG(dbgs() << format("Bisect with %d nodes and root bucket %d\n",
                              NumNodes, RootBucket));

  // Seed the RNG with the bucket id so each subproblem is deterministic.
  std::mt19937 RNG(RootBucket);

  // Children of tree node i are numbered 2i and 2i+1.
  unsigned LeftBucket = 2 * RootBucket;
  unsigned RightBucket = 2 * RootBucket + 1;

  // Split into two and assign to the left and right buckets
  split(Nodes, LeftBucket);

  // Iteratively improve the initial split by moving nodes between buckets.
  runIterations(Nodes, RecDepth, LeftBucket, RightBucket, RNG);

  // Split nodes wrt the resulting buckets
  auto NodesMid =
      llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; });
  unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid);

  auto LeftNodes = llvm::make_range(Nodes.begin(), NodesMid);
  auto RightNodes = llvm::make_range(NodesMid, Nodes.end());

  // Capture by value ([=]) so each task owns copies of its range/offsets.
  auto LeftRecTask = [=, &TP]() {
    bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP);
  };
  auto RightRecTask = [=, &TP]() {
    bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP);
  };

  // Spawn pool tasks only while the recursion is shallow and the subproblem
  // is large enough to be worth the scheduling overhead.
  if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) {
    TP->async(std::move(LeftRecTask));
    TP->async(std::move(RightRecTask));
  } else {
    LeftRecTask();
    RightRecTask();
  }
}
164 
165 void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes,
166                                          unsigned RecDepth, unsigned LeftBucket,
167                                          unsigned RightBucket,
168                                          std::mt19937 &RNG) const {
169   unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
170   DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeDegree;
171   for (auto &N : Nodes)
172     for (auto &UN : N.UtilityNodes)
173       ++UtilityNodeDegree[UN];
174   // Remove utility nodes if they have just one edge or are connected to all
175   // functions
176   for (auto &N : Nodes)
177     llvm::erase_if(N.UtilityNodes, [&](auto &UN) {
178       return UtilityNodeDegree[UN] <= 1 || UtilityNodeDegree[UN] >= NumNodes;
179     });
180 
181   // Renumber utility nodes so they can be used to index into Signatures
182   DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;
183   for (auto &N : Nodes)
184     for (auto &UN : N.UtilityNodes)
185       if (!UtilityNodeIndex.count(UN))
186         UtilityNodeIndex[UN] = UtilityNodeIndex.size();
187   for (auto &N : Nodes)
188     for (auto &UN : N.UtilityNodes)
189       UN = UtilityNodeIndex[UN];
190 
191   // Initialize signatures
192   SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size());
193   for (auto &N : Nodes) {
194     for (auto &UN : N.UtilityNodes) {
195       assert(UN < Signatures.size());
196       if (N.Bucket == LeftBucket) {
197         Signatures[UN].LeftCount++;
198       } else {
199         Signatures[UN].RightCount++;
200       }
201     }
202   }
203 
204   for (unsigned I = 0; I < Config.IterationsPerSplit; I++) {
205     unsigned NumMovedNodes =
206         runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG);
207     if (NumMovedNodes == 0)
208       break;
209   }
210 }
211 
/// Perform one refinement iteration: refresh cached per-signature move
/// gains, compute each node's total gain, then greedily exchange the
/// highest-gain left/right node pairs while the combined gain stays
/// positive. Returns the number of nodes actually moved.
unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes,
                                            unsigned LeftBucket,
                                            unsigned RightBucket,
                                            SignaturesT &Signatures,
                                            std::mt19937 &RNG) const {
  // Init signature cost caches
  for (auto &Signature : Signatures) {
    if (Signature.CachedGainIsValid)
      continue;
    unsigned L = Signature.LeftCount;
    unsigned R = Signature.RightCount;
    assert((L > 0 || R > 0) && "incorrect signature");
    float Cost = logCost(L, R);
    Signature.CachedGainLR = 0.f;
    Signature.CachedGainRL = 0.f;
    // Gain of moving one referencing node left-to-right (LR) or
    // right-to-left (RL): cost before the move minus cost after it.
    if (L > 0)
      Signature.CachedGainLR = Cost - logCost(L - 1, R + 1);
    if (R > 0)
      Signature.CachedGainRL = Cost - logCost(L + 1, R - 1);
    Signature.CachedGainIsValid = true;
  }

  // Compute move gains
  typedef std::pair<float, BPFunctionNode *> GainPair;
  std::vector<GainPair> Gains;
  for (auto &N : Nodes) {
    bool FromLeftToRight = (N.Bucket == LeftBucket);
    float Gain = moveGain(N, FromLeftToRight, Signatures);
    Gains.push_back(std::make_pair(Gain, &N));
  }

  // Collect left and right gains
  auto LeftEnd = llvm::partition(
      Gains, [&](const auto &GP) { return GP.second->Bucket == LeftBucket; });
  auto LeftRange = llvm::make_range(Gains.begin(), LeftEnd);
  auto RightRange = llvm::make_range(LeftEnd, Gains.end());

  // Sort gains in descending order
  auto LargerGain = [](const auto &L, const auto &R) {
    return L.first > R.first;
  };
  llvm::stable_sort(LeftRange, LargerGain);
  llvm::stable_sort(RightRange, LargerGain);

  unsigned NumMovedDataVertices = 0;
  // Pair the best remaining left candidate with the best remaining right
  // candidate; exchanging one node from each side keeps the bucket sizes
  // balanced.
  for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) {
    auto &[LeftGain, LeftNode] = LeftPair;
    auto &[RightGain, RightNode] = RightPair;
    // Stop when the gain is no longer beneficial
    if (LeftGain + RightGain <= 0.f)
      break;
    // Try to exchange the nodes between buckets; moveFunctionNode may skip a
    // move at random to escape local optima, hence the per-call checks.
    if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG))
      ++NumMovedDataVertices;
    if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG))
      ++NumMovedDataVertices;
  }
  return NumMovedDataVertices;
}
271 
272 bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N,
273                                             unsigned LeftBucket,
274                                             unsigned RightBucket,
275                                             SignaturesT &Signatures,
276                                             std::mt19937 &RNG) const {
277   // Sometimes we skip the move. This helps to escape local optima
278   if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <=
279       Config.SkipProbability)
280     return false;
281 
282   bool FromLeftToRight = (N.Bucket == LeftBucket);
283   // Update the current bucket
284   N.Bucket = (FromLeftToRight ? RightBucket : LeftBucket);
285 
286   // Update signatures and invalidate gain cache
287   if (FromLeftToRight) {
288     for (auto &UN : N.UtilityNodes) {
289       auto &Signature = Signatures[UN];
290       Signature.LeftCount--;
291       Signature.RightCount++;
292       Signature.CachedGainIsValid = false;
293     }
294   } else {
295     for (auto &UN : N.UtilityNodes) {
296       auto &Signature = Signatures[UN];
297       Signature.LeftCount++;
298       Signature.RightCount--;
299       Signature.CachedGainIsValid = false;
300     }
301   }
302   return true;
303 }
304 
305 void BalancedPartitioning::split(const FunctionNodeRange Nodes,
306                                  unsigned StartBucket) const {
307   unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
308   auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2;
309 
310   std::nth_element(Nodes.begin(), NodesMid, Nodes.end(), [](auto &L, auto &R) {
311     return L.InputOrderIndex < R.InputOrderIndex;
312   });
313 
314   for (auto &N : llvm::make_range(Nodes.begin(), NodesMid))
315     N.Bucket = StartBucket;
316   for (auto &N : llvm::make_range(NodesMid, Nodes.end()))
317     N.Bucket = StartBucket + 1;
318 }
319 
320 float BalancedPartitioning::moveGain(const BPFunctionNode &N,
321                                      bool FromLeftToRight,
322                                      const SignaturesT &Signatures) {
323   float Gain = 0.f;
324   for (auto &UN : N.UtilityNodes)
325     Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR
326                              : Signatures[UN].CachedGainRL);
327   return Gain;
328 }
329 
330 float BalancedPartitioning::logCost(unsigned X, unsigned Y) const {
331   return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1));
332 }
333 
334 float BalancedPartitioning::log2Cached(unsigned i) const {
335   return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(i);
336 }
337