//===- BalancedPartitioning.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements BalancedPartitioning, a recursive balanced graph
// partitioning algorithm.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/BalancedPartitioning.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ThreadPool.h"

using namespace llvm;
#define DEBUG_TYPE "balanced-partitioning"

void BPFunctionNode::dump(raw_ostream &OS) const {
  OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id,
                make_range(UtilityNodes.begin(), UtilityNodes.end()), Bucket);
}

template <typename Func>
void BalancedPartitioning::BPThreadPool::async(Func &&F) {
#if LLVM_ENABLE_THREADS
  // This new thread could spawn more threads, so mark it as active
  ++NumActiveThreads;
  TheThreadPool.async([=]() {
    // Run the task
    F();

    // This thread will no longer spawn new threads, so mark it as inactive
    if (--NumActiveThreads == 0) {
      // There are no more active threads, so mark as finished and notify
      {
        std::unique_lock<std::mutex> lock(mtx);
        assert(!IsFinishedSpawning);
        IsFinishedSpawning = true;
      }
      cv.notify_one();
    }
  });
#else
  llvm_unreachable("threads are disabled");
#endif
}

void BalancedPartitioning::BPThreadPool::wait() {
#if LLVM_ENABLE_THREADS
  // TODO: We could remove the mutex and condition variable and use
  // std::atomic::wait() instead, but that isn't available until C++20
  {
    std::unique_lock<std::mutex> lock(mtx);
    cv.wait(lock, [&]() { return IsFinishedSpawning; });
    assert(IsFinishedSpawning && NumActiveThreads == 0);
  }
  // Now we can call ThreadPool::wait() since all tasks have been submitted
  TheThreadPool.wait();
#else
  llvm_unreachable("threads are disabled");
#endif
}

BalancedPartitioning::BalancedPartitioning(
    const BalancedPartitioningConfig &Config)
    : Config(Config) {
  // Pre-compute log2 values for small arguments; larger arguments fall back
  // to std::log2 in log2Cached()
  Log2Cache[0] = 0.0;
  for (unsigned I = 1; I < LOG_CACHE_SIZE; I++)
    Log2Cache[I] = std::log2(I);
}

void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const {
  LLVM_DEBUG(
      dbgs() << format(
          "Partitioning %zu nodes using depth %u and %u iterations per split\n",
          Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit));
  std::optional<BPThreadPool> TP;
#if LLVM_ENABLE_THREADS
  ThreadPool TheThreadPool;
  if (Config.TaskSplitDepth > 1)
    TP.emplace(TheThreadPool);
#endif

  // Record the input order
  for (unsigned I = 0; I < Nodes.size(); I++)
    Nodes[I].InputOrderIndex = I;

  auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end());
  auto BisectTask = [=, &TP]() {
    bisect(NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP);
  };
  if (TP) {
    TP->async(std::move(BisectTask));
    TP->wait();
  } else {
    BisectTask();
  }

  // Sort the nodes by the computed bucket order
  llvm::stable_sort(NodesRange, [](const auto &L, const auto &R) {
    return L.Bucket < R.Bucket;
  });

  LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n");
}
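
// A minimal usage sketch of the entry point above (illustrative only; it
// assumes the BPFunctionNode(Id, UtilityNodes) constructor declared in the
// header):
//
//   BalancedPartitioningConfig Config;
//   BalancedPartitioning BP(Config);
//   // Nodes 0 and 1 share utility node 1; nodes 1 and 2 share utility node 2
//   std::vector<BPFunctionNode> Nodes;
//   Nodes.push_back(BPFunctionNode(/*Id=*/0, /*UtilityNodes=*/{1}));
//   Nodes.push_back(BPFunctionNode(/*Id=*/1, /*UtilityNodes=*/{1, 2}));
//   Nodes.push_back(BPFunctionNode(/*Id=*/2, /*UtilityNodes=*/{2}));
//   BP.run(Nodes);
//   // Nodes is now reordered so that nodes sharing many utility nodes end up
//   // close to each other; read back N.Bucket for the final position.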

void BalancedPartitioning::bisect(const FunctionNodeRange Nodes,
                                  unsigned RecDepth, unsigned RootBucket,
                                  unsigned Offset,
                                  std::optional<BPThreadPool> &TP) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) {
    // We've reached the lowest level of the recursion tree. Fall back to the
    // original order and assign to buckets.
    llvm::stable_sort(Nodes, [](const auto &L, const auto &R) {
      return L.InputOrderIndex < R.InputOrderIndex;
    });
    for (auto &N : Nodes)
      N.Bucket = Offset++;
    return;
  }

  LLVM_DEBUG(dbgs() << format("Bisect with %u nodes and root bucket %u\n",
                              NumNodes, RootBucket));

  std::mt19937 RNG(RootBucket);

  unsigned LeftBucket = 2 * RootBucket;
  unsigned RightBucket = 2 * RootBucket + 1;

  // Split into two and assign to the left and right buckets
  split(Nodes, LeftBucket);

  runIterations(Nodes, RecDepth, LeftBucket, RightBucket, RNG);

  // Split nodes with respect to the resulting buckets
  auto NodesMid =
      llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; });
  unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid);

  auto LeftNodes = llvm::make_range(Nodes.begin(), NodesMid);
  auto RightNodes = llvm::make_range(NodesMid, Nodes.end());

  auto LeftRecTask = [=, &TP]() {
    bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP);
  };
  auto RightRecTask = [=, &TP]() {
    bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP);
  };

  if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) {
    TP->async(std::move(LeftRecTask));
    TP->async(std::move(RightRecTask));
  } else {
    LeftRecTask();
    RightRecTask();
  }
}

void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes,
                                         unsigned RecDepth, unsigned LeftBucket,
                                         unsigned RightBucket,
                                         std::mt19937 &RNG) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeDegree;
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      ++UtilityNodeDegree[UN];
  // Remove utility nodes if they have just one edge or are connected to all
  // functions; such nodes cannot affect the bisection quality
  for (auto &N : Nodes)
    llvm::erase_if(N.UtilityNodes, [&](auto &UN) {
      return UtilityNodeDegree[UN] <= 1 || UtilityNodeDegree[UN] >= NumNodes;
    });

  // Renumber utility nodes so they can be used to index into Signatures
  DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      if (!UtilityNodeIndex.count(UN))
        UtilityNodeIndex[UN] = UtilityNodeIndex.size();
  for (auto &N : Nodes)
    for (auto &UN : N.UtilityNodes)
      UN = UtilityNodeIndex[UN];

  // Initialize signatures
  SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size());
  for (auto &N : Nodes) {
    for (auto &UN : N.UtilityNodes) {
      assert(UN < Signatures.size());
      if (N.Bucket == LeftBucket) {
        Signatures[UN].LeftCount++;
      } else {
        Signatures[UN].RightCount++;
      }
    }
  }

  // Run at most IterationsPerSplit iterations, stopping early once the
  // process converges (that is, no nodes are moved)
  for (unsigned I = 0; I < Config.IterationsPerSplit; I++) {
    unsigned NumMovedNodes =
        runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG);
    if (NumMovedNodes == 0)
      break;
  }
}
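
// Illustration of the signature bookkeeping above (hypothetical data, not
// part of the implementation): with nodes A and B in the left bucket and
// node C in the right bucket, where A and C share utility node U, the
// signature of U is {LeftCount=1, RightCount=1}. Moving A to the right
// bucket updates it to {LeftCount=0, RightCount=2}; the gains cached below
// measure whether such a move decreases the total log cost.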

unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes,
                                            unsigned LeftBucket,
                                            unsigned RightBucket,
                                            SignaturesT &Signatures,
                                            std::mt19937 &RNG) const {
  // Init signature cost caches
  for (auto &Signature : Signatures) {
    if (Signature.CachedGainIsValid)
      continue;
    unsigned L = Signature.LeftCount;
    unsigned R = Signature.RightCount;
    assert((L > 0 || R > 0) && "incorrect signature");
    float Cost = logCost(L, R);
    Signature.CachedGainLR = 0.f;
    Signature.CachedGainRL = 0.f;
    if (L > 0)
      Signature.CachedGainLR = Cost - logCost(L - 1, R + 1);
    if (R > 0)
      Signature.CachedGainRL = Cost - logCost(L + 1, R - 1);
    Signature.CachedGainIsValid = true;
  }

  // Compute move gains
  using GainPair = std::pair<float, BPFunctionNode *>;
  std::vector<GainPair> Gains;
  for (auto &N : Nodes) {
    bool FromLeftToRight = (N.Bucket == LeftBucket);
    float Gain = moveGain(N, FromLeftToRight, Signatures);
    Gains.push_back(std::make_pair(Gain, &N));
  }

  // Collect left and right gains
  auto LeftEnd = llvm::partition(
      Gains, [&](const auto &GP) { return GP.second->Bucket == LeftBucket; });
  auto LeftRange = llvm::make_range(Gains.begin(), LeftEnd);
  auto RightRange = llvm::make_range(LeftEnd, Gains.end());

  // Sort gains in descending order
  auto LargerGain = [](const auto &L, const auto &R) {
    return L.first > R.first;
  };
  llvm::stable_sort(LeftRange, LargerGain);
  llvm::stable_sort(RightRange, LargerGain);

  unsigned NumMovedDataVertices = 0;
  for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) {
    auto &[LeftGain, LeftNode] = LeftPair;
    auto &[RightGain, RightNode] = RightPair;
    // Stop when the gain is no longer beneficial
    if (LeftGain + RightGain <= 0.f)
      break;
    // Try to exchange the nodes between buckets
    if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG))
      ++NumMovedDataVertices;
    if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG))
      ++NumMovedDataVertices;
  }
  return NumMovedDataVertices;
}
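
// A worked trace of the exchange loop above (hypothetical gains): if the
// sorted left gains are {2.0, 0.5} and the sorted right gains are
// {1.0, -2.0}, the first pair is swapped since 2.0 + 1.0 > 0, and the scan
// stops at the second pair since 0.5 + (-2.0) <= 0.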

bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N,
                                            unsigned LeftBucket,
                                            unsigned RightBucket,
                                            SignaturesT &Signatures,
                                            std::mt19937 &RNG) const {
  // Sometimes we skip the move. This helps to escape local optima
  if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <=
      Config.SkipProbability)
    return false;

  bool FromLeftToRight = (N.Bucket == LeftBucket);
  // Update the current bucket
  N.Bucket = (FromLeftToRight ? RightBucket : LeftBucket);

  // Update signatures and invalidate gain cache
  if (FromLeftToRight) {
    for (auto &UN : N.UtilityNodes) {
      auto &Signature = Signatures[UN];
      Signature.LeftCount--;
      Signature.RightCount++;
      Signature.CachedGainIsValid = false;
    }
  } else {
    for (auto &UN : N.UtilityNodes) {
      auto &Signature = Signatures[UN];
      Signature.LeftCount++;
      Signature.RightCount--;
      Signature.CachedGainIsValid = false;
    }
  }
  return true;
}

void BalancedPartitioning::split(const FunctionNodeRange Nodes,
                                 unsigned StartBucket) const {
  unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
  auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2;

  std::nth_element(Nodes.begin(), NodesMid, Nodes.end(), [](auto &L, auto &R) {
    return L.InputOrderIndex < R.InputOrderIndex;
  });

  for (auto &N : llvm::make_range(Nodes.begin(), NodesMid))
    N.Bucket = StartBucket;
  for (auto &N : llvm::make_range(NodesMid, Nodes.end()))
    N.Bucket = StartBucket + 1;
}

float BalancedPartitioning::moveGain(const BPFunctionNode &N,
                                     bool FromLeftToRight,
                                     const SignaturesT &Signatures) {
  float Gain = 0.f;
  for (auto &UN : N.UtilityNodes)
    Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR
                             : Signatures[UN].CachedGainRL);
  return Gain;
}

float BalancedPartitioning::logCost(unsigned X, unsigned Y) const {
  return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1));
}

float BalancedPartitioning::log2Cached(unsigned i) const {
  return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(i);
}
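
// A worked example of the cost model above (hypothetical counts): for a
// utility node with LeftCount=3 and RightCount=1,
//   logCost(3, 1) = -(3 * log2(4) + 1 * log2(2)) = -7.0
// while moving one of its left nodes to the right gives
//   logCost(2, 2) = -(2 * log2(3) + 2 * log2(3)) ~ -6.34,
// so CachedGainLR = -7.0 - (-6.34) ~ -0.66; a negative gain means the move
// is not beneficial for this utility node on its own.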