1 //===- BalancedPartitioning.cpp -------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements BalancedPartitioning, a recursive balanced graph 10 // partitioning algorithm. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Support/BalancedPartitioning.h" 15 #include "llvm/ADT/SetVector.h" 16 #include "llvm/Support/Debug.h" 17 #include "llvm/Support/Format.h" 18 #include "llvm/Support/FormatVariadic.h" 19 #include "llvm/Support/ThreadPool.h" 20 21 using namespace llvm; 22 #define DEBUG_TYPE "balanced-partitioning" 23 24 void BPFunctionNode::dump(raw_ostream &OS) const { 25 OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id, 26 make_range(UtilityNodes.begin(), UtilityNodes.end()), Bucket); 27 } 28 29 template <typename Func> 30 void BalancedPartitioning::BPThreadPool::async(Func &&F) { 31 #if LLVM_ENABLE_THREADS 32 // This new thread could spawn more threads, so mark it as active 33 ++NumActiveThreads; 34 TheThreadPool.async([=]() { 35 // Run the task 36 F(); 37 38 // This thread will no longer spawn new threads, so mark it as inactive 39 if (--NumActiveThreads == 0) { 40 // There are no more active threads, so mark as finished and notify 41 { 42 std::unique_lock<std::mutex> lock(mtx); 43 assert(!IsFinishedSpawning); 44 IsFinishedSpawning = true; 45 } 46 cv.notify_one(); 47 } 48 }); 49 #else 50 llvm_unreachable("threads are disabled"); 51 #endif 52 } 53 54 void BalancedPartitioning::BPThreadPool::wait() { 55 #if LLVM_ENABLE_THREADS 56 // TODO: We could remove the mutex and condition variable and use 57 // std::atomic::wait() instead, but that isn't available until C++20 58 { 59 std::unique_lock<std::mutex> lock(mtx); 60 cv.wait(lock, [&]() { return IsFinishedSpawning; }); 61 assert(IsFinishedSpawning && NumActiveThreads == 0); 62 } 63 // Now we can call ThreadPool::wait() since all tasks have been submitted 64 TheThreadPool.wait(); 65 #else 66 llvm_unreachable("threads are disabled"); 67 #endif 68 } 69 70 BalancedPartitioning::BalancedPartitioning( 71 const BalancedPartitioningConfig &Config) 72 : Config(Config) { 73 // Pre-computing log2 values 74 Log2Cache[0] = 0.0; 75 for (unsigned I = 1; I < LOG_CACHE_SIZE; I++) 76 Log2Cache[I] = std::log2(I); 77 } 78 79 void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const { 80 LLVM_DEBUG( 81 dbgs() << format( 82 "Partitioning %d nodes using depth %d and %d iterations per split\n", 83 Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit)); 84 std::optional<BPThreadPool> TP; 85 #if LLVM_ENABLE_THREADS 86 ThreadPool TheThreadPool; 87 if (Config.TaskSplitDepth > 1) 88 TP.emplace(TheThreadPool); 89 #endif 90 91 // Record the input order 92 for (unsigned I = 0; I < Nodes.size(); I++) 93 Nodes[I].InputOrderIndex = I; 94 95 auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end()); 96 auto BisectTask = [=, &TP]() { 97 bisect(NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP); 98 }; 99 if (TP) { 100 TP->async(std::move(BisectTask)); 101 TP->wait(); 102 } else { 103 BisectTask(); 104 } 105 106 llvm::stable_sort(NodesRange, [](const auto &L, const auto &R) { 107 return L.Bucket < R.Bucket; 108 }); 109 110 LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n"); 111 } 112 113 void BalancedPartitioning::bisect(const FunctionNodeRange Nodes, 114 unsigned RecDepth, unsigned RootBucket, 115 unsigned Offset, 116 std::optional<BPThreadPool> &TP) const { 117 unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); 118 if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) { 119 // We've reach the lowest level of the recursion tree. Fall back to the 120 // original order and assign to buckets. 121 llvm::stable_sort(Nodes, [](const auto &L, const auto &R) { 122 return L.InputOrderIndex < R.InputOrderIndex; 123 }); 124 for (auto &N : Nodes) 125 N.Bucket = Offset++; 126 return; 127 } 128 129 LLVM_DEBUG(dbgs() << format("Bisect with %d nodes and root bucket %d\n", 130 NumNodes, RootBucket)); 131 132 std::mt19937 RNG(RootBucket); 133 134 unsigned LeftBucket = 2 * RootBucket; 135 unsigned RightBucket = 2 * RootBucket + 1; 136 137 // Split into two and assign to the left and right buckets 138 split(Nodes, LeftBucket); 139 140 runIterations(Nodes, RecDepth, LeftBucket, RightBucket, RNG); 141 142 // Split nodes wrt the resulting buckets 143 auto NodesMid = 144 llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; }); 145 unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid); 146 147 auto LeftNodes = llvm::make_range(Nodes.begin(), NodesMid); 148 auto RightNodes = llvm::make_range(NodesMid, Nodes.end()); 149 150 auto LeftRecTask = [=, &TP]() { 151 bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP); 152 }; 153 auto RightRecTask = [=, &TP]() { 154 bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP); 155 }; 156 157 if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) { 158 TP->async(std::move(LeftRecTask)); 159 TP->async(std::move(RightRecTask)); 160 } else { 161 LeftRecTask(); 162 RightRecTask(); 163 } 164 } 165 166 void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes, 167 unsigned RecDepth, unsigned LeftBucket, 168 unsigned RightBucket, 169 std::mt19937 &RNG) const { 170 unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); 171 DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeDegree; 172 for (auto &N : Nodes) 173 for (auto &UN : N.UtilityNodes) 174 ++UtilityNodeDegree[UN]; 175 // Remove utility nodes if they have just one edge or are connected to all 176 // functions 177 for (auto &N : Nodes) 178 llvm::erase_if(N.UtilityNodes, [&](auto &UN) { 179 return UtilityNodeDegree[UN] <= 1 || UtilityNodeDegree[UN] >= NumNodes; 180 }); 181 182 // Renumber utility nodes so they can be used to index into Signatures 183 DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex; 184 for (auto &N : Nodes) 185 for (auto &UN : N.UtilityNodes) 186 if (!UtilityNodeIndex.count(UN)) 187 UtilityNodeIndex[UN] = UtilityNodeIndex.size(); 188 for (auto &N : Nodes) 189 for (auto &UN : N.UtilityNodes) 190 UN = UtilityNodeIndex[UN]; 191 192 // Initialize signatures 193 SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size()); 194 for (auto &N : Nodes) { 195 for (auto &UN : N.UtilityNodes) { 196 assert(UN < Signatures.size()); 197 if (N.Bucket == LeftBucket) { 198 Signatures[UN].LeftCount++; 199 } else { 200 Signatures[UN].RightCount++; 201 } 202 } 203 } 204 205 for (unsigned I = 0; I < Config.IterationsPerSplit; I++) { 206 unsigned NumMovedNodes = 207 runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG); 208 if (NumMovedNodes == 0) 209 break; 210 } 211 } 212 213 unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes, 214 unsigned LeftBucket, 215 unsigned RightBucket, 216 SignaturesT &Signatures, 217 std::mt19937 &RNG) const { 218 // Init signature cost caches 219 for (auto &Signature : Signatures) { 220 if (Signature.CachedGainIsValid) 221 continue; 222 unsigned L = Signature.LeftCount; 223 unsigned R = Signature.RightCount; 224 assert((L > 0 || R > 0) && "incorrect signature"); 225 float Cost = logCost(L, R); 226 Signature.CachedGainLR = 0.f; 227 Signature.CachedGainRL = 0.f; 228 if (L > 0) 229 Signature.CachedGainLR = Cost - logCost(L - 1, R + 1); 230 if (R > 0) 231 Signature.CachedGainRL = Cost - logCost(L + 1, R - 1); 232 Signature.CachedGainIsValid = true; 233 } 234 235 // Compute move gains 236 typedef std::pair<float, BPFunctionNode *> GainPair; 237 std::vector<GainPair> Gains; 238 for (auto &N : Nodes) { 239 bool FromLeftToRight = (N.Bucket == LeftBucket); 240 float Gain = moveGain(N, FromLeftToRight, Signatures); 241 Gains.push_back(std::make_pair(Gain, &N)); 242 } 243 244 // Collect left and right gains 245 auto LeftEnd = llvm::partition( 246 Gains, [&](const auto &GP) { return GP.second->Bucket == LeftBucket; }); 247 auto LeftRange = llvm::make_range(Gains.begin(), LeftEnd); 248 auto RightRange = llvm::make_range(LeftEnd, Gains.end()); 249 250 // Sort gains in descending order 251 auto LargerGain = [](const auto &L, const auto &R) { 252 return L.first > R.first; 253 }; 254 llvm::stable_sort(LeftRange, LargerGain); 255 llvm::stable_sort(RightRange, LargerGain); 256 257 unsigned NumMovedDataVertices = 0; 258 for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) { 259 auto &[LeftGain, LeftNode] = LeftPair; 260 auto &[RightGain, RightNode] = RightPair; 261 // Stop when the gain is no longer beneficial 262 if (LeftGain + RightGain <= 0.f) 263 break; 264 // Try to exchange the nodes between buckets 265 if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG)) 266 ++NumMovedDataVertices; 267 if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG)) 268 ++NumMovedDataVertices; 269 } 270 return NumMovedDataVertices; 271 } 272 273 bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N, 274 unsigned LeftBucket, 275 unsigned RightBucket, 276 SignaturesT &Signatures, 277 std::mt19937 &RNG) const { 278 // Sometimes we skip the move. This helps to escape local optima 279 if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <= 280 Config.SkipProbability) 281 return false; 282 283 bool FromLeftToRight = (N.Bucket == LeftBucket); 284 // Update the current bucket 285 N.Bucket = (FromLeftToRight ? RightBucket : LeftBucket); 286 287 // Update signatures and invalidate gain cache 288 if (FromLeftToRight) { 289 for (auto &UN : N.UtilityNodes) { 290 auto &Signature = Signatures[UN]; 291 Signature.LeftCount--; 292 Signature.RightCount++; 293 Signature.CachedGainIsValid = false; 294 } 295 } else { 296 for (auto &UN : N.UtilityNodes) { 297 auto &Signature = Signatures[UN]; 298 Signature.LeftCount++; 299 Signature.RightCount--; 300 Signature.CachedGainIsValid = false; 301 } 302 } 303 return true; 304 } 305 306 void BalancedPartitioning::split(const FunctionNodeRange Nodes, 307 unsigned StartBucket) const { 308 unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end()); 309 auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2; 310 311 std::nth_element(Nodes.begin(), NodesMid, Nodes.end(), [](auto &L, auto &R) { 312 return L.InputOrderIndex < R.InputOrderIndex; 313 }); 314 315 for (auto &N : llvm::make_range(Nodes.begin(), NodesMid)) 316 N.Bucket = StartBucket; 317 for (auto &N : llvm::make_range(NodesMid, Nodes.end())) 318 N.Bucket = StartBucket + 1; 319 } 320 321 float BalancedPartitioning::moveGain(const BPFunctionNode &N, 322 bool FromLeftToRight, 323 const SignaturesT &Signatures) { 324 float Gain = 0.f; 325 for (auto &UN : N.UtilityNodes) 326 Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR 327 : Signatures[UN].CachedGainRL); 328 return Gain; 329 } 330 331 float BalancedPartitioning::logCost(unsigned X, unsigned Y) const { 332 return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1)); 333 } 334 335 float BalancedPartitioning::log2Cached(unsigned i) const { 336 return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(i); 337 } 338