1 //===- SectionPriorities.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details 10 /// about the algorithm. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "SectionPriorities.h" 15 #include "BPSectionOrderer.h" 16 #include "Config.h" 17 #include "InputFiles.h" 18 #include "Symbols.h" 19 #include "Target.h" 20 21 #include "lld/Common/Args.h" 22 #include "lld/Common/CommonLinkerContext.h" 23 #include "lld/Common/ErrorHandler.h" 24 #include "lld/Common/Utils.h" 25 #include "llvm/ADT/DenseMap.h" 26 #include "llvm/ADT/MapVector.h" 27 #include "llvm/Support/Path.h" 28 #include "llvm/Support/TimeProfiler.h" 29 #include "llvm/Support/raw_ostream.h" 30 31 #include <numeric> 32 33 using namespace llvm; 34 using namespace llvm::MachO; 35 using namespace llvm::sys; 36 using namespace lld; 37 using namespace lld::macho; 38 39 PriorityBuilder macho::priorityBuilder; 40 41 namespace { 42 struct Edge { 43 int from; 44 uint64_t weight; 45 }; 46 47 struct Cluster { 48 Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {} 49 50 double getDensity() const { 51 if (size == 0) 52 return 0; 53 return double(weight) / double(size); 54 } 55 56 int next; 57 int prev; 58 uint64_t size; 59 uint64_t weight = 0; 60 uint64_t initialWeight = 0; 61 Edge bestPred = {-1, 0}; 62 }; 63 64 class CallGraphSort { 65 public: 66 CallGraphSort(const MapVector<SectionPair, uint64_t> &profile); 67 68 DenseMap<const InputSection *, int> run(); 69 70 private: 71 std::vector<Cluster> clusters; 72 std::vector<const InputSection *> sections; 73 }; 74 // Maximum amount the combined cluster density can be worse than the original 75 // cluster to consider merging. 76 constexpr int MAX_DENSITY_DEGRADATION = 8; 77 } // end anonymous namespace 78 79 // Take the edge list in callGraphProfile, resolve symbol names to Symbols, and 80 // generate a graph between InputSections with the provided weights. 81 CallGraphSort::CallGraphSort(const MapVector<SectionPair, uint64_t> &profile) { 82 DenseMap<const InputSection *, int> secToCluster; 83 84 auto getOrCreateCluster = [&](const InputSection *isec) -> int { 85 auto res = secToCluster.try_emplace(isec, clusters.size()); 86 if (res.second) { 87 sections.push_back(isec); 88 clusters.emplace_back(clusters.size(), isec->getSize()); 89 } 90 return res.first->second; 91 }; 92 93 // Create the graph 94 for (const std::pair<SectionPair, uint64_t> &c : profile) { 95 const auto fromSec = c.first.first->canonical(); 96 const auto toSec = c.first.second->canonical(); 97 uint64_t weight = c.second; 98 // Ignore edges between input sections belonging to different output 99 // sections. This is done because otherwise we would end up with clusters 100 // containing input sections that can't actually be placed adjacently in the 101 // output. This messes with the cluster size and density calculations. We 102 // would also end up moving input sections in other output sections without 103 // moving them closer to what calls them. 104 if (fromSec->parent != toSec->parent) 105 continue; 106 107 int from = getOrCreateCluster(fromSec); 108 int to = getOrCreateCluster(toSec); 109 110 clusters[to].weight += weight; 111 112 if (from == to) 113 continue; 114 115 // Remember the best edge. 116 Cluster &toC = clusters[to]; 117 if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) { 118 toC.bestPred.from = from; 119 toC.bestPred.weight = weight; 120 } 121 } 122 for (Cluster &c : clusters) 123 c.initialWeight = c.weight; 124 } 125 126 // It's bad to merge clusters which would degrade the density too much. 127 static bool isNewDensityBad(Cluster &a, Cluster &b) { 128 double newDensity = double(a.weight + b.weight) / double(a.size + b.size); 129 return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION; 130 } 131 132 // Find the leader of V's belonged cluster (represented as an equivalence 133 // class). We apply union-find path-halving technique (simple to implement) in 134 // the meantime as it decreases depths and the time complexity. 135 static int getLeader(std::vector<int> &leaders, int v) { 136 while (leaders[v] != v) { 137 leaders[v] = leaders[leaders[v]]; 138 v = leaders[v]; 139 } 140 return v; 141 } 142 143 static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx, 144 Cluster &from, int fromIdx) { 145 int tail1 = into.prev, tail2 = from.prev; 146 into.prev = tail2; 147 cs[tail2].next = intoIdx; 148 from.prev = tail1; 149 cs[tail1].next = fromIdx; 150 into.size += from.size; 151 into.weight += from.weight; 152 from.size = 0; 153 from.weight = 0; 154 } 155 156 // Group InputSections into clusters using the Call-Chain Clustering heuristic 157 // then sort the clusters by density. 158 DenseMap<const InputSection *, int> CallGraphSort::run() { 159 const uint64_t maxClusterSize = target->getPageSize(); 160 161 // Cluster indices sorted by density. 162 std::vector<int> sorted(clusters.size()); 163 // For union-find. 164 std::vector<int> leaders(clusters.size()); 165 166 std::iota(leaders.begin(), leaders.end(), 0); 167 std::iota(sorted.begin(), sorted.end(), 0); 168 169 llvm::stable_sort(sorted, [&](int a, int b) { 170 return clusters[a].getDensity() > clusters[b].getDensity(); 171 }); 172 173 for (int l : sorted) { 174 // The cluster index is the same as the index of its leader here because 175 // clusters[L] has not been merged into another cluster yet. 176 Cluster &c = clusters[l]; 177 178 // Don't consider merging if the edge is unlikely. 179 if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight) 180 continue; 181 182 int predL = getLeader(leaders, c.bestPred.from); 183 // Already in the same cluster. 184 if (l == predL) 185 continue; 186 187 Cluster *predC = &clusters[predL]; 188 if (c.size + predC->size > maxClusterSize) 189 continue; 190 191 if (isNewDensityBad(*predC, c)) 192 continue; 193 194 leaders[l] = predL; 195 mergeClusters(clusters, *predC, predL, c, l); 196 } 197 // Sort remaining non-empty clusters by density. 198 sorted.clear(); 199 for (int i = 0, e = (int)clusters.size(); i != e; ++i) 200 if (clusters[i].size > 0) 201 sorted.push_back(i); 202 llvm::stable_sort(sorted, [&](int a, int b) { 203 return clusters[a].getDensity() > clusters[b].getDensity(); 204 }); 205 206 DenseMap<const InputSection *, int> orderMap; 207 208 // Sections will be sorted by decreasing order. Absent sections will have 209 // priority 0 and be placed at the end of sections. 210 int curOrder = -clusters.size(); 211 for (int leader : sorted) { 212 for (int i = leader;;) { 213 orderMap[sections[i]] = curOrder++; 214 i = clusters[i].next; 215 if (i == leader) 216 break; 217 } 218 } 219 if (!config->printSymbolOrder.empty()) { 220 std::error_code ec; 221 raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None); 222 if (ec) { 223 error("cannot open " + config->printSymbolOrder + ": " + ec.message()); 224 return orderMap; 225 } 226 // Print the symbols ordered by C3, in the order of decreasing curOrder 227 // Instead of sorting all the orderMap, just repeat the loops above. 228 for (int leader : sorted) 229 for (int i = leader;;) { 230 const InputSection *isec = sections[i]; 231 // Search all the symbols in the file of the section 232 // and find out a Defined symbol with name that is within the 233 // section. 234 for (Symbol *sym : isec->getFile()->symbols) { 235 if (auto *d = dyn_cast_or_null<Defined>(sym)) { 236 if (d->isec() == isec) 237 os << sym->getName() << "\n"; 238 } 239 } 240 i = clusters[i].next; 241 if (i == leader) 242 break; 243 } 244 } 245 246 return orderMap; 247 } 248 249 std::optional<int> 250 macho::PriorityBuilder::getSymbolOrCStringPriority(const StringRef key, 251 InputFile *f) { 252 253 auto it = priorities.find(key); 254 if (it == priorities.end()) 255 return std::nullopt; 256 const SymbolPriorityEntry &entry = it->second; 257 if (!f) 258 return entry.anyObjectFile; 259 // We don't use toString(InputFile *) here because it returns the full path 260 // for object files, and we only want the basename. 261 StringRef filename; 262 if (f->archiveName.empty()) 263 filename = path::filename(f->getName()); 264 else 265 filename = saver().save(path::filename(f->archiveName) + "(" + 266 path::filename(f->getName()) + ")"); 267 return std::min(entry.objectFiles.lookup(filename), entry.anyObjectFile); 268 } 269 270 std::optional<int> 271 macho::PriorityBuilder::getSymbolPriority(const Defined *sym) { 272 if (sym->isAbsolute()) 273 return std::nullopt; 274 return getSymbolOrCStringPriority(utils::getRootSymbol(sym->getName()), 275 sym->isec()->getFile()); 276 } 277 278 void macho::PriorityBuilder::extractCallGraphProfile() { 279 TimeTraceScope timeScope("Extract call graph profile"); 280 bool hasOrderFile = !priorities.empty(); 281 for (const InputFile *file : inputFiles) { 282 auto *obj = dyn_cast_or_null<ObjFile>(file); 283 if (!obj) 284 continue; 285 for (const CallGraphEntry &entry : obj->callGraph) { 286 assert(entry.fromIndex < obj->symbols.size() && 287 entry.toIndex < obj->symbols.size()); 288 auto *fromSym = dyn_cast_or_null<Defined>(obj->symbols[entry.fromIndex]); 289 auto *toSym = dyn_cast_or_null<Defined>(obj->symbols[entry.toIndex]); 290 if (fromSym && toSym && 291 (!hasOrderFile || 292 (!getSymbolPriority(fromSym) && !getSymbolPriority(toSym)))) 293 callGraphProfile[{fromSym->isec(), toSym->isec()}] += entry.count; 294 } 295 } 296 } 297 298 void macho::PriorityBuilder::parseOrderFile(StringRef path) { 299 assert(callGraphProfile.empty() && 300 "Order file must be parsed before call graph profile is processed"); 301 std::optional<MemoryBufferRef> buffer = readFile(path); 302 if (!buffer) { 303 error("Could not read order file at " + path); 304 return; 305 } 306 307 int prio = std::numeric_limits<int>::min(); 308 MemoryBufferRef mbref = *buffer; 309 for (StringRef line : args::getLines(mbref)) { 310 StringRef objectFile, symbolOrCStrHash; 311 line = line.take_until([](char c) { return c == '#'; }); // ignore comments 312 line = line.ltrim(); 313 314 CPUType cpuType = StringSwitch<CPUType>(line) 315 .StartsWith("i386:", CPU_TYPE_I386) 316 .StartsWith("x86_64:", CPU_TYPE_X86_64) 317 .StartsWith("arm:", CPU_TYPE_ARM) 318 .StartsWith("arm64:", CPU_TYPE_ARM64) 319 .StartsWith("ppc:", CPU_TYPE_POWERPC) 320 .StartsWith("ppc64:", CPU_TYPE_POWERPC64) 321 .Default(CPU_TYPE_ANY); 322 323 if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType) 324 continue; 325 // Drop the CPU type as well as the colon 326 if (cpuType != CPU_TYPE_ANY) 327 line = line.drop_until([](char c) { return c == ':'; }).drop_front(); 328 329 constexpr std::array<StringRef, 2> fileEnds = {".o:", ".o):"}; 330 for (StringRef fileEnd : fileEnds) { 331 size_t pos = line.find(fileEnd); 332 if (pos != StringRef::npos) { 333 // Split the string around the colon 334 objectFile = line.take_front(pos + fileEnd.size() - 1); 335 line = line.drop_front(pos + fileEnd.size()); 336 break; 337 } 338 } 339 340 // The rest of the line is either <symbol name> or 341 // CStringEntryPrefix<cstring hash> 342 line = line.trim(); 343 if (line.starts_with(CStringEntryPrefix)) { 344 StringRef possibleHash = line.drop_front(CStringEntryPrefix.size()); 345 uint32_t hash = 0; 346 if (to_integer(possibleHash, hash)) 347 symbolOrCStrHash = possibleHash; 348 } else 349 symbolOrCStrHash = utils::getRootSymbol(line); 350 351 if (!symbolOrCStrHash.empty()) { 352 SymbolPriorityEntry &entry = priorities[symbolOrCStrHash]; 353 if (!objectFile.empty()) 354 entry.objectFiles.insert(std::make_pair(objectFile, prio)); 355 else 356 entry.anyObjectFile = std::min(entry.anyObjectFile, prio); 357 } 358 359 ++prio; 360 } 361 } 362 363 DenseMap<const InputSection *, int> 364 macho::PriorityBuilder::buildInputSectionPriorities() { 365 DenseMap<const InputSection *, int> sectionPriorities; 366 if (config->bpStartupFunctionSort || config->bpFunctionOrderForCompression || 367 config->bpDataOrderForCompression) { 368 TimeTraceScope timeScope("Balanced Partitioning Section Orderer"); 369 sectionPriorities = runBalancedPartitioning( 370 config->bpStartupFunctionSort ? config->irpgoProfilePath : "", 371 config->bpFunctionOrderForCompression, 372 config->bpDataOrderForCompression, 373 config->bpCompressionSortStartupFunctions, 374 config->bpVerboseSectionOrderer); 375 } else if (config->callGraphProfileSort) { 376 // Sort sections by the profile data provided by __LLVM,__cg_profile 377 // sections. 378 // 379 // This first builds a call graph based on the profile data then merges 380 // sections according to the C³ heuristic. All clusters are then sorted by a 381 // density metric to further improve locality. 382 TimeTraceScope timeScope("Call graph profile sort"); 383 sectionPriorities = CallGraphSort(callGraphProfile).run(); 384 } 385 386 if (priorities.empty()) 387 return sectionPriorities; 388 389 auto addSym = [&](const Defined *sym) { 390 std::optional<int> symbolPriority = getSymbolPriority(sym); 391 if (!symbolPriority) 392 return; 393 int &priority = sectionPriorities[sym->isec()]; 394 priority = std::min(priority, *symbolPriority); 395 }; 396 397 // TODO: Make sure this handles weak symbols correctly. 398 for (const InputFile *file : inputFiles) { 399 if (isa<ObjFile>(file)) 400 for (Symbol *sym : file->symbols) 401 if (auto *d = dyn_cast_or_null<Defined>(sym)) 402 addSym(d); 403 } 404 405 return sectionPriorities; 406 } 407 408 std::vector<StringPiecePair> macho::PriorityBuilder::buildCStringPriorities( 409 ArrayRef<CStringInputSection *> inputs) { 410 // Split the input strings into hold and cold sets. 411 // Order hot set based on -order_file_cstring for performance improvement; 412 // TODO: Order cold set of cstrings for compression via BP. 413 std::vector<std::pair<int, StringPiecePair>> 414 hotStringPrioritiesAndStringPieces; 415 std::vector<StringPiecePair> coldStringPieces; 416 std::vector<StringPiecePair> orderedStringPieces; 417 418 for (CStringInputSection *isec : inputs) { 419 for (const auto &[stringPieceIdx, piece] : llvm::enumerate(isec->pieces)) { 420 if (!piece.live) 421 continue; 422 423 std::optional<int> priority = getSymbolOrCStringPriority( 424 std::to_string(piece.hash), isec->getFile()); 425 if (!priority) 426 coldStringPieces.emplace_back(isec, stringPieceIdx); 427 else 428 hotStringPrioritiesAndStringPieces.emplace_back( 429 *priority, std::make_pair(isec, stringPieceIdx)); 430 } 431 } 432 433 // Order hot set for perf 434 llvm::stable_sort(hotStringPrioritiesAndStringPieces); 435 for (auto &[priority, stringPiecePair] : hotStringPrioritiesAndStringPieces) 436 orderedStringPieces.push_back(stringPiecePair); 437 438 // TODO: Order cold set for compression 439 440 orderedStringPieces.insert(orderedStringPieces.end(), 441 coldStringPieces.begin(), coldStringPieces.end()); 442 443 return orderedStringPieces; 444 } 445