1 //===- SectionPriorities.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details 10 /// about the algorithm. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "SectionPriorities.h" 15 #include "Config.h" 16 #include "InputFiles.h" 17 #include "Symbols.h" 18 #include "Target.h" 19 #include "lld/Common/Args.h" 20 #include "lld/Common/CommonLinkerContext.h" 21 #include "lld/Common/ErrorHandler.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/MapVector.h" 24 #include "llvm/ADT/Optional.h" 25 #include "llvm/Support/Path.h" 26 #include "llvm/Support/TimeProfiler.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <numeric> 29 30 using namespace llvm; 31 using namespace llvm::MachO; 32 using namespace llvm::sys; 33 using namespace lld; 34 using namespace lld::macho; 35 36 namespace { 37 struct Edge { 38 int from; 39 uint64_t weight; 40 }; 41 42 struct Cluster { 43 Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {} 44 45 double getDensity() const { 46 if (size == 0) 47 return 0; 48 return double(weight) / double(size); 49 } 50 51 int next; 52 int prev; 53 uint64_t size; 54 uint64_t weight = 0; 55 uint64_t initialWeight = 0; 56 Edge bestPred = {-1, 0}; 57 }; 58 59 class CallGraphSort { 60 public: 61 CallGraphSort(); 62 63 DenseMap<const InputSection *, size_t> run(); 64 65 private: 66 std::vector<Cluster> clusters; 67 std::vector<const InputSection *> sections; 68 }; 69 // Maximum amount the combined cluster density can be worse than the original 70 // cluster to consider merging. 71 constexpr int MAX_DENSITY_DEGRADATION = 8; 72 } // end anonymous namespace 73 74 using SectionPair = std::pair<const InputSection *, const InputSection *>; 75 76 // Take the edge list in config->callGraphProfile, resolve symbol names to 77 // Symbols, and generate a graph between InputSections with the provided 78 // weights. 79 CallGraphSort::CallGraphSort() { 80 MapVector<SectionPair, uint64_t> &profile = config->callGraphProfile; 81 DenseMap<const InputSection *, int> secToCluster; 82 83 auto getOrCreateCluster = [&](const InputSection *isec) -> int { 84 auto res = secToCluster.try_emplace(isec, clusters.size()); 85 if (res.second) { 86 sections.push_back(isec); 87 clusters.emplace_back(clusters.size(), isec->getSize()); 88 } 89 return res.first->second; 90 }; 91 92 // Create the graph 93 for (std::pair<SectionPair, uint64_t> &c : profile) { 94 const auto fromSec = c.first.first->canonical(); 95 const auto toSec = c.first.second->canonical(); 96 uint64_t weight = c.second; 97 // Ignore edges between input sections belonging to different output 98 // sections. This is done because otherwise we would end up with clusters 99 // containing input sections that can't actually be placed adjacently in the 100 // output. This messes with the cluster size and density calculations. We 101 // would also end up moving input sections in other output sections without 102 // moving them closer to what calls them. 103 if (fromSec->parent != toSec->parent) 104 continue; 105 106 int from = getOrCreateCluster(fromSec); 107 int to = getOrCreateCluster(toSec); 108 109 clusters[to].weight += weight; 110 111 if (from == to) 112 continue; 113 114 // Remember the best edge. 115 Cluster &toC = clusters[to]; 116 if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) { 117 toC.bestPred.from = from; 118 toC.bestPred.weight = weight; 119 } 120 } 121 for (Cluster &c : clusters) 122 c.initialWeight = c.weight; 123 } 124 125 // It's bad to merge clusters which would degrade the density too much. 126 static bool isNewDensityBad(Cluster &a, Cluster &b) { 127 double newDensity = double(a.weight + b.weight) / double(a.size + b.size); 128 return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION; 129 } 130 131 // Find the leader of V's belonged cluster (represented as an equivalence 132 // class). We apply union-find path-halving technique (simple to implement) in 133 // the meantime as it decreases depths and the time complexity. 134 static int getLeader(std::vector<int> &leaders, int v) { 135 while (leaders[v] != v) { 136 leaders[v] = leaders[leaders[v]]; 137 v = leaders[v]; 138 } 139 return v; 140 } 141 142 static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx, 143 Cluster &from, int fromIdx) { 144 int tail1 = into.prev, tail2 = from.prev; 145 into.prev = tail2; 146 cs[tail2].next = intoIdx; 147 from.prev = tail1; 148 cs[tail1].next = fromIdx; 149 into.size += from.size; 150 into.weight += from.weight; 151 from.size = 0; 152 from.weight = 0; 153 } 154 155 // Group InputSections into clusters using the Call-Chain Clustering heuristic 156 // then sort the clusters by density. 157 DenseMap<const InputSection *, size_t> CallGraphSort::run() { 158 const uint64_t maxClusterSize = target->getPageSize(); 159 160 // Cluster indices sorted by density. 161 std::vector<int> sorted(clusters.size()); 162 // For union-find. 163 std::vector<int> leaders(clusters.size()); 164 165 std::iota(leaders.begin(), leaders.end(), 0); 166 std::iota(sorted.begin(), sorted.end(), 0); 167 168 llvm::stable_sort(sorted, [&](int a, int b) { 169 return clusters[a].getDensity() > clusters[b].getDensity(); 170 }); 171 172 for (int l : sorted) { 173 // The cluster index is the same as the index of its leader here because 174 // clusters[L] has not been merged into another cluster yet. 175 Cluster &c = clusters[l]; 176 177 // Don't consider merging if the edge is unlikely. 178 if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight) 179 continue; 180 181 int predL = getLeader(leaders, c.bestPred.from); 182 // Already in the same cluster. 183 if (l == predL) 184 continue; 185 186 Cluster *predC = &clusters[predL]; 187 if (c.size + predC->size > maxClusterSize) 188 continue; 189 190 if (isNewDensityBad(*predC, c)) 191 continue; 192 193 leaders[l] = predL; 194 mergeClusters(clusters, *predC, predL, c, l); 195 } 196 // Sort remaining non-empty clusters by density. 197 sorted.clear(); 198 for (int i = 0, e = (int)clusters.size(); i != e; ++i) 199 if (clusters[i].size > 0) 200 sorted.push_back(i); 201 llvm::stable_sort(sorted, [&](int a, int b) { 202 return clusters[a].getDensity() > clusters[b].getDensity(); 203 }); 204 205 DenseMap<const InputSection *, size_t> orderMap; 206 207 // Sections will be sorted by decreasing order. Absent sections will have 208 // priority 0 and be placed at the end of sections. 209 // NB: This is opposite from COFF/ELF to be compatible with the existing 210 // order-file code. 211 int curOrder = clusters.size(); 212 for (int leader : sorted) { 213 for (int i = leader;;) { 214 orderMap[sections[i]] = curOrder--; 215 i = clusters[i].next; 216 if (i == leader) 217 break; 218 } 219 } 220 if (!config->printSymbolOrder.empty()) { 221 std::error_code ec; 222 raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None); 223 if (ec) { 224 error("cannot open " + config->printSymbolOrder + ": " + ec.message()); 225 return orderMap; 226 } 227 // Print the symbols ordered by C3, in the order of decreasing curOrder 228 // Instead of sorting all the orderMap, just repeat the loops above. 229 for (int leader : sorted) 230 for (int i = leader;;) { 231 const InputSection *isec = sections[i]; 232 // Search all the symbols in the file of the section 233 // and find out a Defined symbol with name that is within the 234 // section. 235 for (Symbol *sym : isec->getFile()->symbols) { 236 if (auto *d = dyn_cast_or_null<Defined>(sym)) { 237 if (d->isec == isec) 238 os << sym->getName() << "\n"; 239 } 240 } 241 i = clusters[i].next; 242 if (i == leader) 243 break; 244 } 245 } 246 247 return orderMap; 248 } 249 250 static size_t getSymbolPriority(const SymbolPriorityEntry &entry, 251 const InputFile *f) { 252 // We don't use toString(InputFile *) here because it returns the full path 253 // for object files, and we only want the basename. 254 StringRef filename; 255 if (f->archiveName.empty()) 256 filename = path::filename(f->getName()); 257 else 258 filename = saver().save(path::filename(f->archiveName) + "(" + 259 path::filename(f->getName()) + ")"); 260 return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile); 261 } 262 263 void macho::extractCallGraphProfile() { 264 TimeTraceScope timeScope("Extract call graph profile"); 265 for (const InputFile *file : inputFiles) { 266 auto *obj = dyn_cast_or_null<ObjFile>(file); 267 if (!obj) 268 continue; 269 for (const CallGraphEntry &entry : obj->callGraph) { 270 assert(entry.fromIndex < obj->symbols.size() && 271 entry.toIndex < obj->symbols.size()); 272 auto *fromSym = dyn_cast_or_null<Defined>(obj->symbols[entry.fromIndex]); 273 auto *toSym = dyn_cast_or_null<Defined>(obj->symbols[entry.toIndex]); 274 275 if (!fromSym || !toSym) 276 continue; 277 config->callGraphProfile[{fromSym->isec, toSym->isec}] += entry.count; 278 } 279 } 280 } 281 282 void macho::parseOrderFile(StringRef path) { 283 Optional<MemoryBufferRef> buffer = readFile(path); 284 if (!buffer) { 285 error("Could not read order file at " + path); 286 return; 287 } 288 289 MemoryBufferRef mbref = *buffer; 290 size_t priority = std::numeric_limits<size_t>::max(); 291 for (StringRef line : args::getLines(mbref)) { 292 StringRef objectFile, symbol; 293 line = line.take_until([](char c) { return c == '#'; }); // ignore comments 294 line = line.ltrim(); 295 296 CPUType cpuType = StringSwitch<CPUType>(line) 297 .StartsWith("i386:", CPU_TYPE_I386) 298 .StartsWith("x86_64:", CPU_TYPE_X86_64) 299 .StartsWith("arm:", CPU_TYPE_ARM) 300 .StartsWith("arm64:", CPU_TYPE_ARM64) 301 .StartsWith("ppc:", CPU_TYPE_POWERPC) 302 .StartsWith("ppc64:", CPU_TYPE_POWERPC64) 303 .Default(CPU_TYPE_ANY); 304 305 if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType) 306 continue; 307 308 // Drop the CPU type as well as the colon 309 if (cpuType != CPU_TYPE_ANY) 310 line = line.drop_until([](char c) { return c == ':'; }).drop_front(); 311 312 constexpr std::array<StringRef, 2> fileEnds = {".o:", ".o):"}; 313 for (StringRef fileEnd : fileEnds) { 314 size_t pos = line.find(fileEnd); 315 if (pos != StringRef::npos) { 316 // Split the string around the colon 317 objectFile = line.take_front(pos + fileEnd.size() - 1); 318 line = line.drop_front(pos + fileEnd.size()); 319 break; 320 } 321 } 322 symbol = line.trim(); 323 324 if (!symbol.empty()) { 325 SymbolPriorityEntry &entry = config->priorities[symbol]; 326 if (!objectFile.empty()) 327 entry.objectFiles.insert(std::make_pair(objectFile, priority)); 328 else 329 entry.anyObjectFile = std::max(entry.anyObjectFile, priority); 330 } 331 332 --priority; 333 } 334 } 335 336 // Sort sections by the profile data provided by __LLVM,__cg_profile sections. 337 // 338 // This first builds a call graph based on the profile data then merges sections 339 // according to the C³ heuristic. All clusters are then sorted by a density 340 // metric to further improve locality. 341 static DenseMap<const InputSection *, size_t> computeCallGraphProfileOrder() { 342 TimeTraceScope timeScope("Call graph profile sort"); 343 return CallGraphSort().run(); 344 } 345 346 // Each section gets assigned the priority of the highest-priority symbol it 347 // contains. 348 DenseMap<const InputSection *, size_t> macho::buildInputSectionPriorities() { 349 if (config->callGraphProfileSort) 350 return computeCallGraphProfileOrder(); 351 DenseMap<const InputSection *, size_t> sectionPriorities; 352 353 if (config->priorities.empty()) 354 return sectionPriorities; 355 356 auto addSym = [&](Defined &sym) { 357 if (sym.isAbsolute()) 358 return; 359 360 auto it = config->priorities.find(sym.getName()); 361 if (it == config->priorities.end()) 362 return; 363 364 SymbolPriorityEntry &entry = it->second; 365 size_t &priority = sectionPriorities[sym.isec]; 366 priority = 367 std::max(priority, getSymbolPriority(entry, sym.isec->getFile())); 368 }; 369 370 // TODO: Make sure this handles weak symbols correctly. 371 for (const InputFile *file : inputFiles) { 372 if (isa<ObjFile>(file)) 373 for (Symbol *sym : file->symbols) 374 if (auto *d = dyn_cast_or_null<Defined>(sym)) 375 addSym(*d); 376 } 377 378 return sectionPriorities; 379 } 380