1 //===- ICF.cpp ------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ICF.h" 10 #include "ConcatOutputSection.h" 11 #include "Config.h" 12 #include "InputSection.h" 13 #include "SymbolTable.h" 14 #include "Symbols.h" 15 #include "UnwindInfoSection.h" 16 17 #include "lld/Common/CommonLinkerContext.h" 18 #include "llvm/Support/LEB128.h" 19 #include "llvm/Support/Parallel.h" 20 #include "llvm/Support/TimeProfiler.h" 21 #include "llvm/Support/xxhash.h" 22 23 #include <atomic> 24 25 using namespace llvm; 26 using namespace lld; 27 using namespace lld::macho; 28 29 static constexpr bool verboseDiagnostics = false; 30 31 class ICF { 32 public: 33 ICF(std::vector<ConcatInputSection *> &inputs); 34 void run(); 35 36 using EqualsFn = bool (ICF::*)(const ConcatInputSection *, 37 const ConcatInputSection *); 38 void segregate(size_t begin, size_t end, EqualsFn); 39 size_t findBoundary(size_t begin, size_t end); 40 void forEachClassRange(size_t begin, size_t end, 41 llvm::function_ref<void(size_t, size_t)> func); 42 void forEachClass(llvm::function_ref<void(size_t, size_t)> func); 43 44 bool equalsConstant(const ConcatInputSection *ia, 45 const ConcatInputSection *ib); 46 bool equalsVariable(const ConcatInputSection *ia, 47 const ConcatInputSection *ib); 48 49 // ICF needs a copy of the inputs vector because its equivalence-class 50 // segregation algorithm destroys the proper sequence. 51 std::vector<ConcatInputSection *> icfInputs; 52 53 unsigned icfPass = 0; 54 std::atomic<bool> icfRepeat{false}; 55 std::atomic<uint64_t> equalsConstantCount{0}; 56 std::atomic<uint64_t> equalsVariableCount{0}; 57 }; 58 59 ICF::ICF(std::vector<ConcatInputSection *> &inputs) { 60 icfInputs.assign(inputs.begin(), inputs.end()); 61 } 62 63 // ICF = Identical Code Folding 64 // 65 // We only fold __TEXT,__text, so this is really "code" folding, and not 66 // "COMDAT" folding. String and scalar constant literals are deduplicated 67 // elsewhere. 68 // 69 // Summary of segments & sections: 70 // 71 // The __TEXT segment is readonly at the MMU. Some sections are already 72 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are 73 // synthetic and inherently free of duplicates (__TEXT,__stubs & 74 // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const, 75 // because doing so induces many test failures. 76 // 77 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and 78 // thus ineligible for ICF. 79 // 80 // The __DATA_CONST segment is read/write at the MMU, but is logically const to 81 // the application after dyld applies fixups to pointer data. We currently 82 // fold only the __DATA_CONST,__cfstring section. 83 // 84 // The __DATA segment is read/write at the MMU, and as application-writeable 85 // data, none of its sections are eligible for ICF. 86 // 87 // Please see the large block comment in lld/ELF/ICF.cpp for an explanation 88 // of the segregation algorithm. 89 // 90 // FIXME(gkm): implement keep-unique attributes 91 // FIXME(gkm): implement address-significance tables for MachO object files 92 93 // Compare "non-moving" parts of two ConcatInputSections, namely everything 94 // except references to other ConcatInputSections. 95 bool ICF::equalsConstant(const ConcatInputSection *ia, 96 const ConcatInputSection *ib) { 97 if (verboseDiagnostics) 98 ++equalsConstantCount; 99 // We can only fold within the same OutputSection. 100 if (ia->parent != ib->parent) 101 return false; 102 if (ia->data.size() != ib->data.size()) 103 return false; 104 if (ia->data != ib->data) 105 return false; 106 if (ia->relocs.size() != ib->relocs.size()) 107 return false; 108 auto f = [](const Reloc &ra, const Reloc &rb) { 109 if (ra.type != rb.type) 110 return false; 111 if (ra.pcrel != rb.pcrel) 112 return false; 113 if (ra.length != rb.length) 114 return false; 115 if (ra.offset != rb.offset) 116 return false; 117 if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>()) 118 return false; 119 120 InputSection *isecA, *isecB; 121 122 uint64_t valueA = 0; 123 uint64_t valueB = 0; 124 if (ra.referent.is<Symbol *>()) { 125 const auto *sa = ra.referent.get<Symbol *>(); 126 const auto *sb = rb.referent.get<Symbol *>(); 127 if (sa->kind() != sb->kind()) 128 return false; 129 // ICF runs before Undefineds are treated (and potentially converted into 130 // DylibSymbols). 131 if (isa<DylibSymbol>(sa) || isa<Undefined>(sa)) 132 return sa == sb && ra.addend == rb.addend; 133 assert(isa<Defined>(sa)); 134 const auto *da = cast<Defined>(sa); 135 const auto *db = cast<Defined>(sb); 136 if (!da->isec() || !db->isec()) { 137 assert(da->isAbsolute() && db->isAbsolute()); 138 return da->value + ra.addend == db->value + rb.addend; 139 } 140 isecA = da->isec(); 141 valueA = da->value; 142 isecB = db->isec(); 143 valueB = db->value; 144 } else { 145 isecA = ra.referent.get<InputSection *>(); 146 isecB = rb.referent.get<InputSection *>(); 147 } 148 149 if (isecA->parent != isecB->parent) 150 return false; 151 // Sections with identical parents should be of the same kind. 152 assert(isecA->kind() == isecB->kind()); 153 // We will compare ConcatInputSection contents in equalsVariable. 154 if (isa<ConcatInputSection>(isecA)) 155 return ra.addend == rb.addend; 156 // Else we have two literal sections. References to them are equal iff their 157 // offsets in the output section are equal. 158 if (ra.referent.is<Symbol *>()) 159 // For symbol relocs, we compare the contents at the symbol address. We 160 // don't do `getOffset(value + addend)` because value + addend may not be 161 // a valid offset in the literal section. 162 return isecA->getOffset(valueA) == isecB->getOffset(valueB) && 163 ra.addend == rb.addend; 164 else { 165 assert(valueA == 0 && valueB == 0); 166 // For section relocs, we compare the content at the section offset. 167 return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend); 168 } 169 }; 170 return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), 171 f); 172 } 173 174 // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not 175 // handled by equalsConstant(). 176 bool ICF::equalsVariable(const ConcatInputSection *ia, 177 const ConcatInputSection *ib) { 178 if (verboseDiagnostics) 179 ++equalsVariableCount; 180 assert(ia->relocs.size() == ib->relocs.size()); 181 auto f = [this](const Reloc &ra, const Reloc &rb) { 182 // We already filtered out mismatching values/addends in equalsConstant. 183 if (ra.referent == rb.referent) 184 return true; 185 const ConcatInputSection *isecA, *isecB; 186 if (ra.referent.is<Symbol *>()) { 187 // Matching DylibSymbols are already filtered out by the 188 // identical-referent check above. Non-matching DylibSymbols were filtered 189 // out in equalsConstant(). So we can safely cast to Defined here. 190 const auto *da = cast<Defined>(ra.referent.get<Symbol *>()); 191 const auto *db = cast<Defined>(rb.referent.get<Symbol *>()); 192 if (da->isAbsolute()) 193 return true; 194 isecA = dyn_cast<ConcatInputSection>(da->isec()); 195 if (!isecA) 196 return true; // literal sections were checked in equalsConstant. 197 isecB = cast<ConcatInputSection>(db->isec()); 198 } else { 199 const auto *sa = ra.referent.get<InputSection *>(); 200 const auto *sb = rb.referent.get<InputSection *>(); 201 isecA = dyn_cast<ConcatInputSection>(sa); 202 if (!isecA) 203 return true; 204 isecB = cast<ConcatInputSection>(sb); 205 } 206 return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2]; 207 }; 208 if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f)) 209 return false; 210 211 // If there are symbols with associated unwind info, check that the unwind 212 // info matches. For simplicity, we only handle the case where there are only 213 // symbols at offset zero within the section (which is typically the case with 214 // .subsections_via_symbols.) 215 auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; }; 216 const auto *itA = llvm::find_if(ia->symbols, hasUnwind); 217 const auto *itB = llvm::find_if(ib->symbols, hasUnwind); 218 if (itA == ia->symbols.end()) 219 return itB == ib->symbols.end(); 220 if (itB == ib->symbols.end()) 221 return false; 222 const Defined *da = *itA; 223 const Defined *db = *itB; 224 if (da->unwindEntry()->icfEqClass[icfPass % 2] != 225 db->unwindEntry()->icfEqClass[icfPass % 2] || 226 da->value != 0 || db->value != 0) 227 return false; 228 auto isZero = [](Defined *d) { return d->value == 0; }; 229 return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) == 230 ia->symbols.end() && 231 std::find_if_not(std::next(itB), ib->symbols.end(), isZero) == 232 ib->symbols.end(); 233 } 234 235 // Find the first InputSection after BEGIN whose equivalence class differs 236 size_t ICF::findBoundary(size_t begin, size_t end) { 237 uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2]; 238 for (size_t i = begin + 1; i < end; ++i) 239 if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2]) 240 return i; 241 return end; 242 } 243 244 // Invoke FUNC on subranges with matching equivalence class 245 void ICF::forEachClassRange(size_t begin, size_t end, 246 llvm::function_ref<void(size_t, size_t)> func) { 247 while (begin < end) { 248 size_t mid = findBoundary(begin, end); 249 func(begin, mid); 250 begin = mid; 251 } 252 } 253 254 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges 255 // with matching equivalence class 256 void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) { 257 // Only use threads when the benefits outweigh the overhead. 258 const size_t threadingThreshold = 1024; 259 if (icfInputs.size() < threadingThreshold) { 260 forEachClassRange(0, icfInputs.size(), func); 261 ++icfPass; 262 return; 263 } 264 265 // Shard into non-overlapping intervals, and call FUNC in parallel. The 266 // sharding must be completed before any calls to FUNC are made so that FUNC 267 // can modify the InputSection in its shard without causing data races. 268 const size_t shards = 256; 269 size_t step = icfInputs.size() / shards; 270 size_t boundaries[shards + 1]; 271 boundaries[0] = 0; 272 boundaries[shards] = icfInputs.size(); 273 parallelFor(1, shards, [&](size_t i) { 274 boundaries[i] = findBoundary((i - 1) * step, icfInputs.size()); 275 }); 276 parallelFor(1, shards + 1, [&](size_t i) { 277 if (boundaries[i - 1] < boundaries[i]) { 278 forEachClassRange(boundaries[i - 1], boundaries[i], func); 279 } 280 }); 281 ++icfPass; 282 } 283 284 void ICF::run() { 285 // Into each origin-section hash, combine all reloc referent section hashes. 286 for (icfPass = 0; icfPass < 2; ++icfPass) { 287 parallelForEach(icfInputs, [&](ConcatInputSection *isec) { 288 uint32_t hash = isec->icfEqClass[icfPass % 2]; 289 for (const Reloc &r : isec->relocs) { 290 if (auto *sym = r.referent.dyn_cast<Symbol *>()) { 291 if (auto *defined = dyn_cast<Defined>(sym)) { 292 if (defined->isec()) { 293 if (auto *referentIsec = 294 dyn_cast<ConcatInputSection>(defined->isec())) 295 hash += defined->value + referentIsec->icfEqClass[icfPass % 2]; 296 else 297 hash += defined->isec()->kind() + 298 defined->isec()->getOffset(defined->value); 299 } else { 300 hash += defined->value; 301 } 302 } else { 303 // ICF runs before Undefined diags 304 assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym)); 305 } 306 } 307 } 308 // Set MSB to 1 to avoid collisions with non-hashed classes. 309 isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31); 310 }); 311 } 312 313 llvm::stable_sort( 314 icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) { 315 return a->icfEqClass[0] < b->icfEqClass[0]; 316 }); 317 forEachClass([&](size_t begin, size_t end) { 318 segregate(begin, end, &ICF::equalsConstant); 319 }); 320 321 // Split equivalence groups by comparing relocations until convergence 322 do { 323 icfRepeat = false; 324 forEachClass([&](size_t begin, size_t end) { 325 segregate(begin, end, &ICF::equalsVariable); 326 }); 327 } while (icfRepeat); 328 log("ICF needed " + Twine(icfPass) + " iterations"); 329 if (verboseDiagnostics) { 330 log("equalsConstant() called " + Twine(equalsConstantCount) + " times"); 331 log("equalsVariable() called " + Twine(equalsVariableCount) + " times"); 332 } 333 334 // Fold sections within equivalence classes 335 forEachClass([&](size_t begin, size_t end) { 336 if (end - begin < 2) 337 return; 338 ConcatInputSection *beginIsec = icfInputs[begin]; 339 for (size_t i = begin + 1; i < end; ++i) 340 beginIsec->foldIdentical(icfInputs[i]); 341 }); 342 } 343 344 // Split an equivalence class into smaller classes. 345 void ICF::segregate(size_t begin, size_t end, EqualsFn equals) { 346 while (begin < end) { 347 // Divide [begin, end) into two. Let mid be the start index of the 348 // second group. 349 auto bound = std::stable_partition( 350 icfInputs.begin() + begin + 1, icfInputs.begin() + end, 351 [&](ConcatInputSection *isec) { 352 return (this->*equals)(icfInputs[begin], isec); 353 }); 354 size_t mid = bound - icfInputs.begin(); 355 356 // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an 357 // equivalence class ID because every group ends with a unique index. 358 for (size_t i = begin; i < mid; ++i) 359 icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid; 360 361 // If we created a group, we need to iterate the main loop again. 362 if (mid != end) 363 icfRepeat = true; 364 365 begin = mid; 366 } 367 } 368 369 void macho::markSymAsAddrSig(Symbol *s) { 370 if (auto *d = dyn_cast_or_null<Defined>(s)) 371 if (d->isec()) 372 d->isec()->keepUnique = true; 373 } 374 375 void macho::markAddrSigSymbols() { 376 TimeTraceScope timeScope("Mark addrsig symbols"); 377 for (InputFile *file : inputFiles) { 378 ObjFile *obj = dyn_cast<ObjFile>(file); 379 if (!obj) 380 continue; 381 382 Section *addrSigSection = obj->addrSigSection; 383 if (!addrSigSection) 384 continue; 385 assert(addrSigSection->subsections.size() == 1); 386 387 const InputSection *isec = addrSigSection->subsections[0].isec; 388 389 for (const Reloc &r : isec->relocs) { 390 if (auto *sym = r.referent.dyn_cast<Symbol *>()) 391 markSymAsAddrSig(sym); 392 else 393 error(toString(isec) + ": unexpected section relocation"); 394 } 395 } 396 } 397 398 void macho::foldIdenticalSections(bool onlyCfStrings) { 399 TimeTraceScope timeScope("Fold Identical Code Sections"); 400 // The ICF equivalence-class segregation algorithm relies on pre-computed 401 // hashes of InputSection::data for the ConcatOutputSection::inputs and all 402 // sections referenced by their relocs. We could recursively traverse the 403 // relocs to find every referenced InputSection, but that precludes easy 404 // parallelization. Therefore, we hash every InputSection here where we have 405 // them all accessible as simple vectors. 406 407 // If an InputSection is ineligible for ICF, we give it a unique ID to force 408 // it into an unfoldable singleton equivalence class. Begin the unique-ID 409 // space at inputSections.size(), so that it will never intersect with 410 // equivalence-class IDs which begin at 0. Since hashes & unique IDs never 411 // coexist with equivalence-class IDs, this is not necessary, but might help 412 // someone keep the numbers straight in case we ever need to debug the 413 // ICF::segregate() 414 std::vector<ConcatInputSection *> foldable; 415 uint64_t icfUniqueID = inputSections.size(); 416 for (ConcatInputSection *isec : inputSections) { 417 bool isFoldableWithAddendsRemoved = isCfStringSection(isec) || 418 isClassRefsSection(isec) || 419 isSelRefsSection(isec); 420 // NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we 421 // can still fold it. 422 bool hasFoldableFlags = (isSelRefsSection(isec) || 423 sectionType(isec->getFlags()) == MachO::S_REGULAR); 424 // FIXME: consider non-code __text sections as foldable? 425 bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) && 426 (isCodeSection(isec) || isFoldableWithAddendsRemoved || 427 isGccExceptTabSection(isec)) && 428 !isec->keepUnique && !isec->hasAltEntry && 429 !isec->shouldOmitFromOutput() && hasFoldableFlags; 430 if (isFoldable) { 431 foldable.push_back(isec); 432 for (Defined *d : isec->symbols) 433 if (d->unwindEntry()) 434 foldable.push_back(d->unwindEntry()); 435 436 // Some sections have embedded addends that foil ICF's hashing / equality 437 // checks. (We can ignore embedded addends when doing ICF because the same 438 // information gets recorded in our Reloc structs.) We therefore create a 439 // mutable copy of the section data and zero out the embedded addends 440 // before performing any hashing / equality checks. 441 if (isFoldableWithAddendsRemoved) { 442 // We have to do this copying serially as the BumpPtrAllocator is not 443 // thread-safe. FIXME: Make a thread-safe allocator. 444 MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc()); 445 for (const Reloc &r : isec->relocs) 446 target->relocateOne(copy.data() + r.offset, r, /*va=*/0, 447 /*relocVA=*/0); 448 isec->data = copy; 449 } 450 } else if (!isEhFrameSection(isec)) { 451 // EH frames are gathered as foldables from unwindEntry above; give a 452 // unique ID to everything else. 453 isec->icfEqClass[0] = ++icfUniqueID; 454 } 455 } 456 parallelForEach(foldable, [](ConcatInputSection *isec) { 457 assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID! 458 // Turn-on the top bit to guarantee that valid hashes have no collisions 459 // with the small-integer unique IDs for ICF-ineligible sections 460 isec->icfEqClass[0] = xxh3_64bits(isec->data) | (1ull << 31); 461 }); 462 // Now that every input section is either hashed or marked as unique, run the 463 // segregation algorithm to detect foldable subsections. 464 ICF(foldable).run(); 465 } 466