1 //===- ICF.cpp ------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // ICF is short for Identical Code Folding. That is a size optimization to 10 // identify and merge two or more read-only sections (typically functions) 11 // that happened to have the same contents. It usually reduces output size 12 // by a few percent. 13 // 14 // On Windows, ICF is enabled by default. 15 // 16 // See ELF/ICF.cpp for the details about the algorithm. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "ICF.h" 21 #include "COFFLinkerContext.h" 22 #include "Chunks.h" 23 #include "Symbols.h" 24 #include "lld/Common/Timer.h" 25 #include "llvm/Support/Parallel.h" 26 #include "llvm/Support/TimeProfiler.h" 27 #include "llvm/Support/xxhash.h" 28 #include <algorithm> 29 #include <atomic> 30 #include <vector> 31 32 using namespace llvm; 33 34 namespace lld::coff { 35 36 class ICF { 37 public: 38 ICF(COFFLinkerContext &c) : ctx(c){}; 39 void run(); 40 41 private: 42 void segregate(size_t begin, size_t end, bool constant); 43 44 bool assocEquals(const SectionChunk *a, const SectionChunk *b); 45 46 bool equalsConstant(const SectionChunk *a, const SectionChunk *b); 47 bool equalsVariable(const SectionChunk *a, const SectionChunk *b); 48 49 bool isEligible(SectionChunk *c); 50 51 size_t findBoundary(size_t begin, size_t end); 52 53 void forEachClassRange(size_t begin, size_t end, 54 std::function<void(size_t, size_t)> fn); 55 56 void forEachClass(std::function<void(size_t, size_t)> fn); 57 58 std::vector<SectionChunk *> chunks; 59 int cnt = 0; 60 std::atomic<bool> repeat = {false}; 61 62 COFFLinkerContext &ctx; 63 }; 64 65 // Returns true if section S is subject of ICF. 66 // 67 // Microsoft's documentation 68 // (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April 69 // 2017) says that /opt:icf folds both functions and read-only data. 70 // Despite that, the MSVC linker folds only functions. We found 71 // a few instances of programs that are not safe for data merging. 72 // Therefore, we merge only functions just like the MSVC tool. However, we also 73 // merge read-only sections in a couple of cases where the address of the 74 // section is insignificant to the user program and the behaviour matches that 75 // of the Visual C++ linker. 76 bool ICF::isEligible(SectionChunk *c) { 77 // Non-comdat chunks, dead chunks, and writable chunks are not eligible. 78 bool writable = c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE; 79 if (!c->isCOMDAT() || !c->live || writable) 80 return false; 81 82 // Under regular (not safe) ICF, all code sections are eligible. 83 if ((ctx.config.doICF == ICFLevel::All) && 84 c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE) 85 return true; 86 87 // .pdata and .xdata unwind info sections are eligible. 88 StringRef outSecName = c->getSectionName().split('$').first; 89 if (outSecName == ".pdata" || outSecName == ".xdata") 90 return true; 91 92 // So are vtables. 93 const char *itaniumVtablePrefix = 94 ctx.config.machine == I386 ? "__ZTV" : "_ZTV"; 95 if (c->sym && (c->sym->getName().starts_with("??_7") || 96 c->sym->getName().starts_with(itaniumVtablePrefix))) 97 return true; 98 99 // Anything else not in an address-significance table is eligible. 100 return !c->keepUnique; 101 } 102 103 // Split an equivalence class into smaller classes. 104 void ICF::segregate(size_t begin, size_t end, bool constant) { 105 while (begin < end) { 106 // Divide [Begin, End) into two. Let Mid be the start index of the 107 // second group. 108 auto bound = std::stable_partition( 109 chunks.begin() + begin + 1, chunks.begin() + end, [&](SectionChunk *s) { 110 if (constant) 111 return equalsConstant(chunks[begin], s); 112 return equalsVariable(chunks[begin], s); 113 }); 114 size_t mid = bound - chunks.begin(); 115 116 // Split [Begin, End) into [Begin, Mid) and [Mid, End). We use Mid as an 117 // equivalence class ID because every group ends with a unique index. 118 for (size_t i = begin; i < mid; ++i) 119 chunks[i]->eqClass[(cnt + 1) % 2] = mid; 120 121 // If we created a group, we need to iterate the main loop again. 122 if (mid != end) 123 repeat = true; 124 125 begin = mid; 126 } 127 } 128 129 // Returns true if two sections' associative children are equal. 130 bool ICF::assocEquals(const SectionChunk *a, const SectionChunk *b) { 131 // Ignore associated metadata sections that don't participate in ICF, such as 132 // debug info and CFGuard metadata. 133 auto considerForICF = [](const SectionChunk &assoc) { 134 StringRef Name = assoc.getSectionName(); 135 return !(Name.starts_with(".debug") || Name == ".gfids$y" || 136 Name == ".giats$y" || Name == ".gljmp$y"); 137 }; 138 auto ra = make_filter_range(a->children(), considerForICF); 139 auto rb = make_filter_range(b->children(), considerForICF); 140 return std::equal(ra.begin(), ra.end(), rb.begin(), rb.end(), 141 [&](const SectionChunk &ia, const SectionChunk &ib) { 142 return ia.eqClass[cnt % 2] == ib.eqClass[cnt % 2]; 143 }); 144 } 145 146 // Compare "non-moving" part of two sections, namely everything 147 // except relocation targets. 148 bool ICF::equalsConstant(const SectionChunk *a, const SectionChunk *b) { 149 if (a->relocsSize != b->relocsSize) 150 return false; 151 152 // Compare relocations. 153 auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) { 154 if (r1.Type != r2.Type || 155 r1.VirtualAddress != r2.VirtualAddress) { 156 return false; 157 } 158 Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex); 159 Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex); 160 if (b1 == b2) 161 return true; 162 if (auto *d1 = dyn_cast<DefinedRegular>(b1)) 163 if (auto *d2 = dyn_cast<DefinedRegular>(b2)) 164 return d1->getValue() == d2->getValue() && 165 d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2]; 166 return false; 167 }; 168 if (!std::equal(a->getRelocs().begin(), a->getRelocs().end(), 169 b->getRelocs().begin(), eq)) 170 return false; 171 172 // Compare section attributes and contents. 173 return a->getOutputCharacteristics() == b->getOutputCharacteristics() && 174 a->getSectionName() == b->getSectionName() && 175 a->header->SizeOfRawData == b->header->SizeOfRawData && 176 a->checksum == b->checksum && a->getContents() == b->getContents() && 177 a->getMachine() == b->getMachine() && assocEquals(a, b); 178 } 179 180 // Compare "moving" part of two sections, namely relocation targets. 181 bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) { 182 // Compare relocations. 183 auto eqSym = [&](Symbol *b1, Symbol *b2) { 184 if (b1 == b2) 185 return true; 186 if (auto *d1 = dyn_cast<DefinedRegular>(b1)) 187 if (auto *d2 = dyn_cast<DefinedRegular>(b2)) 188 return d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2]; 189 return false; 190 }; 191 auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) { 192 Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex); 193 Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex); 194 return eqSym(b1, b2); 195 }; 196 197 Symbol *e1 = a->getEntryThunk(); 198 Symbol *e2 = b->getEntryThunk(); 199 if ((e1 || e2) && (!e1 || !e2 || !eqSym(e1, e2))) 200 return false; 201 202 return std::equal(a->getRelocs().begin(), a->getRelocs().end(), 203 b->getRelocs().begin(), eq) && 204 assocEquals(a, b); 205 } 206 207 // Find the first Chunk after Begin that has a different class from Begin. 208 size_t ICF::findBoundary(size_t begin, size_t end) { 209 for (size_t i = begin + 1; i < end; ++i) 210 if (chunks[begin]->eqClass[cnt % 2] != chunks[i]->eqClass[cnt % 2]) 211 return i; 212 return end; 213 } 214 215 void ICF::forEachClassRange(size_t begin, size_t end, 216 std::function<void(size_t, size_t)> fn) { 217 while (begin < end) { 218 size_t mid = findBoundary(begin, end); 219 fn(begin, mid); 220 begin = mid; 221 } 222 } 223 224 // Call Fn on each class group. 225 void ICF::forEachClass(std::function<void(size_t, size_t)> fn) { 226 // If the number of sections are too small to use threading, 227 // call Fn sequentially. 228 if (chunks.size() < 1024) { 229 forEachClassRange(0, chunks.size(), fn); 230 ++cnt; 231 return; 232 } 233 234 // Shard into non-overlapping intervals, and call Fn in parallel. 235 // The sharding must be completed before any calls to Fn are made 236 // so that Fn can modify the Chunks in its shard without causing data 237 // races. 238 const size_t numShards = 256; 239 size_t step = chunks.size() / numShards; 240 size_t boundaries[numShards + 1]; 241 boundaries[0] = 0; 242 boundaries[numShards] = chunks.size(); 243 parallelFor(1, numShards, [&](size_t i) { 244 boundaries[i] = findBoundary((i - 1) * step, chunks.size()); 245 }); 246 parallelFor(1, numShards + 1, [&](size_t i) { 247 if (boundaries[i - 1] < boundaries[i]) { 248 forEachClassRange(boundaries[i - 1], boundaries[i], fn); 249 } 250 }); 251 ++cnt; 252 } 253 254 // Merge identical COMDAT sections. 255 // Two sections are considered the same if their section headers, 256 // contents and relocations are all the same. 257 void ICF::run() { 258 llvm::TimeTraceScope timeScope("ICF"); 259 ScopedTimer t(ctx.icfTimer); 260 261 // Collect only mergeable sections and group by hash value. 262 uint32_t nextId = 1; 263 for (Chunk *c : ctx.driver.getChunks()) { 264 if (auto *sc = dyn_cast<SectionChunk>(c)) { 265 if (isEligible(sc)) 266 chunks.push_back(sc); 267 else 268 sc->eqClass[0] = nextId++; 269 } 270 } 271 272 // Make sure that ICF doesn't merge sections that are being handled by string 273 // tail merging. 274 for (MergeChunk *mc : ctx.mergeChunkInstances) 275 if (mc) 276 for (SectionChunk *sc : mc->sections) 277 sc->eqClass[0] = nextId++; 278 279 // Initially, we use hash values to partition sections. 280 parallelForEach(chunks, [&](SectionChunk *sc) { 281 sc->eqClass[0] = xxh3_64bits(sc->getContents()); 282 }); 283 284 // Combine the hashes of the sections referenced by each section into its 285 // hash. 286 for (unsigned cnt = 0; cnt != 2; ++cnt) { 287 parallelForEach(chunks, [&](SectionChunk *sc) { 288 uint32_t hash = sc->eqClass[cnt % 2]; 289 for (Symbol *b : sc->symbols()) 290 if (auto *sym = dyn_cast_or_null<DefinedRegular>(b)) 291 hash += sym->getChunk()->eqClass[cnt % 2]; 292 // Set MSB to 1 to avoid collisions with non-hash classes. 293 sc->eqClass[(cnt + 1) % 2] = hash | (1U << 31); 294 }); 295 } 296 297 // From now on, sections in Chunks are ordered so that sections in 298 // the same group are consecutive in the vector. 299 llvm::stable_sort(chunks, [](const SectionChunk *a, const SectionChunk *b) { 300 return a->eqClass[0] < b->eqClass[0]; 301 }); 302 303 // Compare static contents and assign unique IDs for each static content. 304 forEachClass([&](size_t begin, size_t end) { segregate(begin, end, true); }); 305 306 // Split groups by comparing relocations until convergence is obtained. 307 do { 308 repeat = false; 309 forEachClass( 310 [&](size_t begin, size_t end) { segregate(begin, end, false); }); 311 } while (repeat); 312 313 Log(ctx) << "ICF needed " << Twine(cnt) << " iterations"; 314 315 // Merge sections in the same classes. 316 forEachClass([&](size_t begin, size_t end) { 317 if (end - begin == 1) 318 return; 319 320 Log(ctx) << "Selected " << chunks[begin]->getDebugName(); 321 for (size_t i = begin + 1; i < end; ++i) { 322 Log(ctx) << " Removed " << chunks[i]->getDebugName(); 323 chunks[begin]->replace(chunks[i]); 324 } 325 }); 326 } 327 328 // Entry point to ICF. 329 void doICF(COFFLinkerContext &ctx) { ICF(ctx).run(); } 330 331 } // namespace lld::coff 332