1 //===- ICF.cpp ------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // ICF is short for Identical Code Folding. That is a size optimization to 10 // identify and merge two or more read-only sections (typically functions) 11 // that happened to have the same contents. It usually reduces output size 12 // by a few percent. 13 // 14 // On Windows, ICF is enabled by default. 15 // 16 // See ELF/ICF.cpp for the details about the algorithm. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "ICF.h" 21 #include "Chunks.h" 22 #include "Symbols.h" 23 #include "lld/Common/ErrorHandler.h" 24 #include "lld/Common/Threads.h" 25 #include "lld/Common/Timer.h" 26 #include "llvm/ADT/Hashing.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/Parallel.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include "llvm/Support/xxhash.h" 31 #include <algorithm> 32 #include <atomic> 33 #include <vector> 34 35 using namespace llvm; 36 37 namespace lld { 38 namespace coff { 39 40 static Timer icfTimer("ICF", Timer::root()); 41 42 class ICF { 43 public: 44 void run(ArrayRef<Chunk *> v); 45 46 private: 47 void segregate(size_t begin, size_t end, bool constant); 48 49 bool assocEquals(const SectionChunk *a, const SectionChunk *b); 50 51 bool equalsConstant(const SectionChunk *a, const SectionChunk *b); 52 bool equalsVariable(const SectionChunk *a, const SectionChunk *b); 53 54 bool isEligible(SectionChunk *c); 55 56 size_t findBoundary(size_t begin, size_t end); 57 58 void forEachClassRange(size_t begin, size_t end, 59 std::function<void(size_t, size_t)> fn); 60 61 void forEachClass(std::function<void(size_t, size_t)> fn); 62 63 std::vector<SectionChunk *> chunks; 64 int cnt = 0; 65 std::atomic<bool> repeat = {false}; 66 }; 67 68 // Returns true if section S is subject of ICF. 69 // 70 // Microsoft's documentation 71 // (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April 72 // 2017) says that /opt:icf folds both functions and read-only data. 73 // Despite that, the MSVC linker folds only functions. We found 74 // a few instances of programs that are not safe for data merging. 75 // Therefore, we merge only functions just like the MSVC tool. However, we also 76 // merge read-only sections in a couple of cases where the address of the 77 // section is insignificant to the user program and the behaviour matches that 78 // of the Visual C++ linker. 79 bool ICF::isEligible(SectionChunk *c) { 80 // Non-comdat chunks, dead chunks, and writable chunks are not eligible. 81 bool writable = c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE; 82 if (!c->isCOMDAT() || !c->live || writable) 83 return false; 84 85 // Code sections are eligible. 86 if (c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE) 87 return true; 88 89 // .pdata and .xdata unwind info sections are eligible. 90 StringRef outSecName = c->getSectionName().split('$').first; 91 if (outSecName == ".pdata" || outSecName == ".xdata") 92 return true; 93 94 // So are vtables. 95 if (c->sym && c->sym->getName().startswith("??_7")) 96 return true; 97 98 // Anything else not in an address-significance table is eligible. 99 return !c->keepUnique; 100 } 101 102 // Split an equivalence class into smaller classes. 103 void ICF::segregate(size_t begin, size_t end, bool constant) { 104 while (begin < end) { 105 // Divide [Begin, End) into two. Let Mid be the start index of the 106 // second group. 107 auto bound = std::stable_partition( 108 chunks.begin() + begin + 1, chunks.begin() + end, [&](SectionChunk *s) { 109 if (constant) 110 return equalsConstant(chunks[begin], s); 111 return equalsVariable(chunks[begin], s); 112 }); 113 size_t mid = bound - chunks.begin(); 114 115 // Split [Begin, End) into [Begin, Mid) and [Mid, End). We use Mid as an 116 // equivalence class ID because every group ends with a unique index. 117 for (size_t i = begin; i < mid; ++i) 118 chunks[i]->eqClass[(cnt + 1) % 2] = mid; 119 120 // If we created a group, we need to iterate the main loop again. 121 if (mid != end) 122 repeat = true; 123 124 begin = mid; 125 } 126 } 127 128 // Returns true if two sections' associative children are equal. 129 bool ICF::assocEquals(const SectionChunk *a, const SectionChunk *b) { 130 auto childClasses = [&](const SectionChunk *sc) { 131 std::vector<uint32_t> classes; 132 for (const SectionChunk &c : sc->children()) 133 if (!c.getSectionName().startswith(".debug") && 134 c.getSectionName() != ".gfids$y" && c.getSectionName() != ".gljmp$y") 135 classes.push_back(c.eqClass[cnt % 2]); 136 return classes; 137 }; 138 return childClasses(a) == childClasses(b); 139 } 140 141 // Compare "non-moving" part of two sections, namely everything 142 // except relocation targets. 143 bool ICF::equalsConstant(const SectionChunk *a, const SectionChunk *b) { 144 if (a->relocsSize != b->relocsSize) 145 return false; 146 147 // Compare relocations. 148 auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) { 149 if (r1.Type != r2.Type || 150 r1.VirtualAddress != r2.VirtualAddress) { 151 return false; 152 } 153 Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex); 154 Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex); 155 if (b1 == b2) 156 return true; 157 if (auto *d1 = dyn_cast<DefinedRegular>(b1)) 158 if (auto *d2 = dyn_cast<DefinedRegular>(b2)) 159 return d1->getValue() == d2->getValue() && 160 d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2]; 161 return false; 162 }; 163 if (!std::equal(a->getRelocs().begin(), a->getRelocs().end(), 164 b->getRelocs().begin(), eq)) 165 return false; 166 167 // Compare section attributes and contents. 168 return a->getOutputCharacteristics() == b->getOutputCharacteristics() && 169 a->getSectionName() == b->getSectionName() && 170 a->header->SizeOfRawData == b->header->SizeOfRawData && 171 a->checksum == b->checksum && a->getContents() == b->getContents() && 172 assocEquals(a, b); 173 } 174 175 // Compare "moving" part of two sections, namely relocation targets. 176 bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) { 177 // Compare relocations. 178 auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) { 179 Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex); 180 Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex); 181 if (b1 == b2) 182 return true; 183 if (auto *d1 = dyn_cast<DefinedRegular>(b1)) 184 if (auto *d2 = dyn_cast<DefinedRegular>(b2)) 185 return d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2]; 186 return false; 187 }; 188 return std::equal(a->getRelocs().begin(), a->getRelocs().end(), 189 b->getRelocs().begin(), eq) && 190 assocEquals(a, b); 191 } 192 193 // Find the first Chunk after Begin that has a different class from Begin. 194 size_t ICF::findBoundary(size_t begin, size_t end) { 195 for (size_t i = begin + 1; i < end; ++i) 196 if (chunks[begin]->eqClass[cnt % 2] != chunks[i]->eqClass[cnt % 2]) 197 return i; 198 return end; 199 } 200 201 void ICF::forEachClassRange(size_t begin, size_t end, 202 std::function<void(size_t, size_t)> fn) { 203 while (begin < end) { 204 size_t mid = findBoundary(begin, end); 205 fn(begin, mid); 206 begin = mid; 207 } 208 } 209 210 // Call Fn on each class group. 211 void ICF::forEachClass(std::function<void(size_t, size_t)> fn) { 212 // If the number of sections are too small to use threading, 213 // call Fn sequentially. 214 if (chunks.size() < 1024) { 215 forEachClassRange(0, chunks.size(), fn); 216 ++cnt; 217 return; 218 } 219 220 // Shard into non-overlapping intervals, and call Fn in parallel. 221 // The sharding must be completed before any calls to Fn are made 222 // so that Fn can modify the Chunks in its shard without causing data 223 // races. 224 const size_t numShards = 256; 225 size_t step = chunks.size() / numShards; 226 size_t boundaries[numShards + 1]; 227 boundaries[0] = 0; 228 boundaries[numShards] = chunks.size(); 229 parallelForEachN(1, numShards, [&](size_t i) { 230 boundaries[i] = findBoundary((i - 1) * step, chunks.size()); 231 }); 232 parallelForEachN(1, numShards + 1, [&](size_t i) { 233 if (boundaries[i - 1] < boundaries[i]) { 234 forEachClassRange(boundaries[i - 1], boundaries[i], fn); 235 } 236 }); 237 ++cnt; 238 } 239 240 // Merge identical COMDAT sections. 241 // Two sections are considered the same if their section headers, 242 // contents and relocations are all the same. 243 void ICF::run(ArrayRef<Chunk *> vec) { 244 ScopedTimer t(icfTimer); 245 246 // Collect only mergeable sections and group by hash value. 247 uint32_t nextId = 1; 248 for (Chunk *c : vec) { 249 if (auto *sc = dyn_cast<SectionChunk>(c)) { 250 if (isEligible(sc)) 251 chunks.push_back(sc); 252 else 253 sc->eqClass[0] = nextId++; 254 } 255 } 256 257 // Make sure that ICF doesn't merge sections that are being handled by string 258 // tail merging. 259 for (MergeChunk *mc : MergeChunk::instances) 260 if (mc) 261 for (SectionChunk *sc : mc->sections) 262 sc->eqClass[0] = nextId++; 263 264 // Initially, we use hash values to partition sections. 265 parallelForEach(chunks, [&](SectionChunk *sc) { 266 sc->eqClass[0] = xxHash64(sc->getContents()); 267 }); 268 269 // Combine the hashes of the sections referenced by each section into its 270 // hash. 271 for (unsigned cnt = 0; cnt != 2; ++cnt) { 272 parallelForEach(chunks, [&](SectionChunk *sc) { 273 uint32_t hash = sc->eqClass[cnt % 2]; 274 for (Symbol *b : sc->symbols()) 275 if (auto *sym = dyn_cast_or_null<DefinedRegular>(b)) 276 hash += sym->getChunk()->eqClass[cnt % 2]; 277 // Set MSB to 1 to avoid collisions with non-hash classes. 278 sc->eqClass[(cnt + 1) % 2] = hash | (1U << 31); 279 }); 280 } 281 282 // From now on, sections in Chunks are ordered so that sections in 283 // the same group are consecutive in the vector. 284 llvm::stable_sort(chunks, [](const SectionChunk *a, const SectionChunk *b) { 285 return a->eqClass[0] < b->eqClass[0]; 286 }); 287 288 // Compare static contents and assign unique IDs for each static content. 289 forEachClass([&](size_t begin, size_t end) { segregate(begin, end, true); }); 290 291 // Split groups by comparing relocations until convergence is obtained. 292 do { 293 repeat = false; 294 forEachClass( 295 [&](size_t begin, size_t end) { segregate(begin, end, false); }); 296 } while (repeat); 297 298 log("ICF needed " + Twine(cnt) + " iterations"); 299 300 // Merge sections in the same classes. 301 forEachClass([&](size_t begin, size_t end) { 302 if (end - begin == 1) 303 return; 304 305 log("Selected " + chunks[begin]->getDebugName()); 306 for (size_t i = begin + 1; i < end; ++i) { 307 log(" Removed " + chunks[i]->getDebugName()); 308 chunks[begin]->replace(chunks[i]); 309 } 310 }); 311 } 312 313 // Entry point to ICF. 314 void doICF(ArrayRef<Chunk *> chunks) { ICF().run(chunks); } 315 316 } // namespace coff 317 } // namespace lld 318