10b57cec5SDimitry Andric //===- ICF.cpp ------------------------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // ICF is short for Identical Code Folding. That is a size optimization to
100b57cec5SDimitry Andric // identify and merge two or more read-only sections (typically functions)
110b57cec5SDimitry Andric // that happened to have the same contents. It usually reduces output size
120b57cec5SDimitry Andric // by a few percent.
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric // On Windows, ICF is enabled by default.
150b57cec5SDimitry Andric //
1685868e8aSDimitry Andric // See ELF/ICF.cpp for the details about the algorithm.
170b57cec5SDimitry Andric //
180b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
190b57cec5SDimitry Andric
200b57cec5SDimitry Andric #include "ICF.h"
21349cc55cSDimitry Andric #include "COFFLinkerContext.h"
220b57cec5SDimitry Andric #include "Chunks.h"
230b57cec5SDimitry Andric #include "Symbols.h"
240b57cec5SDimitry Andric #include "lld/Common/ErrorHandler.h"
250b57cec5SDimitry Andric #include "lld/Common/Timer.h"
260b57cec5SDimitry Andric #include "llvm/ADT/Hashing.h"
270b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
280b57cec5SDimitry Andric #include "llvm/Support/Parallel.h"
295f757f3fSDimitry Andric #include "llvm/Support/TimeProfiler.h"
300b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
310b57cec5SDimitry Andric #include "llvm/Support/xxhash.h"
320b57cec5SDimitry Andric #include <algorithm>
330b57cec5SDimitry Andric #include <atomic>
340b57cec5SDimitry Andric #include <vector>
350b57cec5SDimitry Andric
360b57cec5SDimitry Andric using namespace llvm;
370b57cec5SDimitry Andric
38bdd1243dSDimitry Andric namespace lld::coff {
390b57cec5SDimitry Andric
400b57cec5SDimitry Andric class ICF {
410b57cec5SDimitry Andric public:
ICF(COFFLinkerContext & c)42bdd1243dSDimitry Andric ICF(COFFLinkerContext &c) : ctx(c){};
43349cc55cSDimitry Andric void run();
440b57cec5SDimitry Andric
450b57cec5SDimitry Andric private:
460b57cec5SDimitry Andric void segregate(size_t begin, size_t end, bool constant);
470b57cec5SDimitry Andric
480b57cec5SDimitry Andric bool assocEquals(const SectionChunk *a, const SectionChunk *b);
490b57cec5SDimitry Andric
500b57cec5SDimitry Andric bool equalsConstant(const SectionChunk *a, const SectionChunk *b);
510b57cec5SDimitry Andric bool equalsVariable(const SectionChunk *a, const SectionChunk *b);
520b57cec5SDimitry Andric
530b57cec5SDimitry Andric bool isEligible(SectionChunk *c);
540b57cec5SDimitry Andric
550b57cec5SDimitry Andric size_t findBoundary(size_t begin, size_t end);
560b57cec5SDimitry Andric
570b57cec5SDimitry Andric void forEachClassRange(size_t begin, size_t end,
580b57cec5SDimitry Andric std::function<void(size_t, size_t)> fn);
590b57cec5SDimitry Andric
600b57cec5SDimitry Andric void forEachClass(std::function<void(size_t, size_t)> fn);
610b57cec5SDimitry Andric
620b57cec5SDimitry Andric std::vector<SectionChunk *> chunks;
630b57cec5SDimitry Andric int cnt = 0;
640b57cec5SDimitry Andric std::atomic<bool> repeat = {false};
65349cc55cSDimitry Andric
66349cc55cSDimitry Andric COFFLinkerContext &ctx;
670b57cec5SDimitry Andric };
680b57cec5SDimitry Andric
690b57cec5SDimitry Andric // Returns true if section S is subject of ICF.
700b57cec5SDimitry Andric //
710b57cec5SDimitry Andric // Microsoft's documentation
720b57cec5SDimitry Andric // (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April
730b57cec5SDimitry Andric // 2017) says that /opt:icf folds both functions and read-only data.
740b57cec5SDimitry Andric // Despite that, the MSVC linker folds only functions. We found
750b57cec5SDimitry Andric // a few instances of programs that are not safe for data merging.
760b57cec5SDimitry Andric // Therefore, we merge only functions just like the MSVC tool. However, we also
770b57cec5SDimitry Andric // merge read-only sections in a couple of cases where the address of the
780b57cec5SDimitry Andric // section is insignificant to the user program and the behaviour matches that
790b57cec5SDimitry Andric // of the Visual C++ linker.
isEligible(SectionChunk * c)800b57cec5SDimitry Andric bool ICF::isEligible(SectionChunk *c) {
8185868e8aSDimitry Andric // Non-comdat chunks, dead chunks, and writable chunks are not eligible.
820b57cec5SDimitry Andric bool writable = c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
830b57cec5SDimitry Andric if (!c->isCOMDAT() || !c->live || writable)
840b57cec5SDimitry Andric return false;
850b57cec5SDimitry Andric
86fe6060f1SDimitry Andric // Under regular (not safe) ICF, all code sections are eligible.
87bdd1243dSDimitry Andric if ((ctx.config.doICF == ICFLevel::All) &&
88fe6060f1SDimitry Andric c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE)
890b57cec5SDimitry Andric return true;
900b57cec5SDimitry Andric
910b57cec5SDimitry Andric // .pdata and .xdata unwind info sections are eligible.
920b57cec5SDimitry Andric StringRef outSecName = c->getSectionName().split('$').first;
930b57cec5SDimitry Andric if (outSecName == ".pdata" || outSecName == ".xdata")
940b57cec5SDimitry Andric return true;
950b57cec5SDimitry Andric
960b57cec5SDimitry Andric // So are vtables.
975f757f3fSDimitry Andric const char *itaniumVtablePrefix =
985f757f3fSDimitry Andric ctx.config.machine == I386 ? "__ZTV" : "_ZTV";
995f757f3fSDimitry Andric if (c->sym && (c->sym->getName().starts_with("??_7") ||
1005f757f3fSDimitry Andric c->sym->getName().starts_with(itaniumVtablePrefix)))
1010b57cec5SDimitry Andric return true;
1020b57cec5SDimitry Andric
1030b57cec5SDimitry Andric // Anything else not in an address-significance table is eligible.
1040b57cec5SDimitry Andric return !c->keepUnique;
1050b57cec5SDimitry Andric }
1060b57cec5SDimitry Andric
1070b57cec5SDimitry Andric // Split an equivalence class into smaller classes.
segregate(size_t begin,size_t end,bool constant)1080b57cec5SDimitry Andric void ICF::segregate(size_t begin, size_t end, bool constant) {
1090b57cec5SDimitry Andric while (begin < end) {
1100b57cec5SDimitry Andric // Divide [Begin, End) into two. Let Mid be the start index of the
1110b57cec5SDimitry Andric // second group.
1120b57cec5SDimitry Andric auto bound = std::stable_partition(
1130b57cec5SDimitry Andric chunks.begin() + begin + 1, chunks.begin() + end, [&](SectionChunk *s) {
1140b57cec5SDimitry Andric if (constant)
1150b57cec5SDimitry Andric return equalsConstant(chunks[begin], s);
1160b57cec5SDimitry Andric return equalsVariable(chunks[begin], s);
1170b57cec5SDimitry Andric });
1180b57cec5SDimitry Andric size_t mid = bound - chunks.begin();
1190b57cec5SDimitry Andric
1200b57cec5SDimitry Andric // Split [Begin, End) into [Begin, Mid) and [Mid, End). We use Mid as an
1210b57cec5SDimitry Andric // equivalence class ID because every group ends with a unique index.
1220b57cec5SDimitry Andric for (size_t i = begin; i < mid; ++i)
1230b57cec5SDimitry Andric chunks[i]->eqClass[(cnt + 1) % 2] = mid;
1240b57cec5SDimitry Andric
1250b57cec5SDimitry Andric // If we created a group, we need to iterate the main loop again.
1260b57cec5SDimitry Andric if (mid != end)
1270b57cec5SDimitry Andric repeat = true;
1280b57cec5SDimitry Andric
1290b57cec5SDimitry Andric begin = mid;
1300b57cec5SDimitry Andric }
1310b57cec5SDimitry Andric }
1320b57cec5SDimitry Andric
1330b57cec5SDimitry Andric // Returns true if two sections' associative children are equal.
assocEquals(const SectionChunk * a,const SectionChunk * b)1340b57cec5SDimitry Andric bool ICF::assocEquals(const SectionChunk *a, const SectionChunk *b) {
1355ffd83dbSDimitry Andric // Ignore associated metadata sections that don't participate in ICF, such as
1365ffd83dbSDimitry Andric // debug info and CFGuard metadata.
1375ffd83dbSDimitry Andric auto considerForICF = [](const SectionChunk &assoc) {
1385ffd83dbSDimitry Andric StringRef Name = assoc.getSectionName();
13906c3fb27SDimitry Andric return !(Name.starts_with(".debug") || Name == ".gfids$y" ||
140e8d8bef9SDimitry Andric Name == ".giats$y" || Name == ".gljmp$y");
1410b57cec5SDimitry Andric };
1425ffd83dbSDimitry Andric auto ra = make_filter_range(a->children(), considerForICF);
1435ffd83dbSDimitry Andric auto rb = make_filter_range(b->children(), considerForICF);
1445ffd83dbSDimitry Andric return std::equal(ra.begin(), ra.end(), rb.begin(), rb.end(),
1455ffd83dbSDimitry Andric [&](const SectionChunk &ia, const SectionChunk &ib) {
1465ffd83dbSDimitry Andric return ia.eqClass[cnt % 2] == ib.eqClass[cnt % 2];
1475ffd83dbSDimitry Andric });
1480b57cec5SDimitry Andric }
1490b57cec5SDimitry Andric
1500b57cec5SDimitry Andric // Compare "non-moving" part of two sections, namely everything
1510b57cec5SDimitry Andric // except relocation targets.
equalsConstant(const SectionChunk * a,const SectionChunk * b)1520b57cec5SDimitry Andric bool ICF::equalsConstant(const SectionChunk *a, const SectionChunk *b) {
1530b57cec5SDimitry Andric if (a->relocsSize != b->relocsSize)
1540b57cec5SDimitry Andric return false;
1550b57cec5SDimitry Andric
1560b57cec5SDimitry Andric // Compare relocations.
1570b57cec5SDimitry Andric auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) {
1580b57cec5SDimitry Andric if (r1.Type != r2.Type ||
1590b57cec5SDimitry Andric r1.VirtualAddress != r2.VirtualAddress) {
1600b57cec5SDimitry Andric return false;
1610b57cec5SDimitry Andric }
1620b57cec5SDimitry Andric Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex);
1630b57cec5SDimitry Andric Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex);
1640b57cec5SDimitry Andric if (b1 == b2)
1650b57cec5SDimitry Andric return true;
1660b57cec5SDimitry Andric if (auto *d1 = dyn_cast<DefinedRegular>(b1))
1670b57cec5SDimitry Andric if (auto *d2 = dyn_cast<DefinedRegular>(b2))
1680b57cec5SDimitry Andric return d1->getValue() == d2->getValue() &&
1690b57cec5SDimitry Andric d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2];
1700b57cec5SDimitry Andric return false;
1710b57cec5SDimitry Andric };
1720b57cec5SDimitry Andric if (!std::equal(a->getRelocs().begin(), a->getRelocs().end(),
1730b57cec5SDimitry Andric b->getRelocs().begin(), eq))
1740b57cec5SDimitry Andric return false;
1750b57cec5SDimitry Andric
1760b57cec5SDimitry Andric // Compare section attributes and contents.
1770b57cec5SDimitry Andric return a->getOutputCharacteristics() == b->getOutputCharacteristics() &&
1780b57cec5SDimitry Andric a->getSectionName() == b->getSectionName() &&
1790b57cec5SDimitry Andric a->header->SizeOfRawData == b->header->SizeOfRawData &&
1800b57cec5SDimitry Andric a->checksum == b->checksum && a->getContents() == b->getContents() &&
181*0fca6ea1SDimitry Andric a->getMachine() == b->getMachine() && assocEquals(a, b);
1820b57cec5SDimitry Andric }
1830b57cec5SDimitry Andric
1840b57cec5SDimitry Andric // Compare "moving" part of two sections, namely relocation targets.
equalsVariable(const SectionChunk * a,const SectionChunk * b)1850b57cec5SDimitry Andric bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) {
1860b57cec5SDimitry Andric // Compare relocations.
187*0fca6ea1SDimitry Andric auto eqSym = [&](Symbol *b1, Symbol *b2) {
1880b57cec5SDimitry Andric if (b1 == b2)
1890b57cec5SDimitry Andric return true;
1900b57cec5SDimitry Andric if (auto *d1 = dyn_cast<DefinedRegular>(b1))
1910b57cec5SDimitry Andric if (auto *d2 = dyn_cast<DefinedRegular>(b2))
1920b57cec5SDimitry Andric return d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2];
1930b57cec5SDimitry Andric return false;
1940b57cec5SDimitry Andric };
195*0fca6ea1SDimitry Andric auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) {
196*0fca6ea1SDimitry Andric Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex);
197*0fca6ea1SDimitry Andric Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex);
198*0fca6ea1SDimitry Andric return eqSym(b1, b2);
199*0fca6ea1SDimitry Andric };
200*0fca6ea1SDimitry Andric
201*0fca6ea1SDimitry Andric Symbol *e1 = a->getEntryThunk();
202*0fca6ea1SDimitry Andric Symbol *e2 = b->getEntryThunk();
203*0fca6ea1SDimitry Andric if ((e1 || e2) && (!e1 || !e2 || !eqSym(e1, e2)))
204*0fca6ea1SDimitry Andric return false;
205*0fca6ea1SDimitry Andric
2060b57cec5SDimitry Andric return std::equal(a->getRelocs().begin(), a->getRelocs().end(),
2070b57cec5SDimitry Andric b->getRelocs().begin(), eq) &&
2080b57cec5SDimitry Andric assocEquals(a, b);
2090b57cec5SDimitry Andric }
2100b57cec5SDimitry Andric
2110b57cec5SDimitry Andric // Find the first Chunk after Begin that has a different class from Begin.
findBoundary(size_t begin,size_t end)2120b57cec5SDimitry Andric size_t ICF::findBoundary(size_t begin, size_t end) {
2130b57cec5SDimitry Andric for (size_t i = begin + 1; i < end; ++i)
2140b57cec5SDimitry Andric if (chunks[begin]->eqClass[cnt % 2] != chunks[i]->eqClass[cnt % 2])
2150b57cec5SDimitry Andric return i;
2160b57cec5SDimitry Andric return end;
2170b57cec5SDimitry Andric }
2180b57cec5SDimitry Andric
forEachClassRange(size_t begin,size_t end,std::function<void (size_t,size_t)> fn)2190b57cec5SDimitry Andric void ICF::forEachClassRange(size_t begin, size_t end,
2200b57cec5SDimitry Andric std::function<void(size_t, size_t)> fn) {
2210b57cec5SDimitry Andric while (begin < end) {
2220b57cec5SDimitry Andric size_t mid = findBoundary(begin, end);
2230b57cec5SDimitry Andric fn(begin, mid);
2240b57cec5SDimitry Andric begin = mid;
2250b57cec5SDimitry Andric }
2260b57cec5SDimitry Andric }
2270b57cec5SDimitry Andric
2280b57cec5SDimitry Andric // Call Fn on each class group.
forEachClass(std::function<void (size_t,size_t)> fn)2290b57cec5SDimitry Andric void ICF::forEachClass(std::function<void(size_t, size_t)> fn) {
2300b57cec5SDimitry Andric // If the number of sections are too small to use threading,
2310b57cec5SDimitry Andric // call Fn sequentially.
2320b57cec5SDimitry Andric if (chunks.size() < 1024) {
2330b57cec5SDimitry Andric forEachClassRange(0, chunks.size(), fn);
2340b57cec5SDimitry Andric ++cnt;
2350b57cec5SDimitry Andric return;
2360b57cec5SDimitry Andric }
2370b57cec5SDimitry Andric
2380b57cec5SDimitry Andric // Shard into non-overlapping intervals, and call Fn in parallel.
2390b57cec5SDimitry Andric // The sharding must be completed before any calls to Fn are made
2400b57cec5SDimitry Andric // so that Fn can modify the Chunks in its shard without causing data
2410b57cec5SDimitry Andric // races.
2420b57cec5SDimitry Andric const size_t numShards = 256;
2430b57cec5SDimitry Andric size_t step = chunks.size() / numShards;
2440b57cec5SDimitry Andric size_t boundaries[numShards + 1];
2450b57cec5SDimitry Andric boundaries[0] = 0;
2460b57cec5SDimitry Andric boundaries[numShards] = chunks.size();
24781ad6265SDimitry Andric parallelFor(1, numShards, [&](size_t i) {
2480b57cec5SDimitry Andric boundaries[i] = findBoundary((i - 1) * step, chunks.size());
2490b57cec5SDimitry Andric });
25081ad6265SDimitry Andric parallelFor(1, numShards + 1, [&](size_t i) {
2510b57cec5SDimitry Andric if (boundaries[i - 1] < boundaries[i]) {
2520b57cec5SDimitry Andric forEachClassRange(boundaries[i - 1], boundaries[i], fn);
2530b57cec5SDimitry Andric }
2540b57cec5SDimitry Andric });
2550b57cec5SDimitry Andric ++cnt;
2560b57cec5SDimitry Andric }
2570b57cec5SDimitry Andric
2580b57cec5SDimitry Andric // Merge identical COMDAT sections.
2590b57cec5SDimitry Andric // Two sections are considered the same if their section headers,
2600b57cec5SDimitry Andric // contents and relocations are all the same.
run()261349cc55cSDimitry Andric void ICF::run() {
2625f757f3fSDimitry Andric llvm::TimeTraceScope timeScope("ICF");
263349cc55cSDimitry Andric ScopedTimer t(ctx.icfTimer);
2640b57cec5SDimitry Andric
2650b57cec5SDimitry Andric // Collect only mergeable sections and group by hash value.
2660b57cec5SDimitry Andric uint32_t nextId = 1;
267349cc55cSDimitry Andric for (Chunk *c : ctx.symtab.getChunks()) {
2680b57cec5SDimitry Andric if (auto *sc = dyn_cast<SectionChunk>(c)) {
2690b57cec5SDimitry Andric if (isEligible(sc))
2700b57cec5SDimitry Andric chunks.push_back(sc);
2710b57cec5SDimitry Andric else
2720b57cec5SDimitry Andric sc->eqClass[0] = nextId++;
2730b57cec5SDimitry Andric }
2740b57cec5SDimitry Andric }
2750b57cec5SDimitry Andric
2760b57cec5SDimitry Andric // Make sure that ICF doesn't merge sections that are being handled by string
2770b57cec5SDimitry Andric // tail merging.
278349cc55cSDimitry Andric for (MergeChunk *mc : ctx.mergeChunkInstances)
2790b57cec5SDimitry Andric if (mc)
2800b57cec5SDimitry Andric for (SectionChunk *sc : mc->sections)
2810b57cec5SDimitry Andric sc->eqClass[0] = nextId++;
2820b57cec5SDimitry Andric
2830b57cec5SDimitry Andric // Initially, we use hash values to partition sections.
2840b57cec5SDimitry Andric parallelForEach(chunks, [&](SectionChunk *sc) {
28506c3fb27SDimitry Andric sc->eqClass[0] = xxh3_64bits(sc->getContents());
2860b57cec5SDimitry Andric });
2870b57cec5SDimitry Andric
2880b57cec5SDimitry Andric // Combine the hashes of the sections referenced by each section into its
2890b57cec5SDimitry Andric // hash.
2900b57cec5SDimitry Andric for (unsigned cnt = 0; cnt != 2; ++cnt) {
2910b57cec5SDimitry Andric parallelForEach(chunks, [&](SectionChunk *sc) {
2920b57cec5SDimitry Andric uint32_t hash = sc->eqClass[cnt % 2];
2930b57cec5SDimitry Andric for (Symbol *b : sc->symbols())
2940b57cec5SDimitry Andric if (auto *sym = dyn_cast_or_null<DefinedRegular>(b))
2950b57cec5SDimitry Andric hash += sym->getChunk()->eqClass[cnt % 2];
29685868e8aSDimitry Andric // Set MSB to 1 to avoid collisions with non-hash classes.
2970b57cec5SDimitry Andric sc->eqClass[(cnt + 1) % 2] = hash | (1U << 31);
2980b57cec5SDimitry Andric });
2990b57cec5SDimitry Andric }
3000b57cec5SDimitry Andric
3010b57cec5SDimitry Andric // From now on, sections in Chunks are ordered so that sections in
3020b57cec5SDimitry Andric // the same group are consecutive in the vector.
3030b57cec5SDimitry Andric llvm::stable_sort(chunks, [](const SectionChunk *a, const SectionChunk *b) {
3040b57cec5SDimitry Andric return a->eqClass[0] < b->eqClass[0];
3050b57cec5SDimitry Andric });
3060b57cec5SDimitry Andric
3070b57cec5SDimitry Andric // Compare static contents and assign unique IDs for each static content.
3080b57cec5SDimitry Andric forEachClass([&](size_t begin, size_t end) { segregate(begin, end, true); });
3090b57cec5SDimitry Andric
3100b57cec5SDimitry Andric // Split groups by comparing relocations until convergence is obtained.
3110b57cec5SDimitry Andric do {
3120b57cec5SDimitry Andric repeat = false;
3130b57cec5SDimitry Andric forEachClass(
3140b57cec5SDimitry Andric [&](size_t begin, size_t end) { segregate(begin, end, false); });
3150b57cec5SDimitry Andric } while (repeat);
3160b57cec5SDimitry Andric
3170b57cec5SDimitry Andric log("ICF needed " + Twine(cnt) + " iterations");
3180b57cec5SDimitry Andric
31985868e8aSDimitry Andric // Merge sections in the same classes.
3200b57cec5SDimitry Andric forEachClass([&](size_t begin, size_t end) {
3210b57cec5SDimitry Andric if (end - begin == 1)
3220b57cec5SDimitry Andric return;
3230b57cec5SDimitry Andric
3240b57cec5SDimitry Andric log("Selected " + chunks[begin]->getDebugName());
3250b57cec5SDimitry Andric for (size_t i = begin + 1; i < end; ++i) {
3260b57cec5SDimitry Andric log(" Removed " + chunks[i]->getDebugName());
3270b57cec5SDimitry Andric chunks[begin]->replace(chunks[i]);
3280b57cec5SDimitry Andric }
3290b57cec5SDimitry Andric });
3300b57cec5SDimitry Andric }
3310b57cec5SDimitry Andric
3320b57cec5SDimitry Andric // Entry point to ICF.
doICF(COFFLinkerContext & ctx)333bdd1243dSDimitry Andric void doICF(COFFLinkerContext &ctx) { ICF(ctx).run(); }
3340b57cec5SDimitry Andric
335bdd1243dSDimitry Andric } // namespace lld::coff
336