xref: /freebsd/contrib/llvm-project/lld/MachO/ICF.cpp (revision 963f5dc7a30624e95d72fb7f87b8892651164e46)
1 //===- ICF.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ICF.h"
10 #include "ConcatOutputSection.h"
11 #include "InputSection.h"
12 #include "Symbols.h"
13 #include "UnwindInfoSection.h"
14 
15 #include "llvm/Support/Parallel.h"
16 #include "llvm/Support/TimeProfiler.h"
17 
18 #include <atomic>
19 
20 using namespace llvm;
21 using namespace lld;
22 using namespace lld::macho;
23 
24 class ICF {
25 public:
26   ICF(std::vector<ConcatInputSection *> &inputs);
27 
28   void run();
29   void segregate(size_t begin, size_t end,
30                  std::function<bool(const ConcatInputSection *,
31                                     const ConcatInputSection *)>
32                      equals);
33   size_t findBoundary(size_t begin, size_t end);
34   void forEachClassRange(size_t begin, size_t end,
35                          std::function<void(size_t, size_t)> func);
36   void forEachClass(std::function<void(size_t, size_t)> func);
37 
38   // ICF needs a copy of the inputs vector because its equivalence-class
39   // segregation algorithm destroys the proper sequence.
40   std::vector<ConcatInputSection *> icfInputs;
41 };
42 
43 ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
44   icfInputs.assign(inputs.begin(), inputs.end());
45 }
46 
47 // ICF = Identical Code Folding
48 //
49 // We only fold __TEXT,__text, so this is really "code" folding, and not
50 // "COMDAT" folding. String and scalar constant literals are deduplicated
51 // elsewhere.
52 //
53 // Summary of segments & sections:
54 //
55 // The __TEXT segment is readonly at the MMU. Some sections are already
56 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
57 // synthetic and inherently free of duplicates (__TEXT,__stubs &
58 // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
59 // because doing so induces many test failures.
60 //
61 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
62 // thus ineligible for ICF.
63 //
64 // The __DATA_CONST segment is read/write at the MMU, but is logically const to
65 // the application after dyld applies fixups to pointer data. We currently
66 // fold only the __DATA_CONST,__cfstring section.
67 //
68 // The __DATA segment is read/write at the MMU, and as application-writeable
69 // data, none of its sections are eligible for ICF.
70 //
71 // Please see the large block comment in lld/ELF/ICF.cpp for an explanation
72 // of the segregation algorithm.
73 //
74 // FIXME(gkm): implement keep-unique attributes
75 // FIXME(gkm): implement address-significance tables for MachO object files
76 
77 static unsigned icfPass = 0;
78 static std::atomic<bool> icfRepeat{false};
79 
80 // Compare "non-moving" parts of two ConcatInputSections, namely everything
81 // except references to other ConcatInputSections.
82 static bool equalsConstant(const ConcatInputSection *ia,
83                            const ConcatInputSection *ib) {
84   // We can only fold within the same OutputSection.
85   if (ia->parent != ib->parent)
86     return false;
87   if (ia->data.size() != ib->data.size())
88     return false;
89   if (ia->data != ib->data)
90     return false;
91   if (ia->relocs.size() != ib->relocs.size())
92     return false;
93   auto f = [](const Reloc &ra, const Reloc &rb) {
94     if (ra.type != rb.type)
95       return false;
96     if (ra.pcrel != rb.pcrel)
97       return false;
98     if (ra.length != rb.length)
99       return false;
100     if (ra.offset != rb.offset)
101       return false;
102     if (ra.addend != rb.addend)
103       return false;
104     if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>())
105       return false;
106 
107     InputSection *isecA, *isecB;
108     if (ra.referent.is<Symbol *>()) {
109       const auto *sa = ra.referent.get<Symbol *>();
110       const auto *sb = rb.referent.get<Symbol *>();
111       if (sa->kind() != sb->kind())
112         return false;
113       if (isa<Defined>(sa)) {
114         const auto *da = cast<Defined>(sa);
115         const auto *db = cast<Defined>(sb);
116         if (da->isec && db->isec) {
117           isecA = da->isec;
118           isecB = db->isec;
119         } else {
120           assert(da->isAbsolute() && db->isAbsolute());
121           return da->value == db->value;
122         }
123       } else {
124         assert(isa<DylibSymbol>(sa));
125         return sa == sb;
126       }
127     } else {
128       isecA = ra.referent.get<InputSection *>();
129       isecB = rb.referent.get<InputSection *>();
130     }
131 
132     if (isecA->parent != isecB->parent)
133       return false;
134     // Sections with identical parents should be of the same kind.
135     assert(isecA->kind() == isecB->kind());
136     // We will compare ConcatInputSection contents in equalsVariable.
137     if (isa<ConcatInputSection>(isecA))
138       return true;
139     // Else we have two literal sections. References to them are equal iff their
140     // offsets in the output section are equal.
141     return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);
142   };
143   return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
144                     f);
145 }
146 
147 // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
148 // handled by equalsConstant().
149 static bool equalsVariable(const ConcatInputSection *ia,
150                            const ConcatInputSection *ib) {
151   assert(ia->relocs.size() == ib->relocs.size());
152   auto f = [](const Reloc &ra, const Reloc &rb) {
153     // We already filtered out mismatching values/addends in equalsConstant.
154     if (ra.referent == rb.referent)
155       return true;
156     const ConcatInputSection *isecA, *isecB;
157     if (ra.referent.is<Symbol *>()) {
158       // Matching DylibSymbols are already filtered out by the
159       // identical-referent check above. Non-matching DylibSymbols were filtered
160       // out in equalsConstant(). So we can safely cast to Defined here.
161       const auto *da = cast<Defined>(ra.referent.get<Symbol *>());
162       const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
163       if (da->isAbsolute())
164         return true;
165       isecA = dyn_cast<ConcatInputSection>(da->isec);
166       if (!isecA)
167         return true; // literal sections were checked in equalsConstant.
168       isecB = cast<ConcatInputSection>(db->isec);
169     } else {
170       const auto *sa = ra.referent.get<InputSection *>();
171       const auto *sb = rb.referent.get<InputSection *>();
172       isecA = dyn_cast<ConcatInputSection>(sa);
173       if (!isecA)
174         return true;
175       isecB = cast<ConcatInputSection>(sb);
176     }
177     return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
178   };
179   return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
180                     f);
181 }
182 
183 // Find the first InputSection after BEGIN whose equivalence class differs
184 size_t ICF::findBoundary(size_t begin, size_t end) {
185   uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2];
186   for (size_t i = begin + 1; i < end; ++i)
187     if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2])
188       return i;
189   return end;
190 }
191 
192 // Invoke FUNC on subranges with matching equivalence class
193 void ICF::forEachClassRange(size_t begin, size_t end,
194                             std::function<void(size_t, size_t)> func) {
195   while (begin < end) {
196     size_t mid = findBoundary(begin, end);
197     func(begin, mid);
198     begin = mid;
199   }
200 }
201 
202 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges
203 // with matching equivalence class
204 void ICF::forEachClass(std::function<void(size_t, size_t)> func) {
205   // Only use threads when the benefits outweigh the overhead.
206   const size_t threadingThreshold = 1024;
207   if (icfInputs.size() < threadingThreshold) {
208     forEachClassRange(0, icfInputs.size(), func);
209     ++icfPass;
210     return;
211   }
212 
213   // Shard into non-overlapping intervals, and call FUNC in parallel.  The
214   // sharding must be completed before any calls to FUNC are made so that FUNC
215   // can modify the InputSection in its shard without causing data races.
216   const size_t shards = 256;
217   size_t step = icfInputs.size() / shards;
218   size_t boundaries[shards + 1];
219   boundaries[0] = 0;
220   boundaries[shards] = icfInputs.size();
221   parallelForEachN(1, shards, [&](size_t i) {
222     boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());
223   });
224   parallelForEachN(1, shards + 1, [&](size_t i) {
225     if (boundaries[i - 1] < boundaries[i]) {
226       forEachClassRange(boundaries[i - 1], boundaries[i], func);
227     }
228   });
229   ++icfPass;
230 }
231 
232 void ICF::run() {
233   // Into each origin-section hash, combine all reloc referent section hashes.
234   for (icfPass = 0; icfPass < 2; ++icfPass) {
235     parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
236       uint64_t hash = isec->icfEqClass[icfPass % 2];
237       for (const Reloc &r : isec->relocs) {
238         if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
239           if (auto *dylibSym = dyn_cast<DylibSymbol>(sym))
240             hash += dylibSym->stubsHelperIndex;
241           else if (auto *defined = dyn_cast<Defined>(sym)) {
242             if (defined->isec) {
243               if (auto isec = dyn_cast<ConcatInputSection>(defined->isec))
244                 hash += defined->value + isec->icfEqClass[icfPass % 2];
245               else
246                 hash += defined->isec->kind() +
247                         defined->isec->getOffset(defined->value);
248             } else {
249               hash += defined->value;
250             }
251           } else
252             llvm_unreachable("foldIdenticalSections symbol kind");
253         }
254       }
255       // Set MSB to 1 to avoid collisions with non-hashed classes.
256       isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 63);
257     });
258   }
259 
260   llvm::stable_sort(
261       icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
262         return a->icfEqClass[0] < b->icfEqClass[0];
263       });
264   forEachClass(
265       [&](size_t begin, size_t end) { segregate(begin, end, equalsConstant); });
266 
267   // Split equivalence groups by comparing relocations until convergence
268   do {
269     icfRepeat = false;
270     forEachClass([&](size_t begin, size_t end) {
271       segregate(begin, end, equalsVariable);
272     });
273   } while (icfRepeat);
274   log("ICF needed " + Twine(icfPass) + " iterations");
275 
276   // Fold sections within equivalence classes
277   forEachClass([&](size_t begin, size_t end) {
278     if (end - begin < 2)
279       return;
280     ConcatInputSection *beginIsec = icfInputs[begin];
281     for (size_t i = begin + 1; i < end; ++i)
282       beginIsec->foldIdentical(icfInputs[i]);
283   });
284 }
285 
286 // Split an equivalence class into smaller classes.
287 void ICF::segregate(
288     size_t begin, size_t end,
289     std::function<bool(const ConcatInputSection *, const ConcatInputSection *)>
290         equals) {
291   while (begin < end) {
292     // Divide [begin, end) into two. Let mid be the start index of the
293     // second group.
294     auto bound = std::stable_partition(icfInputs.begin() + begin + 1,
295                                        icfInputs.begin() + end,
296                                        [&](ConcatInputSection *isec) {
297                                          return equals(icfInputs[begin], isec);
298                                        });
299     size_t mid = bound - icfInputs.begin();
300 
301     // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
302     // equivalence class ID because every group ends with a unique index.
303     for (size_t i = begin; i < mid; ++i)
304       icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid;
305 
306     // If we created a group, we need to iterate the main loop again.
307     if (mid != end)
308       icfRepeat = true;
309 
310     begin = mid;
311   }
312 }
313 
314 template <class Ptr>
315 DenseSet<const InputSection *> findFunctionsWithUnwindInfo() {
316   DenseSet<const InputSection *> result;
317   for (ConcatInputSection *isec : in.unwindInfo->getInputs()) {
318     for (size_t i = 0; i < isec->relocs.size(); ++i) {
319       Reloc &r = isec->relocs[i];
320       assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
321       if (r.offset % sizeof(CompactUnwindEntry<Ptr>) !=
322           offsetof(CompactUnwindEntry<Ptr>, functionAddress))
323         continue;
324       result.insert(r.referent.get<InputSection *>());
325     }
326   }
327   return result;
328 }
329 
330 void macho::foldIdenticalSections() {
331   TimeTraceScope timeScope("Fold Identical Code Sections");
332   // The ICF equivalence-class segregation algorithm relies on pre-computed
333   // hashes of InputSection::data for the ConcatOutputSection::inputs and all
334   // sections referenced by their relocs. We could recursively traverse the
335   // relocs to find every referenced InputSection, but that precludes easy
336   // parallelization. Therefore, we hash every InputSection here where we have
337   // them all accessible as simple vectors.
338 
339   // ICF can't fold functions with unwind info
340   DenseSet<const InputSection *> functionsWithUnwindInfo =
341       target->wordSize == 8 ? findFunctionsWithUnwindInfo<uint64_t>()
342                             : findFunctionsWithUnwindInfo<uint32_t>();
343 
344   // If an InputSection is ineligible for ICF, we give it a unique ID to force
345   // it into an unfoldable singleton equivalence class.  Begin the unique-ID
346   // space at inputSections.size(), so that it will never intersect with
347   // equivalence-class IDs which begin at 0. Since hashes & unique IDs never
348   // coexist with equivalence-class IDs, this is not necessary, but might help
349   // someone keep the numbers straight in case we ever need to debug the
350   // ICF::segregate()
351   std::vector<ConcatInputSection *> hashable;
352   uint64_t icfUniqueID = inputSections.size();
353   for (ConcatInputSection *isec : inputSections) {
354     // FIXME: consider non-code __text sections as hashable?
355     bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) &&
356                       !isec->shouldOmitFromOutput() &&
357                       !functionsWithUnwindInfo.contains(isec) &&
358                       isec->isHashableForICF();
359     if (isHashable)
360       hashable.push_back(isec);
361     else
362       isec->icfEqClass[0] = ++icfUniqueID;
363   }
364   parallelForEach(hashable,
365                   [](ConcatInputSection *isec) { isec->hashForICF(); });
366   // Now that every input section is either hashed or marked as unique, run the
367   // segregation algorithm to detect foldable subsections.
368   ICF(hashable).run();
369 }
370