xref: /freebsd/contrib/llvm-project/lld/MachO/ICF.cpp (revision 2c2ec6bbc9cc7762a250ffe903bda6c2e44d25ff)
1 //===- ICF.cpp ------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ICF.h"
10 #include "ConcatOutputSection.h"
11 #include "Config.h"
12 #include "InputSection.h"
13 #include "SymbolTable.h"
14 #include "Symbols.h"
15 
16 #include "lld/Common/CommonLinkerContext.h"
17 #include "llvm/Support/Parallel.h"
18 #include "llvm/Support/TimeProfiler.h"
19 #include "llvm/Support/xxhash.h"
20 
21 #include <atomic>
22 
23 using namespace llvm;
24 using namespace lld;
25 using namespace lld::macho;
26 
27 static constexpr bool verboseDiagnostics = false;
28 // This counter is used to generate unique thunk names.
29 static uint64_t icfThunkCounter = 0;
30 
31 class ICF {
32 public:
33   ICF(std::vector<ConcatInputSection *> &inputs);
34   void run();
35 
36   using EqualsFn = bool (ICF::*)(const ConcatInputSection *,
37                                  const ConcatInputSection *);
38   void segregate(size_t begin, size_t end, EqualsFn);
39   size_t findBoundary(size_t begin, size_t end);
40   void forEachClassRange(size_t begin, size_t end,
41                          llvm::function_ref<void(size_t, size_t)> func);
42   void forEachClass(llvm::function_ref<void(size_t, size_t)> func);
43 
44   bool equalsConstant(const ConcatInputSection *ia,
45                       const ConcatInputSection *ib);
46   bool equalsVariable(const ConcatInputSection *ia,
47                       const ConcatInputSection *ib);
48   void applySafeThunksToRange(size_t begin, size_t end);
49 
50   // ICF needs a copy of the inputs vector because its equivalence-class
51   // segregation algorithm destroys the proper sequence.
52   std::vector<ConcatInputSection *> icfInputs;
53 
54   unsigned icfPass = 0;
55   std::atomic<bool> icfRepeat{false};
56   std::atomic<uint64_t> equalsConstantCount{0};
57   std::atomic<uint64_t> equalsVariableCount{0};
58 };
59 
60 ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
61   icfInputs.assign(inputs.begin(), inputs.end());
62 }
63 
64 // ICF = Identical Code Folding
65 //
66 // We only fold __TEXT,__text, so this is really "code" folding, and not
67 // "COMDAT" folding. String and scalar constant literals are deduplicated
68 // elsewhere.
69 //
70 // Summary of segments & sections:
71 //
72 // The __TEXT segment is readonly at the MMU. Some sections are already
73 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
74 // synthetic and inherently free of duplicates (__TEXT,__stubs &
75 // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
76 // because doing so induces many test failures.
77 //
78 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
79 // thus ineligible for ICF.
80 //
81 // The __DATA_CONST segment is read/write at the MMU, but is logically const to
82 // the application after dyld applies fixups to pointer data. We currently
83 // fold only the __DATA_CONST,__cfstring section.
84 //
85 // The __DATA segment is read/write at the MMU, and as application-writeable
86 // data, none of its sections are eligible for ICF.
87 //
88 // Please see the large block comment in lld/ELF/ICF.cpp for an explanation
89 // of the segregation algorithm.
90 //
91 // FIXME(gkm): implement keep-unique attributes
92 // FIXME(gkm): implement address-significance tables for MachO object files
93 
94 // Compare "non-moving" parts of two ConcatInputSections, namely everything
95 // except references to other ConcatInputSections.
96 bool ICF::equalsConstant(const ConcatInputSection *ia,
97                          const ConcatInputSection *ib) {
98   if (verboseDiagnostics)
99     ++equalsConstantCount;
100   // We can only fold within the same OutputSection.
101   if (ia->parent != ib->parent)
102     return false;
103   if (ia->data.size() != ib->data.size())
104     return false;
105   if (ia->data != ib->data)
106     return false;
107   if (ia->relocs.size() != ib->relocs.size())
108     return false;
109   auto f = [](const Reloc &ra, const Reloc &rb) {
110     if (ra.type != rb.type)
111       return false;
112     if (ra.pcrel != rb.pcrel)
113       return false;
114     if (ra.length != rb.length)
115       return false;
116     if (ra.offset != rb.offset)
117       return false;
118     if (isa<Symbol *>(ra.referent) != isa<Symbol *>(rb.referent))
119       return false;
120 
121     InputSection *isecA, *isecB;
122 
123     uint64_t valueA = 0;
124     uint64_t valueB = 0;
125     if (isa<Symbol *>(ra.referent)) {
126       const auto *sa = cast<Symbol *>(ra.referent);
127       const auto *sb = cast<Symbol *>(rb.referent);
128       if (sa->kind() != sb->kind())
129         return false;
130       // ICF runs before Undefineds are treated (and potentially converted into
131       // DylibSymbols).
132       if (isa<DylibSymbol>(sa) || isa<Undefined>(sa))
133         return sa == sb && ra.addend == rb.addend;
134       assert(isa<Defined>(sa));
135       const auto *da = cast<Defined>(sa);
136       const auto *db = cast<Defined>(sb);
137       if (!da->isec() || !db->isec()) {
138         assert(da->isAbsolute() && db->isAbsolute());
139         return da->value + ra.addend == db->value + rb.addend;
140       }
141       isecA = da->isec();
142       valueA = da->value;
143       isecB = db->isec();
144       valueB = db->value;
145     } else {
146       isecA = cast<InputSection *>(ra.referent);
147       isecB = cast<InputSection *>(rb.referent);
148     }
149 
150     // Typically, we should not encounter sections marked with `keepUnique` at
151     // this point as they would have resulted in different hashes and therefore
152     // no need for a full comparison.
153     // However, in `safe_thunks` mode, it's possible for two different
154     // relocations to reference identical `keepUnique` functions that will be
155     // distinguished later via thunks - so we need to handle this case
156     // explicitly.
157     if ((isecA != isecB) && ((isecA->keepUnique && isCodeSection(isecA)) ||
158                              (isecB->keepUnique && isCodeSection(isecB))))
159       return false;
160 
161     if (isecA->parent != isecB->parent)
162       return false;
163     // Sections with identical parents should be of the same kind.
164     assert(isecA->kind() == isecB->kind());
165     // We will compare ConcatInputSection contents in equalsVariable.
166     if (isa<ConcatInputSection>(isecA))
167       return ra.addend == rb.addend;
168     // Else we have two literal sections. References to them are equal iff their
169     // offsets in the output section are equal.
170     if (isa<Symbol *>(ra.referent))
171       // For symbol relocs, we compare the contents at the symbol address. We
172       // don't do `getOffset(value + addend)` because value + addend may not be
173       // a valid offset in the literal section.
174       return isecA->getOffset(valueA) == isecB->getOffset(valueB) &&
175              ra.addend == rb.addend;
176     else {
177       assert(valueA == 0 && valueB == 0);
178       // For section relocs, we compare the content at the section offset.
179       return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);
180     }
181   };
182   return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
183                     f);
184 }
185 
186 // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
187 // handled by equalsConstant().
188 bool ICF::equalsVariable(const ConcatInputSection *ia,
189                          const ConcatInputSection *ib) {
190   if (verboseDiagnostics)
191     ++equalsVariableCount;
192   assert(ia->relocs.size() == ib->relocs.size());
193   auto f = [this](const Reloc &ra, const Reloc &rb) {
194     // We already filtered out mismatching values/addends in equalsConstant.
195     if (ra.referent == rb.referent)
196       return true;
197     const ConcatInputSection *isecA, *isecB;
198     if (isa<Symbol *>(ra.referent)) {
199       // Matching DylibSymbols are already filtered out by the
200       // identical-referent check above. Non-matching DylibSymbols were filtered
201       // out in equalsConstant(). So we can safely cast to Defined here.
202       const auto *da = cast<Defined>(cast<Symbol *>(ra.referent));
203       const auto *db = cast<Defined>(cast<Symbol *>(rb.referent));
204       if (da->isAbsolute())
205         return true;
206       isecA = dyn_cast<ConcatInputSection>(da->isec());
207       if (!isecA)
208         return true; // literal sections were checked in equalsConstant.
209       isecB = cast<ConcatInputSection>(db->isec());
210     } else {
211       const auto *sa = cast<InputSection *>(ra.referent);
212       const auto *sb = cast<InputSection *>(rb.referent);
213       isecA = dyn_cast<ConcatInputSection>(sa);
214       if (!isecA)
215         return true;
216       isecB = cast<ConcatInputSection>(sb);
217     }
218     return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
219   };
220   if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))
221     return false;
222 
223   // If there are symbols with associated unwind info, check that the unwind
224   // info matches. For simplicity, we only handle the case where there are only
225   // symbols at offset zero within the section (which is typically the case with
226   // .subsections_via_symbols.)
227   auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; };
228   const auto *itA = llvm::find_if(ia->symbols, hasUnwind);
229   const auto *itB = llvm::find_if(ib->symbols, hasUnwind);
230   if (itA == ia->symbols.end())
231     return itB == ib->symbols.end();
232   if (itB == ib->symbols.end())
233     return false;
234   const Defined *da = *itA;
235   const Defined *db = *itB;
236   if (da->unwindEntry()->icfEqClass[icfPass % 2] !=
237           db->unwindEntry()->icfEqClass[icfPass % 2] ||
238       da->value != 0 || db->value != 0)
239     return false;
240   auto isZero = [](Defined *d) { return d->value == 0; };
241   return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
242              ia->symbols.end() &&
243          std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
244              ib->symbols.end();
245 }
246 
247 // Find the first InputSection after BEGIN whose equivalence class differs
248 size_t ICF::findBoundary(size_t begin, size_t end) {
249   uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2];
250   for (size_t i = begin + 1; i < end; ++i)
251     if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2])
252       return i;
253   return end;
254 }
255 
256 // Invoke FUNC on subranges with matching equivalence class
257 void ICF::forEachClassRange(size_t begin, size_t end,
258                             llvm::function_ref<void(size_t, size_t)> func) {
259   while (begin < end) {
260     size_t mid = findBoundary(begin, end);
261     func(begin, mid);
262     begin = mid;
263   }
264 }
265 
266 // Find or create a symbol at offset 0 in the given section
267 static Symbol *getThunkTargetSymbol(ConcatInputSection *isec) {
268   for (Symbol *sym : isec->symbols)
269     if (auto *d = dyn_cast<Defined>(sym))
270       if (d->value == 0)
271         return sym;
272 
273   std::string thunkName;
274   if (isec->symbols.size() == 0)
275     thunkName = isec->getName().str() + ".icf.0";
276   else
277     thunkName = isec->getName().str() + "icf.thunk.target" +
278                 std::to_string(icfThunkCounter++);
279 
280   // If no symbol found at offset 0, create one
281   auto *sym = make<Defined>(thunkName, /*file=*/nullptr, isec,
282                             /*value=*/0, /*size=*/isec->getSize(),
283                             /*isWeakDef=*/false, /*isExternal=*/false,
284                             /*isPrivateExtern=*/false, /*isThumb=*/false,
285                             /*isReferencedDynamically=*/false,
286                             /*noDeadStrip=*/false);
287   isec->symbols.push_back(sym);
288   return sym;
289 }
290 
291 // Given a range of identical icfInputs, replace address significant functions
292 // with a thunk that is just a direct branch to the first function in the
293 // series. This way we keep only one main body of the function but we still
294 // retain the address uniqueness of relevant functions by having them be a
295 // direct branch thunk rather than containing a full copy of the actual function
296 // body.
297 void ICF::applySafeThunksToRange(size_t begin, size_t end) {
298   // When creating a unique ICF thunk, use the first section as the section that
299   // all thunks will branch to.
300   ConcatInputSection *masterIsec = icfInputs[begin];
301 
302   // If the first section is not address significant, sorting guarantees that
303   // there are no address significant functions. So we can skip this range.
304   if (!masterIsec->keepUnique)
305     return;
306 
307   // Skip anything that is not a code section.
308   if (!isCodeSection(masterIsec))
309     return;
310 
311   // If the functions we're dealing with are smaller than the thunk size, then
312   // just leave them all as-is - creating thunks would be a net loss.
313   uint32_t thunkSize = target->getICFSafeThunkSize();
314   if (masterIsec->data.size() <= thunkSize)
315     return;
316 
317   // Get the symbol that all thunks will branch to.
318   Symbol *masterSym = getThunkTargetSymbol(masterIsec);
319 
320   for (size_t i = begin + 1; i < end; ++i) {
321     ConcatInputSection *isec = icfInputs[i];
322     // When we're done processing keepUnique entries, we can stop. Sorting
323     // guaratees that all keepUnique will be at the front.
324     if (!isec->keepUnique)
325       break;
326 
327     ConcatInputSection *thunk =
328         makeSyntheticInputSection(isec->getSegName(), isec->getName());
329     addInputSection(thunk);
330 
331     target->initICFSafeThunkBody(thunk, masterSym);
332     thunk->foldIdentical(isec, Symbol::ICFFoldKind::Thunk);
333 
334     // Since we're folding the target function into a thunk, we need to adjust
335     // the symbols that now got relocated from the target function to the thunk.
336     // Since the thunk is only one branch, we move all symbols to offset 0 and
337     // make sure that the size of all non-zero-size symbols is equal to the size
338     // of the branch.
339     for (auto *sym : thunk->symbols) {
340       sym->value = 0;
341       if (sym->size != 0)
342         sym->size = thunkSize;
343     }
344   }
345 }
346 
347 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges
348 // with matching equivalence class
349 void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
350   // Only use threads when the benefits outweigh the overhead.
351   const size_t threadingThreshold = 1024;
352   if (icfInputs.size() < threadingThreshold) {
353     forEachClassRange(0, icfInputs.size(), func);
354     ++icfPass;
355     return;
356   }
357 
358   // Shard into non-overlapping intervals, and call FUNC in parallel.  The
359   // sharding must be completed before any calls to FUNC are made so that FUNC
360   // can modify the InputSection in its shard without causing data races.
361   const size_t shards = 256;
362   size_t step = icfInputs.size() / shards;
363   size_t boundaries[shards + 1];
364   boundaries[0] = 0;
365   boundaries[shards] = icfInputs.size();
366   parallelFor(1, shards, [&](size_t i) {
367     boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());
368   });
369   parallelFor(1, shards + 1, [&](size_t i) {
370     if (boundaries[i - 1] < boundaries[i]) {
371       forEachClassRange(boundaries[i - 1], boundaries[i], func);
372     }
373   });
374   ++icfPass;
375 }
376 
377 void ICF::run() {
378   // Into each origin-section hash, combine all reloc referent section hashes.
379   for (icfPass = 0; icfPass < 2; ++icfPass) {
380     parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
381       uint32_t hash = isec->icfEqClass[icfPass % 2];
382       for (const Reloc &r : isec->relocs) {
383         if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
384           if (auto *defined = dyn_cast<Defined>(sym)) {
385             if (defined->isec()) {
386               if (auto *referentIsec =
387                       dyn_cast<ConcatInputSection>(defined->isec()))
388                 hash += defined->value + referentIsec->icfEqClass[icfPass % 2];
389               else
390                 hash += defined->isec()->kind() +
391                         defined->isec()->getOffset(defined->value);
392             } else {
393               hash += defined->value;
394             }
395           } else {
396             // ICF runs before Undefined diags
397             assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym));
398           }
399         }
400       }
401       // Set MSB to 1 to avoid collisions with non-hashed classes.
402       isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31);
403     });
404   }
405 
406   llvm::stable_sort(
407       icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
408         // When using safe_thunks, ensure that we first sort by icfEqClass and
409         // then by keepUnique (descending). This guarantees that within an
410         // equivalence class, the keepUnique inputs are always first.
411         if (config->icfLevel == ICFLevel::safe_thunks)
412           if (a->icfEqClass[0] == b->icfEqClass[0])
413             return a->keepUnique > b->keepUnique;
414         return a->icfEqClass[0] < b->icfEqClass[0];
415       });
416   forEachClass([&](size_t begin, size_t end) {
417     segregate(begin, end, &ICF::equalsConstant);
418   });
419 
420   // Split equivalence groups by comparing relocations until convergence
421   do {
422     icfRepeat = false;
423     forEachClass([&](size_t begin, size_t end) {
424       segregate(begin, end, &ICF::equalsVariable);
425     });
426   } while (icfRepeat);
427   log("ICF needed " + Twine(icfPass) + " iterations");
428   if (verboseDiagnostics) {
429     log("equalsConstant() called " + Twine(equalsConstantCount) + " times");
430     log("equalsVariable() called " + Twine(equalsVariableCount) + " times");
431   }
432 
433   // When using safe_thunks, we need to create thunks for all keepUnique
434   // functions that can be deduplicated. Since we're creating / adding new
435   // InputSections, we can't paralellize this.
436   if (config->icfLevel == ICFLevel::safe_thunks)
437     forEachClassRange(0, icfInputs.size(), [&](size_t begin, size_t end) {
438       applySafeThunksToRange(begin, end);
439     });
440 
441   // Fold sections within equivalence classes
442   forEachClass([&](size_t begin, size_t end) {
443     if (end - begin < 2)
444       return;
445     bool useSafeThunks = config->icfLevel == ICFLevel::safe_thunks;
446 
447     // For ICF level safe_thunks, replace keepUnique function bodies with
448     // thunks. For all other ICF levles, directly merge the functions.
449 
450     ConcatInputSection *beginIsec = icfInputs[begin];
451     for (size_t i = begin + 1; i < end; ++i) {
452       // Skip keepUnique inputs when using safe_thunks (already handeled above)
453       if (useSafeThunks && icfInputs[i]->keepUnique) {
454         // Assert keepUnique sections are either small or replaced with thunks.
455         assert(!icfInputs[i]->live ||
456                icfInputs[i]->data.size() <= target->getICFSafeThunkSize());
457         assert(!icfInputs[i]->replacement ||
458                icfInputs[i]->replacement->data.size() ==
459                    target->getICFSafeThunkSize());
460         continue;
461       }
462       beginIsec->foldIdentical(icfInputs[i]);
463     }
464   });
465 }
466 
467 // Split an equivalence class into smaller classes.
468 void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
469   while (begin < end) {
470     // Divide [begin, end) into two. Let mid be the start index of the
471     // second group.
472     auto bound = std::stable_partition(
473         icfInputs.begin() + begin + 1, icfInputs.begin() + end,
474         [&](ConcatInputSection *isec) {
475           return (this->*equals)(icfInputs[begin], isec);
476         });
477     size_t mid = bound - icfInputs.begin();
478 
479     // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
480     // equivalence class ID because every group ends with a unique index.
481     for (size_t i = begin; i < mid; ++i)
482       icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid;
483 
484     // If we created a group, we need to iterate the main loop again.
485     if (mid != end)
486       icfRepeat = true;
487 
488     begin = mid;
489   }
490 }
491 
492 void macho::markSymAsAddrSig(Symbol *s) {
493   if (auto *d = dyn_cast_or_null<Defined>(s))
494     if (d->isec())
495       d->isec()->keepUnique = true;
496 }
497 
498 void macho::markAddrSigSymbols() {
499   TimeTraceScope timeScope("Mark addrsig symbols");
500   for (InputFile *file : inputFiles) {
501     ObjFile *obj = dyn_cast<ObjFile>(file);
502     if (!obj)
503       continue;
504 
505     Section *addrSigSection = obj->addrSigSection;
506     if (!addrSigSection)
507       continue;
508     assert(addrSigSection->subsections.size() == 1);
509 
510     const InputSection *isec = addrSigSection->subsections[0].isec;
511 
512     for (const Reloc &r : isec->relocs) {
513       if (auto *sym = r.referent.dyn_cast<Symbol *>())
514         markSymAsAddrSig(sym);
515       else
516         error(toString(isec) + ": unexpected section relocation");
517     }
518   }
519 }
520 
521 // Given a symbol that was folded into a thunk, return the symbol pointing to
522 // the actual body of the function. We use this approach rather than storing the
523 // needed info in the Defined itself in order to minimize memory usage.
524 Defined *macho::getBodyForThunkFoldedSym(Defined *foldedSym) {
525   assert(isa<ConcatInputSection>(foldedSym->originalIsec) &&
526          "thunk-folded ICF symbol expected to be on a ConcatInputSection");
527   // foldedSec is the InputSection that was marked as deleted upon fold
528   ConcatInputSection *foldedSec =
529       cast<ConcatInputSection>(foldedSym->originalIsec);
530 
531   // thunkBody is the actual live thunk, containing the code that branches to
532   // the actual body of the function.
533   InputSection *thunkBody = foldedSec->replacement;
534 
535   // The symbol of the merged body of the function that the thunk jumps to. This
536   // will end up in the final binary.
537   Symbol *targetSym = target->getThunkBranchTarget(thunkBody);
538 
539   return cast<Defined>(targetSym);
540 }
541 void macho::foldIdenticalSections(bool onlyCfStrings) {
542   TimeTraceScope timeScope("Fold Identical Code Sections");
543   // The ICF equivalence-class segregation algorithm relies on pre-computed
544   // hashes of InputSection::data for the ConcatOutputSection::inputs and all
545   // sections referenced by their relocs. We could recursively traverse the
546   // relocs to find every referenced InputSection, but that precludes easy
547   // parallelization. Therefore, we hash every InputSection here where we have
548   // them all accessible as simple vectors.
549 
550   // If an InputSection is ineligible for ICF, we give it a unique ID to force
551   // it into an unfoldable singleton equivalence class.  Begin the unique-ID
552   // space at inputSections.size(), so that it will never intersect with
553   // equivalence-class IDs which begin at 0. Since hashes & unique IDs never
554   // coexist with equivalence-class IDs, this is not necessary, but might help
555   // someone keep the numbers straight in case we ever need to debug the
556   // ICF::segregate()
557   std::vector<ConcatInputSection *> foldable;
558   uint64_t icfUniqueID = inputSections.size();
559   // Reset the thunk counter for each run of ICF.
560   icfThunkCounter = 0;
561   for (ConcatInputSection *isec : inputSections) {
562     bool isFoldableWithAddendsRemoved = isCfStringSection(isec) ||
563                                         isClassRefsSection(isec) ||
564                                         isSelRefsSection(isec);
565     // NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we
566     // can still fold it.
567     bool hasFoldableFlags = (isSelRefsSection(isec) ||
568                              sectionType(isec->getFlags()) == MachO::S_REGULAR);
569 
570     bool isCodeSec = isCodeSection(isec);
571 
572     // When keepUnique is true, the section is not foldable. Unless we are at
573     // icf level safe_thunks, in which case we still want to fold code sections.
574     // When using safe_thunks we'll apply the safe_thunks logic at merge time
575     // based on the 'keepUnique' flag.
576     bool noUniqueRequirement =
577         !isec->keepUnique ||
578         ((config->icfLevel == ICFLevel::safe_thunks) && isCodeSec);
579 
580     // FIXME: consider non-code __text sections as foldable?
581     bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&
582                       (isCodeSec || isFoldableWithAddendsRemoved ||
583                        isGccExceptTabSection(isec)) &&
584                       noUniqueRequirement && !isec->hasAltEntry &&
585                       !isec->shouldOmitFromOutput() && hasFoldableFlags;
586     if (isFoldable) {
587       foldable.push_back(isec);
588       for (Defined *d : isec->symbols)
589         if (d->unwindEntry())
590           foldable.push_back(d->unwindEntry());
591 
592       // Some sections have embedded addends that foil ICF's hashing / equality
593       // checks. (We can ignore embedded addends when doing ICF because the same
594       // information gets recorded in our Reloc structs.) We therefore create a
595       // mutable copy of the section data and zero out the embedded addends
596       // before performing any hashing / equality checks.
597       if (isFoldableWithAddendsRemoved) {
598         // We have to do this copying serially as the BumpPtrAllocator is not
599         // thread-safe. FIXME: Make a thread-safe allocator.
600         MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc());
601         for (const Reloc &r : isec->relocs)
602           target->relocateOne(copy.data() + r.offset, r, /*va=*/0,
603                               /*relocVA=*/0);
604         isec->data = copy;
605       }
606     } else if (!isEhFrameSection(isec)) {
607       // EH frames are gathered as foldables from unwindEntry above; give a
608       // unique ID to everything else.
609       isec->icfEqClass[0] = ++icfUniqueID;
610     }
611   }
612   parallelForEach(foldable, [](ConcatInputSection *isec) {
613     assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!
614     // Turn-on the top bit to guarantee that valid hashes have no collisions
615     // with the small-integer unique IDs for ICF-ineligible sections
616     isec->icfEqClass[0] = xxh3_64bits(isec->data) | (1ull << 31);
617   });
618   // Now that every input section is either hashed or marked as unique, run the
619   // segregation algorithm to detect foldable subsections.
620   ICF(foldable).run();
621 }
622