xref: /freebsd/contrib/llvm-project/llvm/lib/LTO/LTO.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-LTO.cpp - LLVM Link Time Optimizer ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions and classes used to support LTO.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/LTO/LTO.h"
14 #include "llvm/ADT/ScopeExit.h"
15 #include "llvm/ADT/SmallSet.h"
16 #include "llvm/ADT/StableHashing.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
20 #include "llvm/Analysis/StackSafetyAnalysis.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/Analysis/TargetTransformInfo.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/Bitcode/BitcodeWriter.h"
25 #include "llvm/CGData/CodeGenData.h"
26 #include "llvm/CodeGen/Analysis.h"
27 #include "llvm/Config/llvm-config.h"
28 #include "llvm/IR/AutoUpgrade.h"
29 #include "llvm/IR/DiagnosticPrinter.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/LLVMRemarkStreamer.h"
32 #include "llvm/IR/LegacyPassManager.h"
33 #include "llvm/IR/Mangler.h"
34 #include "llvm/IR/Metadata.h"
35 #include "llvm/IR/RuntimeLibcalls.h"
36 #include "llvm/LTO/LTOBackend.h"
37 #include "llvm/Linker/IRMover.h"
38 #include "llvm/MC/TargetRegistry.h"
39 #include "llvm/Object/IRObjectFile.h"
40 #include "llvm/Support/Caching.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/Compiler.h"
43 #include "llvm/Support/Error.h"
44 #include "llvm/Support/FileSystem.h"
45 #include "llvm/Support/JSON.h"
46 #include "llvm/Support/MemoryBuffer.h"
47 #include "llvm/Support/Path.h"
48 #include "llvm/Support/Process.h"
49 #include "llvm/Support/SHA1.h"
50 #include "llvm/Support/SourceMgr.h"
51 #include "llvm/Support/ThreadPool.h"
52 #include "llvm/Support/Threading.h"
53 #include "llvm/Support/TimeProfiler.h"
54 #include "llvm/Support/ToolOutputFile.h"
55 #include "llvm/Support/VCSRevision.h"
56 #include "llvm/Support/raw_ostream.h"
57 #include "llvm/Target/TargetOptions.h"
58 #include "llvm/Transforms/IPO.h"
59 #include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
60 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
61 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
62 #include "llvm/Transforms/Utils/SplitModule.h"
63 
64 #include <optional>
65 #include <set>
66 
67 using namespace llvm;
68 using namespace lto;
69 using namespace object;
70 
71 #define DEBUG_TYPE "lto"
72 
73 static cl::opt<bool>
74     DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden,
75                    cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));
76 
77 extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;
78 
79 extern cl::opt<bool> ForceImportAll;
80 
81 namespace llvm {
82 /// Enable global value internalization in LTO.
83 cl::opt<bool> EnableLTOInternalization(
84     "enable-lto-internalization", cl::init(true), cl::Hidden,
85     cl::desc("Enable global value internalization in LTO"));
86 
87 static cl::opt<bool>
88     LTOKeepSymbolCopies("lto-keep-symbol-copies", cl::init(false), cl::Hidden,
89                         cl::desc("Keep copies of symbols in LTO indexing"));
90 
91 /// Indicate we are linking with an allocator that supports hot/cold operator
92 /// new interfaces.
93 extern cl::opt<bool> SupportsHotColdNew;
94 
95 /// Enable MemProf context disambiguation for thin link.
96 extern cl::opt<bool> EnableMemProfContextDisambiguation;
97 } // namespace llvm
98 
99 // Computes a unique hash for the Module considering the current list of
100 // export/import and other global analysis results.
101 // Returns the hash in its hexadecimal representation.
computeLTOCacheKey(const Config & Conf,const ModuleSummaryIndex & Index,StringRef ModuleID,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const std::map<GlobalValue::GUID,GlobalValue::LinkageTypes> & ResolvedODR,const GVSummaryMapTy & DefinedGlobals,const DenseSet<GlobalValue::GUID> & CfiFunctionDefs,const DenseSet<GlobalValue::GUID> & CfiFunctionDecls)102 std::string llvm::computeLTOCacheKey(
103     const Config &Conf, const ModuleSummaryIndex &Index, StringRef ModuleID,
104     const FunctionImporter::ImportMapTy &ImportList,
105     const FunctionImporter::ExportSetTy &ExportList,
106     const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
107     const GVSummaryMapTy &DefinedGlobals,
108     const DenseSet<GlobalValue::GUID> &CfiFunctionDefs,
109     const DenseSet<GlobalValue::GUID> &CfiFunctionDecls) {
110   // Compute the unique hash for this entry.
111   // This is based on the current compiler version, the module itself, the
112   // export list, the hash for every single module in the import list, the
113   // list of ResolvedODR for the module, and the list of preserved symbols.
114   SHA1 Hasher;
115 
116   // Start with the compiler revision
117   Hasher.update(LLVM_VERSION_STRING);
118 #ifdef LLVM_REVISION
119   Hasher.update(LLVM_REVISION);
120 #endif
121 
122   // Include the parts of the LTO configuration that affect code generation.
123   auto AddString = [&](StringRef Str) {
124     Hasher.update(Str);
125     Hasher.update(ArrayRef<uint8_t>{0});
126   };
127   auto AddUnsigned = [&](unsigned I) {
128     uint8_t Data[4];
129     support::endian::write32le(Data, I);
130     Hasher.update(Data);
131   };
132   auto AddUint64 = [&](uint64_t I) {
133     uint8_t Data[8];
134     support::endian::write64le(Data, I);
135     Hasher.update(Data);
136   };
137   auto AddUint8 = [&](const uint8_t I) {
138     Hasher.update(ArrayRef<uint8_t>(&I, 1));
139   };
140   AddString(Conf.CPU);
141   // FIXME: Hash more of Options. For now all clients initialize Options from
142   // command-line flags (which is unsupported in production), but may set
143   // X86RelaxRelocations. The clang driver can also pass FunctionSections,
144   // DataSections and DebuggerTuning via command line flags.
145   AddUnsigned(Conf.Options.MCOptions.X86RelaxRelocations);
146   AddUnsigned(Conf.Options.FunctionSections);
147   AddUnsigned(Conf.Options.DataSections);
148   AddUnsigned((unsigned)Conf.Options.DebuggerTuning);
149   for (auto &A : Conf.MAttrs)
150     AddString(A);
151   if (Conf.RelocModel)
152     AddUnsigned(*Conf.RelocModel);
153   else
154     AddUnsigned(-1);
155   if (Conf.CodeModel)
156     AddUnsigned(*Conf.CodeModel);
157   else
158     AddUnsigned(-1);
159   for (const auto &S : Conf.MllvmArgs)
160     AddString(S);
161   AddUnsigned(static_cast<int>(Conf.CGOptLevel));
162   AddUnsigned(static_cast<int>(Conf.CGFileType));
163   AddUnsigned(Conf.OptLevel);
164   AddUnsigned(Conf.Freestanding);
165   AddString(Conf.OptPipeline);
166   AddString(Conf.AAPipeline);
167   AddString(Conf.OverrideTriple);
168   AddString(Conf.DefaultTriple);
169   AddString(Conf.DwoDir);
170 
171   // Include the hash for the current module
172   auto ModHash = Index.getModuleHash(ModuleID);
173   Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
174 
175   // TODO: `ExportList` is determined by `ImportList`. Since `ImportList` is
176   // used to compute cache key, we could omit hashing `ExportList` here.
177   std::vector<uint64_t> ExportsGUID;
178   ExportsGUID.reserve(ExportList.size());
179   for (const auto &VI : ExportList)
180     ExportsGUID.push_back(VI.getGUID());
181 
182   // Sort the export list elements GUIDs.
183   llvm::sort(ExportsGUID);
184   for (auto GUID : ExportsGUID)
185     Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
186 
187   // Order using module hash, to be both independent of module name and
188   // module order.
189   auto Comp = [&](const std::pair<StringRef, GlobalValue::GUID> &L,
190                   const std::pair<StringRef, GlobalValue::GUID> &R) {
191     return std::make_pair(Index.getModule(L.first)->second, L.second) <
192            std::make_pair(Index.getModule(R.first)->second, R.second);
193   };
194   FunctionImporter::SortedImportList SortedImportList(ImportList, Comp);
195 
196   // Count the number of imports for each source module.
197   DenseMap<StringRef, unsigned> ModuleToNumImports;
198   for (const auto &[FromModule, GUID, Type] : SortedImportList)
199     ++ModuleToNumImports[FromModule];
200 
201   std::optional<StringRef> LastModule;
202   for (const auto &[FromModule, GUID, Type] : SortedImportList) {
203     if (LastModule != FromModule) {
204       // Include the hash for every module we import functions from. The set of
205       // imported symbols for each module may affect code generation and is
206       // sensitive to link order, so include that as well.
207       LastModule = FromModule;
208       auto ModHash = Index.getModule(FromModule)->second;
209       Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
210       AddUint64(ModuleToNumImports[FromModule]);
211     }
212     AddUint64(GUID);
213     AddUint8(Type);
214   }
215 
216   // Include the hash for the resolved ODR.
217   for (auto &Entry : ResolvedODR) {
218     Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
219                                     sizeof(GlobalValue::GUID)));
220     Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
221                                     sizeof(GlobalValue::LinkageTypes)));
222   }
223 
224   // Members of CfiFunctionDefs and CfiFunctionDecls that are referenced or
225   // defined in this module.
226   std::set<GlobalValue::GUID> UsedCfiDefs;
227   std::set<GlobalValue::GUID> UsedCfiDecls;
228 
229   // Typeids used in this module.
230   std::set<GlobalValue::GUID> UsedTypeIds;
231 
232   auto AddUsedCfiGlobal = [&](GlobalValue::GUID ValueGUID) {
233     if (CfiFunctionDefs.contains(ValueGUID))
234       UsedCfiDefs.insert(ValueGUID);
235     if (CfiFunctionDecls.contains(ValueGUID))
236       UsedCfiDecls.insert(ValueGUID);
237   };
238 
239   auto AddUsedThings = [&](GlobalValueSummary *GS) {
240     if (!GS) return;
241     AddUnsigned(GS->getVisibility());
242     AddUnsigned(GS->isLive());
243     AddUnsigned(GS->canAutoHide());
244     for (const ValueInfo &VI : GS->refs()) {
245       AddUnsigned(VI.isDSOLocal(Index.withDSOLocalPropagation()));
246       AddUsedCfiGlobal(VI.getGUID());
247     }
248     if (auto *GVS = dyn_cast<GlobalVarSummary>(GS)) {
249       AddUnsigned(GVS->maybeReadOnly());
250       AddUnsigned(GVS->maybeWriteOnly());
251     }
252     if (auto *FS = dyn_cast<FunctionSummary>(GS)) {
253       for (auto &TT : FS->type_tests())
254         UsedTypeIds.insert(TT);
255       for (auto &TT : FS->type_test_assume_vcalls())
256         UsedTypeIds.insert(TT.GUID);
257       for (auto &TT : FS->type_checked_load_vcalls())
258         UsedTypeIds.insert(TT.GUID);
259       for (auto &TT : FS->type_test_assume_const_vcalls())
260         UsedTypeIds.insert(TT.VFunc.GUID);
261       for (auto &TT : FS->type_checked_load_const_vcalls())
262         UsedTypeIds.insert(TT.VFunc.GUID);
263       for (auto &ET : FS->calls()) {
264         AddUnsigned(ET.first.isDSOLocal(Index.withDSOLocalPropagation()));
265         AddUsedCfiGlobal(ET.first.getGUID());
266       }
267     }
268   };
269 
270   // Include the hash for the linkage type to reflect internalization and weak
271   // resolution, and collect any used type identifier resolutions.
272   for (auto &GS : DefinedGlobals) {
273     GlobalValue::LinkageTypes Linkage = GS.second->linkage();
274     Hasher.update(
275         ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage)));
276     AddUsedCfiGlobal(GS.first);
277     AddUsedThings(GS.second);
278   }
279 
280   // Imported functions may introduce new uses of type identifier resolutions,
281   // so we need to collect their used resolutions as well.
282   for (const auto &[FromModule, GUID, Type] : SortedImportList) {
283     GlobalValueSummary *S = Index.findSummaryInModule(GUID, FromModule);
284     AddUsedThings(S);
285     // If this is an alias, we also care about any types/etc. that the aliasee
286     // may reference.
287     if (auto *AS = dyn_cast_or_null<AliasSummary>(S))
288       AddUsedThings(AS->getBaseObject());
289   }
290 
291   auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
292     AddString(TId);
293 
294     AddUnsigned(S.TTRes.TheKind);
295     AddUnsigned(S.TTRes.SizeM1BitWidth);
296 
297     AddUint64(S.TTRes.AlignLog2);
298     AddUint64(S.TTRes.SizeM1);
299     AddUint64(S.TTRes.BitMask);
300     AddUint64(S.TTRes.InlineBits);
301 
302     AddUint64(S.WPDRes.size());
303     for (auto &WPD : S.WPDRes) {
304       AddUnsigned(WPD.first);
305       AddUnsigned(WPD.second.TheKind);
306       AddString(WPD.second.SingleImplName);
307 
308       AddUint64(WPD.second.ResByArg.size());
309       for (auto &ByArg : WPD.second.ResByArg) {
310         AddUint64(ByArg.first.size());
311         for (uint64_t Arg : ByArg.first)
312           AddUint64(Arg);
313         AddUnsigned(ByArg.second.TheKind);
314         AddUint64(ByArg.second.Info);
315         AddUnsigned(ByArg.second.Byte);
316         AddUnsigned(ByArg.second.Bit);
317       }
318     }
319   };
320 
321   // Include the hash for all type identifiers used by this module.
322   for (GlobalValue::GUID TId : UsedTypeIds) {
323     auto TidIter = Index.typeIds().equal_range(TId);
324     for (const auto &I : make_range(TidIter))
325       AddTypeIdSummary(I.second.first, I.second.second);
326   }
327 
328   AddUnsigned(UsedCfiDefs.size());
329   for (auto &V : UsedCfiDefs)
330     AddUint64(V);
331 
332   AddUnsigned(UsedCfiDecls.size());
333   for (auto &V : UsedCfiDecls)
334     AddUint64(V);
335 
336   if (!Conf.SampleProfile.empty()) {
337     auto FileOrErr = MemoryBuffer::getFile(Conf.SampleProfile);
338     if (FileOrErr) {
339       Hasher.update(FileOrErr.get()->getBuffer());
340 
341       if (!Conf.ProfileRemapping.empty()) {
342         FileOrErr = MemoryBuffer::getFile(Conf.ProfileRemapping);
343         if (FileOrErr)
344           Hasher.update(FileOrErr.get()->getBuffer());
345       }
346     }
347   }
348 
349   return toHex(Hasher.result());
350 }
351 
recomputeLTOCacheKey(const std::string & Key,StringRef ExtraID)352 std::string llvm::recomputeLTOCacheKey(const std::string &Key,
353                                        StringRef ExtraID) {
354   SHA1 Hasher;
355 
356   auto AddString = [&](StringRef Str) {
357     Hasher.update(Str);
358     Hasher.update(ArrayRef<uint8_t>{0});
359   };
360   AddString(Key);
361   AddString(ExtraID);
362 
363   return toHex(Hasher.result());
364 }
365 
thinLTOResolvePrevailingGUID(const Config & C,ValueInfo VI,DenseSet<GlobalValueSummary * > & GlobalInvolvedWithAlias,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing,function_ref<void (StringRef,GlobalValue::GUID,GlobalValue::LinkageTypes)> recordNewLinkage,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols)366 static void thinLTOResolvePrevailingGUID(
367     const Config &C, ValueInfo VI,
368     DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
369     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
370         isPrevailing,
371     function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
372         recordNewLinkage,
373     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
374   GlobalValue::VisibilityTypes Visibility =
375       C.VisibilityScheme == Config::ELF ? VI.getELFVisibility()
376                                         : GlobalValue::DefaultVisibility;
377   for (auto &S : VI.getSummaryList()) {
378     GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
379     // Ignore local and appending linkage values since the linker
380     // doesn't resolve them.
381     if (GlobalValue::isLocalLinkage(OriginalLinkage) ||
382         GlobalValue::isAppendingLinkage(S->linkage()))
383       continue;
384     // We need to emit only one of these. The prevailing module will keep it,
385     // but turned into a weak, while the others will drop it when possible.
386     // This is both a compile-time optimization and a correctness
387     // transformation. This is necessary for correctness when we have exported
388     // a reference - we need to convert the linkonce to weak to
389     // ensure a copy is kept to satisfy the exported reference.
390     // FIXME: We may want to split the compile time and correctness
391     // aspects into separate routines.
392     if (isPrevailing(VI.getGUID(), S.get())) {
393       if (GlobalValue::isLinkOnceLinkage(OriginalLinkage)) {
394         S->setLinkage(GlobalValue::getWeakLinkage(
395             GlobalValue::isLinkOnceODRLinkage(OriginalLinkage)));
396         // The kept copy is eligible for auto-hiding (hidden visibility) if all
397         // copies were (i.e. they were all linkonce_odr global unnamed addr).
398         // If any copy is not (e.g. it was originally weak_odr), then the symbol
399         // must remain externally available (e.g. a weak_odr from an explicitly
400         // instantiated template). Additionally, if it is in the
401         // GUIDPreservedSymbols set, that means that it is visibile outside
402         // the summary (e.g. in a native object or a bitcode file without
403         // summary), and in that case we cannot hide it as it isn't possible to
404         // check all copies.
405         S->setCanAutoHide(VI.canAutoHide() &&
406                           !GUIDPreservedSymbols.count(VI.getGUID()));
407       }
408       if (C.VisibilityScheme == Config::FromPrevailing)
409         Visibility = S->getVisibility();
410     }
411     // Alias and aliasee can't be turned into available_externally.
412     // When force-import-all is used, it indicates that object linking is not
413     // supported by the target. In this case, we can't change the linkage as
414     // well in case the global is converted to declaration.
415     else if (!isa<AliasSummary>(S.get()) &&
416              !GlobalInvolvedWithAlias.count(S.get()) && !ForceImportAll)
417       S->setLinkage(GlobalValue::AvailableExternallyLinkage);
418 
419     // For ELF, set visibility to the computed visibility from summaries. We
420     // don't track visibility from declarations so this may be more relaxed than
421     // the most constraining one.
422     if (C.VisibilityScheme == Config::ELF)
423       S->setVisibility(Visibility);
424 
425     if (S->linkage() != OriginalLinkage)
426       recordNewLinkage(S->modulePath(), VI.getGUID(), S->linkage());
427   }
428 
429   if (C.VisibilityScheme == Config::FromPrevailing) {
430     for (auto &S : VI.getSummaryList()) {
431       GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
432       if (GlobalValue::isLocalLinkage(OriginalLinkage) ||
433           GlobalValue::isAppendingLinkage(S->linkage()))
434         continue;
435       S->setVisibility(Visibility);
436     }
437   }
438 }
439 
440 /// Resolve linkage for prevailing symbols in the \p Index.
441 //
442 // We'd like to drop these functions if they are no longer referenced in the
443 // current module. However there is a chance that another module is still
444 // referencing them because of the import. We make sure we always emit at least
445 // one copy.
thinLTOResolvePrevailingInIndex(const Config & C,ModuleSummaryIndex & Index,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing,function_ref<void (StringRef,GlobalValue::GUID,GlobalValue::LinkageTypes)> recordNewLinkage,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols)446 void llvm::thinLTOResolvePrevailingInIndex(
447     const Config &C, ModuleSummaryIndex &Index,
448     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
449         isPrevailing,
450     function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
451         recordNewLinkage,
452     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
453   // We won't optimize the globals that are referenced by an alias for now
454   // Ideally we should turn the alias into a global and duplicate the definition
455   // when needed.
456   DenseSet<GlobalValueSummary *> GlobalInvolvedWithAlias;
457   for (auto &I : Index)
458     for (auto &S : I.second.SummaryList)
459       if (auto AS = dyn_cast<AliasSummary>(S.get()))
460         GlobalInvolvedWithAlias.insert(&AS->getAliasee());
461 
462   for (auto &I : Index)
463     thinLTOResolvePrevailingGUID(C, Index.getValueInfo(I),
464                                  GlobalInvolvedWithAlias, isPrevailing,
465                                  recordNewLinkage, GUIDPreservedSymbols);
466 }
467 
thinLTOInternalizeAndPromoteGUID(ValueInfo VI,function_ref<bool (StringRef,ValueInfo)> isExported,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing)468 static void thinLTOInternalizeAndPromoteGUID(
469     ValueInfo VI, function_ref<bool(StringRef, ValueInfo)> isExported,
470     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
471         isPrevailing) {
472   auto ExternallyVisibleCopies =
473       llvm::count_if(VI.getSummaryList(),
474                      [](const std::unique_ptr<GlobalValueSummary> &Summary) {
475                        return !GlobalValue::isLocalLinkage(Summary->linkage());
476                      });
477 
478   for (auto &S : VI.getSummaryList()) {
479     // First see if we need to promote an internal value because it is not
480     // exported.
481     if (isExported(S->modulePath(), VI)) {
482       if (GlobalValue::isLocalLinkage(S->linkage()))
483         S->setLinkage(GlobalValue::ExternalLinkage);
484       continue;
485     }
486 
487     // Otherwise, see if we can internalize.
488     if (!EnableLTOInternalization)
489       continue;
490 
491     // Non-exported values with external linkage can be internalized.
492     if (GlobalValue::isExternalLinkage(S->linkage())) {
493       S->setLinkage(GlobalValue::InternalLinkage);
494       continue;
495     }
496 
497     // Non-exported function and variable definitions with a weak-for-linker
498     // linkage can be internalized in certain cases. The minimum legality
499     // requirements would be that they are not address taken to ensure that we
500     // don't break pointer equality checks, and that variables are either read-
501     // or write-only. For functions, this is the case if either all copies are
502     // [local_]unnamed_addr, or we can propagate reference edge attributes
503     // (which is how this is guaranteed for variables, when analyzing whether
504     // they are read or write-only).
505     //
506     // However, we only get to this code for weak-for-linkage values in one of
507     // two cases:
508     // 1) The prevailing copy is not in IR (it is in native code).
509     // 2) The prevailing copy in IR is not exported from its module.
510     // Additionally, at least for the new LTO API, case 2 will only happen if
511     // there is exactly one definition of the value (i.e. in exactly one
512     // module), as duplicate defs are result in the value being marked exported.
513     // Likely, users of the legacy LTO API are similar, however, currently there
514     // are llvm-lto based tests of the legacy LTO API that do not mark
515     // duplicate linkonce_odr copies as exported via the tool, so we need
516     // to handle that case below by checking the number of copies.
517     //
518     // Generally, we only want to internalize a weak-for-linker value in case
519     // 2, because in case 1 we cannot see how the value is used to know if it
520     // is read or write-only. We also don't want to bloat the binary with
521     // multiple internalized copies of non-prevailing linkonce/weak functions.
522     // Note if we don't internalize, we will convert non-prevailing copies to
523     // available_externally anyway, so that we drop them after inlining. The
524     // only reason to internalize such a function is if we indeed have a single
525     // copy, because internalizing it won't increase binary size, and enables
526     // use of inliner heuristics that are more aggressive in the face of a
527     // single call to a static (local). For variables, internalizing a read or
528     // write only variable can enable more aggressive optimization. However, we
529     // already perform this elsewhere in the ThinLTO backend handling for
530     // read or write-only variables (processGlobalForThinLTO).
531     //
532     // Therefore, only internalize linkonce/weak if there is a single copy, that
533     // is prevailing in this IR module. We can do so aggressively, without
534     // requiring the address to be insignificant, or that a variable be read or
535     // write-only.
536     if (!GlobalValue::isWeakForLinker(S->linkage()) ||
537         GlobalValue::isExternalWeakLinkage(S->linkage()))
538       continue;
539 
540     if (isPrevailing(VI.getGUID(), S.get()) && ExternallyVisibleCopies == 1)
541       S->setLinkage(GlobalValue::InternalLinkage);
542   }
543 }
544 
545 // Update the linkages in the given \p Index to mark exported values
546 // as external and non-exported values as internal.
thinLTOInternalizeAndPromoteInIndex(ModuleSummaryIndex & Index,function_ref<bool (StringRef,ValueInfo)> isExported,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing)547 void llvm::thinLTOInternalizeAndPromoteInIndex(
548     ModuleSummaryIndex &Index,
549     function_ref<bool(StringRef, ValueInfo)> isExported,
550     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
551         isPrevailing) {
552   for (auto &I : Index)
553     thinLTOInternalizeAndPromoteGUID(Index.getValueInfo(I), isExported,
554                                      isPrevailing);
555 }
556 
557 // Requires a destructor for std::vector<InputModule>.
558 InputFile::~InputFile() = default;
559 
create(MemoryBufferRef Object)560 Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
561   std::unique_ptr<InputFile> File(new InputFile);
562 
563   Expected<IRSymtabFile> FOrErr = readIRSymtab(Object);
564   if (!FOrErr)
565     return FOrErr.takeError();
566 
567   File->TargetTriple = FOrErr->TheReader.getTargetTriple();
568   File->SourceFileName = FOrErr->TheReader.getSourceFileName();
569   File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts();
570   File->DependentLibraries = FOrErr->TheReader.getDependentLibraries();
571   File->ComdatTable = FOrErr->TheReader.getComdatTable();
572 
573   for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) {
574     size_t Begin = File->Symbols.size();
575     for (const irsymtab::Reader::SymbolRef &Sym :
576          FOrErr->TheReader.module_symbols(I))
577       // Skip symbols that are irrelevant to LTO. Note that this condition needs
578       // to match the one in Skip() in LTO::addRegularLTO().
579       if (Sym.isGlobal() && !Sym.isFormatSpecific())
580         File->Symbols.push_back(Sym);
581     File->ModuleSymIndices.push_back({Begin, File->Symbols.size()});
582   }
583 
584   File->Mods = FOrErr->Mods;
585   File->Strtab = std::move(FOrErr->Strtab);
586   return std::move(File);
587 }
588 
getName() const589 StringRef InputFile::getName() const {
590   return Mods[0].getModuleIdentifier();
591 }
592 
getSingleBitcodeModule()593 BitcodeModule &InputFile::getSingleBitcodeModule() {
594   assert(Mods.size() == 1 && "Expect only one bitcode module");
595   return Mods[0];
596 }
597 
RegularLTOState(unsigned ParallelCodeGenParallelismLevel,const Config & Conf)598 LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
599                                       const Config &Conf)
600     : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel),
601       Ctx(Conf), CombinedModule(std::make_unique<Module>("ld-temp.o", Ctx)),
602       Mover(std::make_unique<IRMover>(*CombinedModule)) {}
603 
ThinLTOState(ThinBackend BackendParam)604 LTO::ThinLTOState::ThinLTOState(ThinBackend BackendParam)
605     : Backend(std::move(BackendParam)), CombinedIndex(/*HaveGVs*/ false) {
606   if (!Backend.isValid())
607     Backend =
608         createInProcessThinBackend(llvm::heavyweight_hardware_concurrency());
609 }
610 
LTO(Config Conf,ThinBackend Backend,unsigned ParallelCodeGenParallelismLevel,LTOKind LTOMode)611 LTO::LTO(Config Conf, ThinBackend Backend,
612          unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode)
613     : Conf(std::move(Conf)),
614       RegularLTO(ParallelCodeGenParallelismLevel, this->Conf),
615       ThinLTO(std::move(Backend)),
616       GlobalResolutions(
617           std::make_unique<DenseMap<StringRef, GlobalResolution>>()),
618       LTOMode(LTOMode) {
619   if (Conf.KeepSymbolNameCopies || LTOKeepSymbolCopies) {
620     Alloc = std::make_unique<BumpPtrAllocator>();
621     GlobalResolutionSymbolSaver = std::make_unique<llvm::StringSaver>(*Alloc);
622   }
623 }
624 
625 // Requires a destructor for MapVector<BitcodeModule>.
626 LTO::~LTO() = default;
627 
628 // Add the symbols in the given module to the GlobalResolutions map, and resolve
629 // their partitions.
addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,ArrayRef<SymbolResolution> Res,unsigned Partition,bool InSummary)630 void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
631                                ArrayRef<SymbolResolution> Res,
632                                unsigned Partition, bool InSummary) {
633   auto *ResI = Res.begin();
634   auto *ResE = Res.end();
635   (void)ResE;
636   for (const InputFile::Symbol &Sym : Syms) {
637     assert(ResI != ResE);
638     SymbolResolution Res = *ResI++;
639 
640     StringRef SymbolName = Sym.getName();
641     // Keep copies of symbols if the client of LTO says so.
642     if (GlobalResolutionSymbolSaver && !GlobalResolutions->contains(SymbolName))
643       SymbolName = GlobalResolutionSymbolSaver->save(SymbolName);
644 
645     auto &GlobalRes = (*GlobalResolutions)[SymbolName];
646     GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
647     if (Res.Prevailing) {
648       assert(!GlobalRes.Prevailing &&
649              "Multiple prevailing defs are not allowed");
650       GlobalRes.Prevailing = true;
651       GlobalRes.IRName = std::string(Sym.getIRName());
652     } else if (!GlobalRes.Prevailing && GlobalRes.IRName.empty()) {
653       // Sometimes it can be two copies of symbol in a module and prevailing
654       // symbol can have no IR name. That might happen if symbol is defined in
655       // module level inline asm block. In case we have multiple modules with
656       // the same symbol we want to use IR name of the prevailing symbol.
657       // Otherwise, if we haven't seen a prevailing symbol, set the name so that
658       // we can later use it to check if there is any prevailing copy in IR.
659       GlobalRes.IRName = std::string(Sym.getIRName());
660     }
661 
662     // In rare occasion, the symbol used to initialize GlobalRes has a different
663     // IRName from the inspected Symbol. This can happen on macOS + iOS, when a
664     // symbol is referenced through its mangled name, say @"\01_symbol" while
665     // the IRName is @symbol (the prefix underscore comes from MachO mangling).
666     // In that case, we have the same actual Symbol that can get two different
667     // GUID, leading to some invalid internalization. Workaround this by marking
668     // the GlobalRes external.
669 
670     // FIXME: instead of this check, it would be desirable to compute GUIDs
671     // based on mangled name, but this requires an access to the Target Triple
672     // and would be relatively invasive on the codebase.
673     if (GlobalRes.IRName != Sym.getIRName()) {
674       GlobalRes.Partition = GlobalResolution::External;
675       GlobalRes.VisibleOutsideSummary = true;
676     }
677 
678     // Set the partition to external if we know it is re-defined by the linker
679     // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a
680     // regular object, is referenced from llvm.compiler.used/llvm.used, or was
681     // already recorded as being referenced from a different partition.
682     if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() ||
683         (GlobalRes.Partition != GlobalResolution::Unknown &&
684          GlobalRes.Partition != Partition)) {
685       GlobalRes.Partition = GlobalResolution::External;
686     } else
687       // First recorded reference, save the current partition.
688       GlobalRes.Partition = Partition;
689 
690     // Flag as visible outside of summary if visible from a regular object or
691     // from a module that does not have a summary.
692     GlobalRes.VisibleOutsideSummary |=
693         (Res.VisibleToRegularObj || Sym.isUsed() || !InSummary);
694 
695     GlobalRes.ExportDynamic |= Res.ExportDynamic;
696   }
697 }
698 
releaseGlobalResolutionsMemory()699 void LTO::releaseGlobalResolutionsMemory() {
700   // Release GlobalResolutions dense-map itself.
701   GlobalResolutions.reset();
702   // Release the string saver memory.
703   GlobalResolutionSymbolSaver.reset();
704   Alloc.reset();
705 }
706 
writeToResolutionFile(raw_ostream & OS,InputFile * Input,ArrayRef<SymbolResolution> Res)707 static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
708                                   ArrayRef<SymbolResolution> Res) {
709   StringRef Path = Input->getName();
710   OS << Path << '\n';
711   auto ResI = Res.begin();
712   for (const InputFile::Symbol &Sym : Input->symbols()) {
713     assert(ResI != Res.end());
714     SymbolResolution Res = *ResI++;
715 
716     OS << "-r=" << Path << ',' << Sym.getName() << ',';
717     if (Res.Prevailing)
718       OS << 'p';
719     if (Res.FinalDefinitionInLinkageUnit)
720       OS << 'l';
721     if (Res.VisibleToRegularObj)
722       OS << 'x';
723     if (Res.LinkerRedefined)
724       OS << 'r';
725     OS << '\n';
726   }
727   OS.flush();
728   assert(ResI == Res.end());
729 }
730 
add(std::unique_ptr<InputFile> Input,ArrayRef<SymbolResolution> Res)731 Error LTO::add(std::unique_ptr<InputFile> Input,
732                ArrayRef<SymbolResolution> Res) {
733   assert(!CalledGetMaxTasks);
734 
735   if (Conf.ResolutionFile)
736     writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res);
737 
738   if (RegularLTO.CombinedModule->getTargetTriple().empty()) {
739     Triple InputTriple(Input->getTargetTriple());
740     RegularLTO.CombinedModule->setTargetTriple(InputTriple);
741     if (InputTriple.isOSBinFormatELF())
742       Conf.VisibilityScheme = Config::ELF;
743   }
744 
745   const SymbolResolution *ResI = Res.begin();
746   for (unsigned I = 0; I != Input->Mods.size(); ++I)
747     if (Error Err = addModule(*Input, I, ResI, Res.end()))
748       return Err;
749 
750   assert(ResI == Res.end());
751   return Error::success();
752 }
753 
addModule(InputFile & Input,unsigned ModI,const SymbolResolution * & ResI,const SymbolResolution * ResE)754 Error LTO::addModule(InputFile &Input, unsigned ModI,
755                      const SymbolResolution *&ResI,
756                      const SymbolResolution *ResE) {
757   Expected<BitcodeLTOInfo> LTOInfo = Input.Mods[ModI].getLTOInfo();
758   if (!LTOInfo)
759     return LTOInfo.takeError();
760 
761   if (EnableSplitLTOUnit) {
762     // If only some modules were split, flag this in the index so that
763     // we can skip or error on optimizations that need consistently split
764     // modules (whole program devirt and lower type tests).
765     if (*EnableSplitLTOUnit != LTOInfo->EnableSplitLTOUnit)
766       ThinLTO.CombinedIndex.setPartiallySplitLTOUnits();
767   } else
768     EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit;
769 
770   BitcodeModule BM = Input.Mods[ModI];
771 
772   if ((LTOMode == LTOK_UnifiedRegular || LTOMode == LTOK_UnifiedThin) &&
773       !LTOInfo->UnifiedLTO)
774     return make_error<StringError>(
775         "unified LTO compilation must use "
776         "compatible bitcode modules (use -funified-lto)",
777         inconvertibleErrorCode());
778 
779   if (LTOInfo->UnifiedLTO && LTOMode == LTOK_Default)
780     LTOMode = LTOK_UnifiedThin;
781 
782   bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular);
783 
784   auto ModSyms = Input.module_symbols(ModI);
785   addModuleToGlobalRes(ModSyms, {ResI, ResE},
786                        IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0,
787                        LTOInfo->HasSummary);
788 
789   if (IsThinLTO)
790     return addThinLTO(BM, ModSyms, ResI, ResE);
791 
792   RegularLTO.EmptyCombinedModule = false;
793   Expected<RegularLTOState::AddedModule> ModOrErr =
794       addRegularLTO(BM, ModSyms, ResI, ResE);
795   if (!ModOrErr)
796     return ModOrErr.takeError();
797 
798   if (!LTOInfo->HasSummary)
799     return linkRegularLTO(std::move(*ModOrErr), /*LivenessFromIndex=*/false);
800 
801   // Regular LTO module summaries are added to a dummy module that represents
802   // the combined regular LTO module.
803   if (Error Err = BM.readSummary(ThinLTO.CombinedIndex, ""))
804     return Err;
805   RegularLTO.ModsWithSummaries.push_back(std::move(*ModOrErr));
806   return Error::success();
807 }
808 
809 // Checks whether the given global value is in a non-prevailing comdat
810 // (comdat containing values the linker indicated were not prevailing,
811 // which we then dropped to available_externally), and if so, removes
812 // it from the comdat. This is called for all global values to ensure the
813 // comdat is empty rather than leaving an incomplete comdat. It is needed for
814 // regular LTO modules, in case we are in a mixed-LTO mode (both regular
815 // and thin LTO modules) compilation. Since the regular LTO module will be
816 // linked first in the final native link, we want to make sure the linker
817 // doesn't select any of these incomplete comdats that would be left
818 // in the regular LTO module without this cleanup.
819 static void
handleNonPrevailingComdat(GlobalValue & GV,std::set<const Comdat * > & NonPrevailingComdats)820 handleNonPrevailingComdat(GlobalValue &GV,
821                           std::set<const Comdat *> &NonPrevailingComdats) {
822   Comdat *C = GV.getComdat();
823   if (!C)
824     return;
825 
826   if (!NonPrevailingComdats.count(C))
827     return;
828 
829   // Additionally need to drop all global values from the comdat to
830   // available_externally, to satisfy the COMDAT requirement that all members
831   // are discarded as a unit. The non-local linkage global values avoid
832   // duplicate definition linker errors.
833   GV.setLinkage(GlobalValue::AvailableExternallyLinkage);
834 
835   if (auto GO = dyn_cast<GlobalObject>(&GV))
836     GO->setComdat(nullptr);
837 }
838 
839 // Add a regular LTO object to the link.
840 // The resulting module needs to be linked into the combined LTO module with
841 // linkRegularLTO.
842 Expected<LTO::RegularLTOState::AddedModule>
addRegularLTO(BitcodeModule BM,ArrayRef<InputFile::Symbol> Syms,const SymbolResolution * & ResI,const SymbolResolution * ResE)843 LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
844                    const SymbolResolution *&ResI,
845                    const SymbolResolution *ResE) {
846   RegularLTOState::AddedModule Mod;
847   Expected<std::unique_ptr<Module>> MOrErr =
848       BM.getLazyModule(RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true,
849                        /*IsImporting*/ false);
850   if (!MOrErr)
851     return MOrErr.takeError();
852   Module &M = **MOrErr;
853   Mod.M = std::move(*MOrErr);
854 
855   if (Error Err = M.materializeMetadata())
856     return std::move(Err);
857 
858   // If cfi.functions is present and we are in regular LTO mode, LowerTypeTests
859   // will rename local functions in the merged module as "<function name>.1".
860   // This causes linking errors, since other parts of the module expect the
861   // original function name.
862   if (LTOMode == LTOK_UnifiedRegular)
863     if (NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"))
864       M.eraseNamedMetadata(CfiFunctionsMD);
865 
866   UpgradeDebugInfo(M);
867 
868   ModuleSymbolTable SymTab;
869   SymTab.addModule(&M);
870 
871   for (GlobalVariable &GV : M.globals())
872     if (GV.hasAppendingLinkage())
873       Mod.Keep.push_back(&GV);
874 
875   DenseSet<GlobalObject *> AliasedGlobals;
876   for (auto &GA : M.aliases())
877     if (GlobalObject *GO = GA.getAliaseeObject())
878       AliasedGlobals.insert(GO);
879 
880   // In this function we need IR GlobalValues matching the symbols in Syms
881   // (which is not backed by a module), so we need to enumerate them in the same
882   // order. The symbol enumeration order of a ModuleSymbolTable intentionally
883   // matches the order of an irsymtab, but when we read the irsymtab in
884   // InputFile::create we omit some symbols that are irrelevant to LTO. The
885   // Skip() function skips the same symbols from the module as InputFile does
886   // from the symbol table.
887   auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end();
888   auto Skip = [&]() {
889     while (MsymI != MsymE) {
890       auto Flags = SymTab.getSymbolFlags(*MsymI);
891       if ((Flags & object::BasicSymbolRef::SF_Global) &&
892           !(Flags & object::BasicSymbolRef::SF_FormatSpecific))
893         return;
894       ++MsymI;
895     }
896   };
897   Skip();
898 
899   std::set<const Comdat *> NonPrevailingComdats;
900   SmallSet<StringRef, 2> NonPrevailingAsmSymbols;
901   for (const InputFile::Symbol &Sym : Syms) {
902     assert(ResI != ResE);
903     SymbolResolution Res = *ResI++;
904 
905     assert(MsymI != MsymE);
906     ModuleSymbolTable::Symbol Msym = *MsymI++;
907     Skip();
908 
909     if (GlobalValue *GV = dyn_cast_if_present<GlobalValue *>(Msym)) {
910       if (Res.Prevailing) {
911         if (Sym.isUndefined())
912           continue;
913         Mod.Keep.push_back(GV);
914         // For symbols re-defined with linker -wrap and -defsym options,
915         // set the linkage to weak to inhibit IPO. The linkage will be
916         // restored by the linker.
917         if (Res.LinkerRedefined)
918           GV->setLinkage(GlobalValue::WeakAnyLinkage);
919 
920         GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage();
921         if (GlobalValue::isLinkOnceLinkage(OriginalLinkage))
922           GV->setLinkage(GlobalValue::getWeakLinkage(
923               GlobalValue::isLinkOnceODRLinkage(OriginalLinkage)));
924       } else if (isa<GlobalObject>(GV) &&
925                  (GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() ||
926                   GV->hasAvailableExternallyLinkage()) &&
927                  !AliasedGlobals.count(cast<GlobalObject>(GV))) {
928         // Any of the above three types of linkage indicates that the
929         // chosen prevailing symbol will have the same semantics as this copy of
930         // the symbol, so we may be able to link it with available_externally
931         // linkage. We will decide later whether to do that when we link this
932         // module (in linkRegularLTO), based on whether it is undefined.
933         Mod.Keep.push_back(GV);
934         GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
935         if (GV->hasComdat())
936           NonPrevailingComdats.insert(GV->getComdat());
937         cast<GlobalObject>(GV)->setComdat(nullptr);
938       }
939 
940       // Set the 'local' flag based on the linker resolution for this symbol.
941       if (Res.FinalDefinitionInLinkageUnit) {
942         GV->setDSOLocal(true);
943         if (GV->hasDLLImportStorageClass())
944           GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
945                                  DefaultStorageClass);
946       }
947     } else if (auto *AS =
948                    dyn_cast_if_present<ModuleSymbolTable::AsmSymbol *>(Msym)) {
949       // Collect non-prevailing symbols.
950       if (!Res.Prevailing)
951         NonPrevailingAsmSymbols.insert(AS->first);
952     } else {
953       llvm_unreachable("unknown symbol type");
954     }
955 
956     // Common resolution: collect the maximum size/alignment over all commons.
957     // We also record if we see an instance of a common as prevailing, so that
958     // if none is prevailing we can ignore it later.
959     if (Sym.isCommon()) {
960       // FIXME: We should figure out what to do about commons defined by asm.
961       // For now they aren't reported correctly by ModuleSymbolTable.
962       auto &CommonRes = RegularLTO.Commons[std::string(Sym.getIRName())];
963       CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize());
964       if (uint32_t SymAlignValue = Sym.getCommonAlignment()) {
965         CommonRes.Alignment =
966             std::max(Align(SymAlignValue), CommonRes.Alignment);
967       }
968       CommonRes.Prevailing |= Res.Prevailing;
969     }
970   }
971 
972   if (!M.getComdatSymbolTable().empty())
973     for (GlobalValue &GV : M.global_values())
974       handleNonPrevailingComdat(GV, NonPrevailingComdats);
975 
976   // Prepend ".lto_discard <sym>, <sym>*" directive to each module inline asm
977   // block.
978   if (!M.getModuleInlineAsm().empty()) {
979     std::string NewIA = ".lto_discard";
980     if (!NonPrevailingAsmSymbols.empty()) {
981       // Don't dicard a symbol if there is a live .symver for it.
982       ModuleSymbolTable::CollectAsmSymvers(
983           M, [&](StringRef Name, StringRef Alias) {
984             if (!NonPrevailingAsmSymbols.count(Alias))
985               NonPrevailingAsmSymbols.erase(Name);
986           });
987       NewIA += " " + llvm::join(NonPrevailingAsmSymbols, ", ");
988     }
989     NewIA += "\n";
990     M.setModuleInlineAsm(NewIA + M.getModuleInlineAsm());
991   }
992 
993   assert(MsymI == MsymE);
994   return std::move(Mod);
995 }
996 
linkRegularLTO(RegularLTOState::AddedModule Mod,bool LivenessFromIndex)997 Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
998                           bool LivenessFromIndex) {
999   std::vector<GlobalValue *> Keep;
1000   for (GlobalValue *GV : Mod.Keep) {
1001     if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) {
1002       if (Function *F = dyn_cast<Function>(GV)) {
1003         if (DiagnosticOutputFile) {
1004           if (Error Err = F->materialize())
1005             return Err;
1006           OptimizationRemarkEmitter ORE(F, nullptr);
1007           ORE.emit(OptimizationRemark(DEBUG_TYPE, "deadfunction", F)
1008                    << ore::NV("Function", F)
1009                    << " not added to the combined module ");
1010         }
1011       }
1012       continue;
1013     }
1014 
1015     if (!GV->hasAvailableExternallyLinkage()) {
1016       Keep.push_back(GV);
1017       continue;
1018     }
1019 
1020     // Only link available_externally definitions if we don't already have a
1021     // definition.
1022     GlobalValue *CombinedGV =
1023         RegularLTO.CombinedModule->getNamedValue(GV->getName());
1024     if (CombinedGV && !CombinedGV->isDeclaration())
1025       continue;
1026 
1027     Keep.push_back(GV);
1028   }
1029 
1030   return RegularLTO.Mover->move(std::move(Mod.M), Keep, nullptr,
1031                                 /* IsPerformingImport */ false);
1032 }
1033 
1034 // Add a ThinLTO module to the link.
addThinLTO(BitcodeModule BM,ArrayRef<InputFile::Symbol> Syms,const SymbolResolution * & ResI,const SymbolResolution * ResE)1035 Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
1036                       const SymbolResolution *&ResI,
1037                       const SymbolResolution *ResE) {
1038   const SymbolResolution *ResITmp = ResI;
1039   for (const InputFile::Symbol &Sym : Syms) {
1040     assert(ResITmp != ResE);
1041     SymbolResolution Res = *ResITmp++;
1042 
1043     if (!Sym.getIRName().empty()) {
1044       auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1045           GlobalValue::getGlobalIdentifier(Sym.getIRName(),
1046                                            GlobalValue::ExternalLinkage, ""));
1047       if (Res.Prevailing)
1048         ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier();
1049     }
1050   }
1051 
1052   if (Error Err =
1053           BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(),
1054                          [&](GlobalValue::GUID GUID) {
1055                            return ThinLTO.PrevailingModuleForGUID[GUID] ==
1056                                   BM.getModuleIdentifier();
1057                          }))
1058     return Err;
1059   LLVM_DEBUG(dbgs() << "Module " << BM.getModuleIdentifier() << "\n");
1060 
1061   for (const InputFile::Symbol &Sym : Syms) {
1062     assert(ResI != ResE);
1063     SymbolResolution Res = *ResI++;
1064 
1065     if (!Sym.getIRName().empty()) {
1066       auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1067           GlobalValue::getGlobalIdentifier(Sym.getIRName(),
1068                                            GlobalValue::ExternalLinkage, ""));
1069       if (Res.Prevailing) {
1070         assert(ThinLTO.PrevailingModuleForGUID[GUID] ==
1071                BM.getModuleIdentifier());
1072 
1073         // For linker redefined symbols (via --wrap or --defsym) we want to
1074         // switch the linkage to `weak` to prevent IPOs from happening.
1075         // Find the summary in the module for this very GV and record the new
1076         // linkage so that we can switch it when we import the GV.
1077         if (Res.LinkerRedefined)
1078           if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(
1079                   GUID, BM.getModuleIdentifier()))
1080             S->setLinkage(GlobalValue::WeakAnyLinkage);
1081       }
1082 
1083       // If the linker resolved the symbol to a local definition then mark it
1084       // as local in the summary for the module we are adding.
1085       if (Res.FinalDefinitionInLinkageUnit) {
1086         if (auto S = ThinLTO.CombinedIndex.findSummaryInModule(
1087                 GUID, BM.getModuleIdentifier())) {
1088           S->setDSOLocal(true);
1089         }
1090       }
1091     }
1092   }
1093 
1094   if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second)
1095     return make_error<StringError>(
1096         "Expected at most one ThinLTO module per bitcode file",
1097         inconvertibleErrorCode());
1098 
1099   if (!Conf.ThinLTOModulesToCompile.empty()) {
1100     if (!ThinLTO.ModulesToCompile)
1101       ThinLTO.ModulesToCompile = ModuleMapType();
1102     // This is a fuzzy name matching where only modules with name containing the
1103     // specified switch values are going to be compiled.
1104     for (const std::string &Name : Conf.ThinLTOModulesToCompile) {
1105       if (BM.getModuleIdentifier().contains(Name)) {
1106         ThinLTO.ModulesToCompile->insert({BM.getModuleIdentifier(), BM});
1107         LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BM.getModuleIdentifier()
1108                           << " to compile\n");
1109       }
1110     }
1111   }
1112 
1113   return Error::success();
1114 }
1115 
getMaxTasks() const1116 unsigned LTO::getMaxTasks() const {
1117   CalledGetMaxTasks = true;
1118   auto ModuleCount = ThinLTO.ModulesToCompile ? ThinLTO.ModulesToCompile->size()
1119                                               : ThinLTO.ModuleMap.size();
1120   return RegularLTO.ParallelCodeGenParallelismLevel + ModuleCount;
1121 }
1122 
1123 // If only some of the modules were split, we cannot correctly handle
1124 // code that contains type tests or type checked loads.
checkPartiallySplit()1125 Error LTO::checkPartiallySplit() {
1126   if (!ThinLTO.CombinedIndex.partiallySplitLTOUnits())
1127     return Error::success();
1128 
1129   const Module *Combined = RegularLTO.CombinedModule.get();
1130   Function *TypeTestFunc =
1131       Intrinsic::getDeclarationIfExists(Combined, Intrinsic::type_test);
1132   Function *TypeCheckedLoadFunc =
1133       Intrinsic::getDeclarationIfExists(Combined, Intrinsic::type_checked_load);
1134   Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
1135       Combined, Intrinsic::type_checked_load_relative);
1136 
1137   // First check if there are type tests / type checked loads in the
1138   // merged regular LTO module IR.
1139   if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
1140       (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()) ||
1141       (TypeCheckedLoadRelativeFunc &&
1142        !TypeCheckedLoadRelativeFunc->use_empty()))
1143     return make_error<StringError>(
1144         "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
1145         inconvertibleErrorCode());
1146 
1147   // Otherwise check if there are any recorded in the combined summary from the
1148   // ThinLTO modules.
1149   for (auto &P : ThinLTO.CombinedIndex) {
1150     for (auto &S : P.second.SummaryList) {
1151       auto *FS = dyn_cast<FunctionSummary>(S.get());
1152       if (!FS)
1153         continue;
1154       if (!FS->type_test_assume_vcalls().empty() ||
1155           !FS->type_checked_load_vcalls().empty() ||
1156           !FS->type_test_assume_const_vcalls().empty() ||
1157           !FS->type_checked_load_const_vcalls().empty() ||
1158           !FS->type_tests().empty())
1159         return make_error<StringError>(
1160             "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
1161             inconvertibleErrorCode());
1162     }
1163   }
1164   return Error::success();
1165 }
1166 
run(AddStreamFn AddStream,FileCache Cache)1167 Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
1168   // Compute "dead" symbols, we don't want to import/export these!
1169   DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
1170   DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
1171   for (auto &Res : *GlobalResolutions) {
1172     // Normally resolution have IR name of symbol. We can do nothing here
1173     // otherwise. See comments in GlobalResolution struct for more details.
1174     if (Res.second.IRName.empty())
1175       continue;
1176 
1177     GlobalValue::GUID GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1178         GlobalValue::dropLLVMManglingEscape(Res.second.IRName));
1179 
1180     if (Res.second.VisibleOutsideSummary && Res.second.Prevailing)
1181       GUIDPreservedSymbols.insert(GUID);
1182 
1183     if (Res.second.ExportDynamic)
1184       DynamicExportSymbols.insert(GUID);
1185 
1186     GUIDPrevailingResolutions[GUID] =
1187         Res.second.Prevailing ? PrevailingType::Yes : PrevailingType::No;
1188   }
1189 
1190   auto isPrevailing = [&](GlobalValue::GUID G) {
1191     auto It = GUIDPrevailingResolutions.find(G);
1192     if (It == GUIDPrevailingResolutions.end())
1193       return PrevailingType::Unknown;
1194     return It->second;
1195   };
1196   computeDeadSymbolsWithConstProp(ThinLTO.CombinedIndex, GUIDPreservedSymbols,
1197                                   isPrevailing, Conf.OptLevel > 0);
1198 
1199   // Setup output file to emit statistics.
1200   auto StatsFileOrErr = setupStatsFile(Conf.StatsFile);
1201   if (!StatsFileOrErr)
1202     return StatsFileOrErr.takeError();
1203   std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get());
1204 
1205   // TODO: Ideally this would be controlled automatically by detecting that we
1206   // are linking with an allocator that supports these interfaces, rather than
1207   // an internal option (which would still be needed for tests, however). For
1208   // example, if the library exported a symbol like __malloc_hot_cold the linker
1209   // could recognize that and set a flag in the lto::Config.
1210   if (SupportsHotColdNew)
1211     ThinLTO.CombinedIndex.setWithSupportsHotColdNew();
1212 
1213   Error Result = runRegularLTO(AddStream);
1214   if (!Result)
1215     // This will reset the GlobalResolutions optional once done with it to
1216     // reduce peak memory before importing.
1217     Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);
1218 
1219   if (StatsFile)
1220     PrintStatisticsJSON(StatsFile->os());
1221 
1222   return Result;
1223 }
1224 
updateMemProfAttributes(Module & Mod,const ModuleSummaryIndex & Index)1225 void lto::updateMemProfAttributes(Module &Mod,
1226                                   const ModuleSummaryIndex &Index) {
1227   if (Index.withSupportsHotColdNew())
1228     return;
1229 
1230   // The profile matcher applies hotness attributes directly for allocations,
1231   // and those will cause us to generate calls to the hot/cold interfaces
1232   // unconditionally. If supports-hot-cold-new was not enabled in the LTO
1233   // link then assume we don't want these calls (e.g. not linking with
1234   // the appropriate library, or otherwise trying to disable this behavior).
1235   for (auto &F : Mod) {
1236     for (auto &BB : F) {
1237       for (auto &I : BB) {
1238         auto *CI = dyn_cast<CallBase>(&I);
1239         if (!CI)
1240           continue;
1241         if (CI->hasFnAttr("memprof"))
1242           CI->removeFnAttr("memprof");
1243         // Strip off all memprof metadata as it is no longer needed.
1244         // Importantly, this avoids the addition of new memprof attributes
1245         // after inlining propagation.
1246         // TODO: If we support additional types of MemProf metadata beyond hot
1247         // and cold, we will need to update the metadata based on the allocator
1248         // APIs supported instead of completely stripping all.
1249         CI->setMetadata(LLVMContext::MD_memprof, nullptr);
1250         CI->setMetadata(LLVMContext::MD_callsite, nullptr);
1251       }
1252     }
1253   }
1254 }
1255 
runRegularLTO(AddStreamFn AddStream)1256 Error LTO::runRegularLTO(AddStreamFn AddStream) {
1257   // Setup optimization remarks.
1258   auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
1259       RegularLTO.CombinedModule->getContext(), Conf.RemarksFilename,
1260       Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness,
1261       Conf.RemarksHotnessThreshold);
1262   LLVM_DEBUG(dbgs() << "Running regular LTO\n");
1263   if (!DiagFileOrErr)
1264     return DiagFileOrErr.takeError();
1265   DiagnosticOutputFile = std::move(*DiagFileOrErr);
1266 
1267   // Finalize linking of regular LTO modules containing summaries now that
1268   // we have computed liveness information.
1269   for (auto &M : RegularLTO.ModsWithSummaries)
1270     if (Error Err = linkRegularLTO(std::move(M),
1271                                    /*LivenessFromIndex=*/true))
1272       return Err;
1273 
1274   // Ensure we don't have inconsistently split LTO units with type tests.
1275   // FIXME: this checks both LTO and ThinLTO. It happens to work as we take
1276   // this path both cases but eventually this should be split into two and
1277   // do the ThinLTO checks in `runThinLTO`.
1278   if (Error Err = checkPartiallySplit())
1279     return Err;
1280 
1281   // Make sure commons have the right size/alignment: we kept the largest from
1282   // all the prevailing when adding the inputs, and we apply it here.
1283   const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout();
1284   for (auto &I : RegularLTO.Commons) {
1285     if (!I.second.Prevailing)
1286       // Don't do anything if no instance of this common was prevailing.
1287       continue;
1288     GlobalVariable *OldGV = RegularLTO.CombinedModule->getNamedGlobal(I.first);
1289     if (OldGV && DL.getTypeAllocSize(OldGV->getValueType()) == I.second.Size) {
1290       // Don't create a new global if the type is already correct, just make
1291       // sure the alignment is correct.
1292       OldGV->setAlignment(I.second.Alignment);
1293       continue;
1294     }
1295     ArrayType *Ty =
1296         ArrayType::get(Type::getInt8Ty(RegularLTO.Ctx), I.second.Size);
1297     auto *GV = new GlobalVariable(*RegularLTO.CombinedModule, Ty, false,
1298                                   GlobalValue::CommonLinkage,
1299                                   ConstantAggregateZero::get(Ty), "");
1300     GV->setAlignment(I.second.Alignment);
1301     if (OldGV) {
1302       OldGV->replaceAllUsesWith(GV);
1303       GV->takeName(OldGV);
1304       OldGV->eraseFromParent();
1305     } else {
1306       GV->setName(I.first);
1307     }
1308   }
1309 
1310   updateMemProfAttributes(*RegularLTO.CombinedModule, ThinLTO.CombinedIndex);
1311 
1312   bool WholeProgramVisibilityEnabledInLTO =
1313       Conf.HasWholeProgramVisibility &&
1314       // If validation is enabled, upgrade visibility only when all vtables
1315       // have typeinfos.
1316       (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);
1317 
1318   // This returns true when the name is local or not defined. Locals are
1319   // expected to be handled separately.
1320   auto IsVisibleToRegularObj = [&](StringRef name) {
1321     auto It = GlobalResolutions->find(name);
1322     return (It == GlobalResolutions->end() ||
1323             It->second.VisibleOutsideSummary || !It->second.Prevailing);
1324   };
1325 
1326   // If allowed, upgrade public vcall visibility metadata to linkage unit
1327   // visibility before whole program devirtualization in the optimizer.
1328   updateVCallVisibilityInModule(
1329       *RegularLTO.CombinedModule, WholeProgramVisibilityEnabledInLTO,
1330       DynamicExportSymbols, Conf.ValidateAllVtablesHaveTypeInfos,
1331       IsVisibleToRegularObj);
1332   updatePublicTypeTestCalls(*RegularLTO.CombinedModule,
1333                             WholeProgramVisibilityEnabledInLTO);
1334 
1335   if (Conf.PreOptModuleHook &&
1336       !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule))
1337     return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
1338 
1339   if (!Conf.CodeGenOnly) {
1340     for (const auto &R : *GlobalResolutions) {
1341       GlobalValue *GV =
1342           RegularLTO.CombinedModule->getNamedValue(R.second.IRName);
1343       if (!R.second.isPrevailingIRSymbol())
1344         continue;
1345       if (R.second.Partition != 0 &&
1346           R.second.Partition != GlobalResolution::External)
1347         continue;
1348 
1349       // Ignore symbols defined in other partitions.
1350       // Also skip declarations, which are not allowed to have internal linkage.
1351       if (!GV || GV->hasLocalLinkage() || GV->isDeclaration())
1352         continue;
1353 
1354       // Symbols that are marked DLLImport or DLLExport should not be
1355       // internalized, as they are either externally visible or referencing
1356       // external symbols. Symbols that have AvailableExternally or Appending
1357       // linkage might be used by future passes and should be kept as is.
1358       // These linkages are seen in Unified regular LTO, because the process
1359       // of creating split LTO units introduces symbols with that linkage into
1360       // one of the created modules. Normally, only the ThinLTO backend would
1361       // compile this module, but Unified Regular LTO processes both
1362       // modules created by the splitting process as regular LTO modules.
1363       if ((LTOMode == LTOKind::LTOK_UnifiedRegular) &&
1364           ((GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) ||
1365            GV->hasAvailableExternallyLinkage() || GV->hasAppendingLinkage()))
1366         continue;
1367 
1368       GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global
1369                                               : GlobalValue::UnnamedAddr::None);
1370       if (EnableLTOInternalization && R.second.Partition == 0)
1371         GV->setLinkage(GlobalValue::InternalLinkage);
1372     }
1373 
1374     if (Conf.PostInternalizeModuleHook &&
1375         !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule))
1376       return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
1377   }
1378 
1379   if (!RegularLTO.EmptyCombinedModule || Conf.AlwaysEmitRegularLTOObj) {
1380     if (Error Err =
1381             backend(Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel,
1382                     *RegularLTO.CombinedModule, ThinLTO.CombinedIndex))
1383       return Err;
1384   }
1385 
1386   return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
1387 }
1388 
getRuntimeLibcallSymbols(const Triple & TT)1389 SmallVector<const char *> LTO::getRuntimeLibcallSymbols(const Triple &TT) {
1390   RTLIB::RuntimeLibcallsInfo Libcalls(TT);
1391   SmallVector<const char *> LibcallSymbols;
1392   ArrayRef<RTLIB::LibcallImpl> LibcallImpls = Libcalls.getLibcallImpls();
1393   LibcallSymbols.reserve(LibcallImpls.size());
1394 
1395   for (RTLIB::LibcallImpl Impl : LibcallImpls) {
1396     if (Impl != RTLIB::Unsupported)
1397       LibcallSymbols.push_back(Libcalls.getLibcallImplName(Impl));
1398   }
1399 
1400   return LibcallSymbols;
1401 }
1402 
emitFiles(const FunctionImporter::ImportMapTy & ImportList,llvm::StringRef ModulePath,const std::string & NewModulePath) const1403 Error ThinBackendProc::emitFiles(
1404     const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
1405     const std::string &NewModulePath) const {
1406   return emitFiles(ImportList, ModulePath, NewModulePath,
1407                    NewModulePath + ".thinlto.bc",
1408                    /*ImportsFiles=*/std::nullopt);
1409 }
1410 
emitFiles(const FunctionImporter::ImportMapTy & ImportList,llvm::StringRef ModulePath,const std::string & NewModulePath,StringRef SummaryPath,std::optional<std::reference_wrapper<ImportsFilesContainer>> ImportsFiles) const1411 Error ThinBackendProc::emitFiles(
1412     const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
1413     const std::string &NewModulePath, StringRef SummaryPath,
1414     std::optional<std::reference_wrapper<ImportsFilesContainer>> ImportsFiles)
1415     const {
1416   ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
1417   GVSummaryPtrSet DeclarationSummaries;
1418 
1419   std::error_code EC;
1420   gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
1421                                    ImportList, ModuleToSummariesForIndex,
1422                                    DeclarationSummaries);
1423 
1424   raw_fd_ostream OS(SummaryPath, EC, sys::fs::OpenFlags::OF_None);
1425   if (EC)
1426     return createFileError("cannot open " + Twine(SummaryPath), EC);
1427 
1428   writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex,
1429                    &DeclarationSummaries);
1430 
1431   if (ShouldEmitImportsFiles) {
1432     Error ImportsFilesError = EmitImportsFiles(
1433         ModulePath, NewModulePath + ".imports", ModuleToSummariesForIndex);
1434     if (ImportsFilesError)
1435       return ImportsFilesError;
1436   }
1437 
1438   // Optionally, store the imports files.
1439   if (ImportsFiles)
1440     processImportsFiles(
1441         ModulePath, ModuleToSummariesForIndex,
1442         [&](StringRef M) { ImportsFiles->get().push_back(M.str()); });
1443 
1444   return Error::success();
1445 }
1446 
1447 namespace {
1448 /// Base class for ThinLTO backends that perform code generation and insert the
1449 /// generated files back into the link.
1450 class CGThinBackend : public ThinBackendProc {
1451 protected:
1452   AddStreamFn AddStream;
1453   DenseSet<GlobalValue::GUID> CfiFunctionDefs;
1454   DenseSet<GlobalValue::GUID> CfiFunctionDecls;
1455   bool ShouldEmitIndexFiles;
1456 
1457 public:
CGThinBackend(const Config & Conf,ModuleSummaryIndex & CombinedIndex,const DenseMap<StringRef,GVSummaryMapTy> & ModuleToDefinedGVSummaries,AddStreamFn AddStream,lto::IndexWriteCallback OnWrite,bool ShouldEmitIndexFiles,bool ShouldEmitImportsFiles,ThreadPoolStrategy ThinLTOParallelism)1458   CGThinBackend(
1459       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1460       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1461       AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
1462       bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
1463       ThreadPoolStrategy ThinLTOParallelism)
1464       : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1465                         OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
1466         AddStream(std::move(AddStream)),
1467         ShouldEmitIndexFiles(ShouldEmitIndexFiles) {
1468     auto &Defs = CombinedIndex.cfiFunctionDefs();
1469     CfiFunctionDefs.insert_range(Defs.guids());
1470     auto &Decls = CombinedIndex.cfiFunctionDecls();
1471     CfiFunctionDecls.insert_range(Decls.guids());
1472   }
1473 };
1474 
1475 /// This backend performs code generation by scheduling a job to run on
1476 /// an in-process thread when invoked for each task.
1477 class InProcessThinBackend : public CGThinBackend {
1478 protected:
1479   FileCache Cache;
1480 
1481 public:
InProcessThinBackend(const Config & Conf,ModuleSummaryIndex & CombinedIndex,ThreadPoolStrategy ThinLTOParallelism,const DenseMap<StringRef,GVSummaryMapTy> & ModuleToDefinedGVSummaries,AddStreamFn AddStream,FileCache Cache,lto::IndexWriteCallback OnWrite,bool ShouldEmitIndexFiles,bool ShouldEmitImportsFiles)1482   InProcessThinBackend(
1483       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1484       ThreadPoolStrategy ThinLTOParallelism,
1485       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1486       AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite,
1487       bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles)
1488       : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1489                       AddStream, OnWrite, ShouldEmitIndexFiles,
1490                       ShouldEmitImportsFiles, ThinLTOParallelism),
1491         Cache(std::move(Cache)) {}
1492 
runThinLTOBackendThread(AddStreamFn AddStream,FileCache Cache,unsigned Task,BitcodeModule BM,ModuleSummaryIndex & CombinedIndex,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const std::map<GlobalValue::GUID,GlobalValue::LinkageTypes> & ResolvedODR,const GVSummaryMapTy & DefinedGlobals,MapVector<StringRef,BitcodeModule> & ModuleMap)1493   virtual Error runThinLTOBackendThread(
1494       AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
1495       ModuleSummaryIndex &CombinedIndex,
1496       const FunctionImporter::ImportMapTy &ImportList,
1497       const FunctionImporter::ExportSetTy &ExportList,
1498       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1499       const GVSummaryMapTy &DefinedGlobals,
1500       MapVector<StringRef, BitcodeModule> &ModuleMap) {
1501     auto RunThinBackend = [&](AddStreamFn AddStream) {
1502       LTOLLVMContext BackendContext(Conf);
1503       Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(BackendContext);
1504       if (!MOrErr)
1505         return MOrErr.takeError();
1506 
1507       return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex,
1508                          ImportList, DefinedGlobals, &ModuleMap,
1509                          Conf.CodeGenOnly);
1510     };
1511 
1512     auto ModuleID = BM.getModuleIdentifier();
1513 
1514     if (ShouldEmitIndexFiles) {
1515       if (auto E = emitFiles(ImportList, ModuleID, ModuleID.str()))
1516         return E;
1517     }
1518 
1519     if (!Cache.isValid() || !CombinedIndex.modulePaths().count(ModuleID) ||
1520         all_of(CombinedIndex.getModuleHash(ModuleID),
1521                [](uint32_t V) { return V == 0; }))
1522       // Cache disabled or no entry for this module in the combined index or
1523       // no module hash.
1524       return RunThinBackend(AddStream);
1525 
1526     // The module may be cached, this helps handling it.
1527     std::string Key = computeLTOCacheKey(
1528         Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
1529         DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
1530     Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key, ModuleID);
1531     if (Error Err = CacheAddStreamOrErr.takeError())
1532       return Err;
1533     AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
1534     if (CacheAddStream)
1535       return RunThinBackend(CacheAddStream);
1536 
1537     return Error::success();
1538   }
1539 
start(unsigned Task,BitcodeModule BM,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const std::map<GlobalValue::GUID,GlobalValue::LinkageTypes> & ResolvedODR,MapVector<StringRef,BitcodeModule> & ModuleMap)1540   Error start(
1541       unsigned Task, BitcodeModule BM,
1542       const FunctionImporter::ImportMapTy &ImportList,
1543       const FunctionImporter::ExportSetTy &ExportList,
1544       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1545       MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1546     StringRef ModulePath = BM.getModuleIdentifier();
1547     assert(ModuleToDefinedGVSummaries.count(ModulePath));
1548     const GVSummaryMapTy &DefinedGlobals =
1549         ModuleToDefinedGVSummaries.find(ModulePath)->second;
1550     BackendThreadPool.async(
1551         [=](BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
1552             const FunctionImporter::ImportMapTy &ImportList,
1553             const FunctionImporter::ExportSetTy &ExportList,
1554             const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
1555                 &ResolvedODR,
1556             const GVSummaryMapTy &DefinedGlobals,
1557             MapVector<StringRef, BitcodeModule> &ModuleMap) {
1558           if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
1559             timeTraceProfilerInitialize(Conf.TimeTraceGranularity,
1560                                         "thin backend");
1561           Error E = runThinLTOBackendThread(
1562               AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList,
1563               ResolvedODR, DefinedGlobals, ModuleMap);
1564           if (E) {
1565             std::unique_lock<std::mutex> L(ErrMu);
1566             if (Err)
1567               Err = joinErrors(std::move(*Err), std::move(E));
1568             else
1569               Err = std::move(E);
1570           }
1571           if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
1572             timeTraceProfilerFinishThread();
1573         },
1574         BM, std::ref(CombinedIndex), std::ref(ImportList), std::ref(ExportList),
1575         std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap));
1576 
1577     if (OnWrite)
1578       OnWrite(std::string(ModulePath));
1579     return Error::success();
1580   }
1581 };
1582 
1583 /// This backend is utilized in the first round of a two-codegen round process.
1584 /// It first saves optimized bitcode files to disk before the codegen process
1585 /// begins. After codegen, it stores the resulting object files in a scratch
1586 /// buffer. Note the codegen data stored in the scratch buffer will be extracted
1587 /// and merged in the subsequent step.
1588 class FirstRoundThinBackend : public InProcessThinBackend {
1589   AddStreamFn IRAddStream;
1590   FileCache IRCache;
1591 
1592 public:
FirstRoundThinBackend(const Config & Conf,ModuleSummaryIndex & CombinedIndex,ThreadPoolStrategy ThinLTOParallelism,const DenseMap<StringRef,GVSummaryMapTy> & ModuleToDefinedGVSummaries,AddStreamFn CGAddStream,FileCache CGCache,AddStreamFn IRAddStream,FileCache IRCache)1593   FirstRoundThinBackend(
1594       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1595       ThreadPoolStrategy ThinLTOParallelism,
1596       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1597       AddStreamFn CGAddStream, FileCache CGCache, AddStreamFn IRAddStream,
1598       FileCache IRCache)
1599       : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
1600                              ModuleToDefinedGVSummaries, std::move(CGAddStream),
1601                              std::move(CGCache), /*OnWrite=*/nullptr,
1602                              /*ShouldEmitIndexFiles=*/false,
1603                              /*ShouldEmitImportsFiles=*/false),
1604         IRAddStream(std::move(IRAddStream)), IRCache(std::move(IRCache)) {}
1605 
runThinLTOBackendThread(AddStreamFn CGAddStream,FileCache CGCache,unsigned Task,BitcodeModule BM,ModuleSummaryIndex & CombinedIndex,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const std::map<GlobalValue::GUID,GlobalValue::LinkageTypes> & ResolvedODR,const GVSummaryMapTy & DefinedGlobals,MapVector<StringRef,BitcodeModule> & ModuleMap)1606   Error runThinLTOBackendThread(
1607       AddStreamFn CGAddStream, FileCache CGCache, unsigned Task,
1608       BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
1609       const FunctionImporter::ImportMapTy &ImportList,
1610       const FunctionImporter::ExportSetTy &ExportList,
1611       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1612       const GVSummaryMapTy &DefinedGlobals,
1613       MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1614     auto RunThinBackend = [&](AddStreamFn CGAddStream,
1615                               AddStreamFn IRAddStream) {
1616       LTOLLVMContext BackendContext(Conf);
1617       Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(BackendContext);
1618       if (!MOrErr)
1619         return MOrErr.takeError();
1620 
1621       return thinBackend(Conf, Task, CGAddStream, **MOrErr, CombinedIndex,
1622                          ImportList, DefinedGlobals, &ModuleMap,
1623                          Conf.CodeGenOnly, IRAddStream);
1624     };
1625 
1626     auto ModuleID = BM.getModuleIdentifier();
1627     // Like InProcessThinBackend, we produce index files as needed for
1628     // FirstRoundThinBackend. However, these files are not generated for
1629     // SecondRoundThinBackend.
1630     if (ShouldEmitIndexFiles) {
1631       if (auto E = emitFiles(ImportList, ModuleID, ModuleID.str()))
1632         return E;
1633     }
1634 
1635     assert((CGCache.isValid() == IRCache.isValid()) &&
1636            "Both caches for CG and IR should have matching availability");
1637     if (!CGCache.isValid() || !CombinedIndex.modulePaths().count(ModuleID) ||
1638         all_of(CombinedIndex.getModuleHash(ModuleID),
1639                [](uint32_t V) { return V == 0; }))
1640       // Cache disabled or no entry for this module in the combined index or
1641       // no module hash.
1642       return RunThinBackend(CGAddStream, IRAddStream);
1643 
1644     // Get CGKey for caching object in CGCache.
1645     std::string CGKey = computeLTOCacheKey(
1646         Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
1647         DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
1648     Expected<AddStreamFn> CacheCGAddStreamOrErr =
1649         CGCache(Task, CGKey, ModuleID);
1650     if (Error Err = CacheCGAddStreamOrErr.takeError())
1651       return Err;
1652     AddStreamFn &CacheCGAddStream = *CacheCGAddStreamOrErr;
1653 
1654     // Get IRKey for caching (optimized) IR in IRCache with an extra ID.
1655     std::string IRKey = recomputeLTOCacheKey(CGKey, /*ExtraID=*/"IR");
1656     Expected<AddStreamFn> CacheIRAddStreamOrErr =
1657         IRCache(Task, IRKey, ModuleID);
1658     if (Error Err = CacheIRAddStreamOrErr.takeError())
1659       return Err;
1660     AddStreamFn &CacheIRAddStream = *CacheIRAddStreamOrErr;
1661 
1662     // Ideally, both CG and IR caching should be synchronized. However, in
1663     // practice, their availability may differ due to different expiration
1664     // times. Therefore, if either cache is missing, the backend process is
1665     // triggered.
1666     if (CacheCGAddStream || CacheIRAddStream) {
1667       LLVM_DEBUG(dbgs() << "[FirstRound] Cache Miss for "
1668                         << BM.getModuleIdentifier() << "\n");
1669       return RunThinBackend(CacheCGAddStream ? CacheCGAddStream : CGAddStream,
1670                             CacheIRAddStream ? CacheIRAddStream : IRAddStream);
1671     }
1672 
1673     return Error::success();
1674   }
1675 };
1676 
1677 /// This backend operates in the second round of a two-codegen round process.
1678 /// It starts by reading the optimized bitcode files that were saved during the
1679 /// first round. The backend then executes the codegen only to further optimize
1680 /// the code, utilizing the codegen data merged from the first round. Finally,
1681 /// it writes the resulting object files as usual.
1682 class SecondRoundThinBackend : public InProcessThinBackend {
1683   std::unique_ptr<SmallVector<StringRef>> IRFiles;
1684   stable_hash CombinedCGDataHash;
1685 
1686 public:
SecondRoundThinBackend(const Config & Conf,ModuleSummaryIndex & CombinedIndex,ThreadPoolStrategy ThinLTOParallelism,const DenseMap<StringRef,GVSummaryMapTy> & ModuleToDefinedGVSummaries,AddStreamFn AddStream,FileCache Cache,std::unique_ptr<SmallVector<StringRef>> IRFiles,stable_hash CombinedCGDataHash)1687   SecondRoundThinBackend(
1688       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1689       ThreadPoolStrategy ThinLTOParallelism,
1690       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1691       AddStreamFn AddStream, FileCache Cache,
1692       std::unique_ptr<SmallVector<StringRef>> IRFiles,
1693       stable_hash CombinedCGDataHash)
1694       : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism,
1695                              ModuleToDefinedGVSummaries, std::move(AddStream),
1696                              std::move(Cache),
1697                              /*OnWrite=*/nullptr,
1698                              /*ShouldEmitIndexFiles=*/false,
1699                              /*ShouldEmitImportsFiles=*/false),
1700         IRFiles(std::move(IRFiles)), CombinedCGDataHash(CombinedCGDataHash) {}
1701 
runThinLTOBackendThread(AddStreamFn AddStream,FileCache Cache,unsigned Task,BitcodeModule BM,ModuleSummaryIndex & CombinedIndex,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const std::map<GlobalValue::GUID,GlobalValue::LinkageTypes> & ResolvedODR,const GVSummaryMapTy & DefinedGlobals,MapVector<StringRef,BitcodeModule> & ModuleMap)1702   virtual Error runThinLTOBackendThread(
1703       AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
1704       ModuleSummaryIndex &CombinedIndex,
1705       const FunctionImporter::ImportMapTy &ImportList,
1706       const FunctionImporter::ExportSetTy &ExportList,
1707       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1708       const GVSummaryMapTy &DefinedGlobals,
1709       MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1710     auto RunThinBackend = [&](AddStreamFn AddStream) {
1711       LTOLLVMContext BackendContext(Conf);
1712       std::unique_ptr<Module> LoadedModule =
1713           cgdata::loadModuleForTwoRounds(BM, Task, BackendContext, *IRFiles);
1714 
1715       return thinBackend(Conf, Task, AddStream, *LoadedModule, CombinedIndex,
1716                          ImportList, DefinedGlobals, &ModuleMap,
1717                          /*CodeGenOnly=*/true);
1718     };
1719 
1720     auto ModuleID = BM.getModuleIdentifier();
1721     if (!Cache.isValid() || !CombinedIndex.modulePaths().count(ModuleID) ||
1722         all_of(CombinedIndex.getModuleHash(ModuleID),
1723                [](uint32_t V) { return V == 0; }))
1724       // Cache disabled or no entry for this module in the combined index or
1725       // no module hash.
1726       return RunThinBackend(AddStream);
1727 
1728     // Get Key for caching the final object file in Cache with the combined
1729     // CGData hash.
1730     std::string Key = computeLTOCacheKey(
1731         Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR,
1732         DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls);
1733     Key = recomputeLTOCacheKey(Key,
1734                                /*ExtraID=*/std::to_string(CombinedCGDataHash));
1735     Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key, ModuleID);
1736     if (Error Err = CacheAddStreamOrErr.takeError())
1737       return Err;
1738     AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
1739 
1740     if (CacheAddStream) {
1741       LLVM_DEBUG(dbgs() << "[SecondRound] Cache Miss for "
1742                         << BM.getModuleIdentifier() << "\n");
1743       return RunThinBackend(CacheAddStream);
1744     }
1745 
1746     return Error::success();
1747   }
1748 };
1749 } // end anonymous namespace
1750 
createInProcessThinBackend(ThreadPoolStrategy Parallelism,lto::IndexWriteCallback OnWrite,bool ShouldEmitIndexFiles,bool ShouldEmitImportsFiles)1751 ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
1752                                             lto::IndexWriteCallback OnWrite,
1753                                             bool ShouldEmitIndexFiles,
1754                                             bool ShouldEmitImportsFiles) {
1755   auto Func =
1756       [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1757           const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1758           AddStreamFn AddStream, FileCache Cache) {
1759         return std::make_unique<InProcessThinBackend>(
1760             Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
1761             AddStream, Cache, OnWrite, ShouldEmitIndexFiles,
1762             ShouldEmitImportsFiles);
1763       };
1764   return ThinBackend(Func, Parallelism);
1765 }
1766 
getThinLTODefaultCPU(const Triple & TheTriple)1767 StringLiteral lto::getThinLTODefaultCPU(const Triple &TheTriple) {
1768   if (!TheTriple.isOSDarwin())
1769     return "";
1770   if (TheTriple.getArch() == Triple::x86_64)
1771     return "core2";
1772   if (TheTriple.getArch() == Triple::x86)
1773     return "yonah";
1774   if (TheTriple.isArm64e())
1775     return "apple-a12";
1776   if (TheTriple.getArch() == Triple::aarch64 ||
1777       TheTriple.getArch() == Triple::aarch64_32)
1778     return "cyclone";
1779   return "";
1780 }
1781 
1782 // Given the original \p Path to an output file, replace any path
1783 // prefix matching \p OldPrefix with \p NewPrefix. Also, create the
1784 // resulting directory if it does not yet exist.
getThinLTOOutputFile(StringRef Path,StringRef OldPrefix,StringRef NewPrefix)1785 std::string lto::getThinLTOOutputFile(StringRef Path, StringRef OldPrefix,
1786                                       StringRef NewPrefix) {
1787   if (OldPrefix.empty() && NewPrefix.empty())
1788     return std::string(Path);
1789   SmallString<128> NewPath(Path);
1790   llvm::sys::path::replace_path_prefix(NewPath, OldPrefix, NewPrefix);
1791   StringRef ParentPath = llvm::sys::path::parent_path(NewPath.str());
1792   if (!ParentPath.empty()) {
1793     // Make sure the new directory exists, creating it if necessary.
1794     if (std::error_code EC = llvm::sys::fs::create_directories(ParentPath))
1795       llvm::errs() << "warning: could not create directory '" << ParentPath
1796                    << "': " << EC.message() << '\n';
1797   }
1798   return std::string(NewPath);
1799 }
1800 
1801 namespace {
1802 class WriteIndexesThinBackend : public ThinBackendProc {
1803   std::string OldPrefix, NewPrefix, NativeObjectPrefix;
1804   raw_fd_ostream *LinkedObjectsFile;
1805 
1806 public:
WriteIndexesThinBackend(const Config & Conf,ModuleSummaryIndex & CombinedIndex,ThreadPoolStrategy ThinLTOParallelism,const DenseMap<StringRef,GVSummaryMapTy> & ModuleToDefinedGVSummaries,std::string OldPrefix,std::string NewPrefix,std::string NativeObjectPrefix,bool ShouldEmitImportsFiles,raw_fd_ostream * LinkedObjectsFile,lto::IndexWriteCallback OnWrite)1807   WriteIndexesThinBackend(
1808       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1809       ThreadPoolStrategy ThinLTOParallelism,
1810       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1811       std::string OldPrefix, std::string NewPrefix,
1812       std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
1813       raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite)
1814       : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
1815                         OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism),
1816         OldPrefix(OldPrefix), NewPrefix(NewPrefix),
1817         NativeObjectPrefix(NativeObjectPrefix),
1818         LinkedObjectsFile(LinkedObjectsFile) {}
1819 
start(unsigned Task,BitcodeModule BM,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const std::map<GlobalValue::GUID,GlobalValue::LinkageTypes> & ResolvedODR,MapVector<StringRef,BitcodeModule> & ModuleMap)1820   Error start(
1821       unsigned Task, BitcodeModule BM,
1822       const FunctionImporter::ImportMapTy &ImportList,
1823       const FunctionImporter::ExportSetTy &ExportList,
1824       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
1825       MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1826     StringRef ModulePath = BM.getModuleIdentifier();
1827 
1828     // The contents of this file may be used as input to a native link, and must
1829     // therefore contain the processed modules in a determinstic order that
1830     // match the order they are provided on the command line. For that reason,
1831     // we cannot include this in the asynchronously executed lambda below.
1832     if (LinkedObjectsFile) {
1833       std::string ObjectPrefix =
1834           NativeObjectPrefix.empty() ? NewPrefix : NativeObjectPrefix;
1835       std::string LinkedObjectsFilePath =
1836           getThinLTOOutputFile(ModulePath, OldPrefix, ObjectPrefix);
1837       *LinkedObjectsFile << LinkedObjectsFilePath << '\n';
1838     }
1839 
1840     BackendThreadPool.async(
1841         [this](const StringRef ModulePath,
1842                const FunctionImporter::ImportMapTy &ImportList,
1843                const std::string &OldPrefix, const std::string &NewPrefix) {
1844           std::string NewModulePath =
1845               getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
1846           auto E = emitFiles(ImportList, ModulePath, NewModulePath);
1847           if (E) {
1848             std::unique_lock<std::mutex> L(ErrMu);
1849             if (Err)
1850               Err = joinErrors(std::move(*Err), std::move(E));
1851             else
1852               Err = std::move(E);
1853             return;
1854           }
1855         },
1856         ModulePath, ImportList, OldPrefix, NewPrefix);
1857 
1858     if (OnWrite)
1859       OnWrite(std::string(ModulePath));
1860     return Error::success();
1861   }
1862 
isSensitiveToInputOrder()1863   bool isSensitiveToInputOrder() override {
1864     // The order which modules are written to LinkedObjectsFile should be
1865     // deterministic and match the order they are passed on the command line.
1866     return true;
1867   }
1868 };
1869 } // end anonymous namespace
1870 
createWriteIndexesThinBackend(ThreadPoolStrategy Parallelism,std::string OldPrefix,std::string NewPrefix,std::string NativeObjectPrefix,bool ShouldEmitImportsFiles,raw_fd_ostream * LinkedObjectsFile,IndexWriteCallback OnWrite)1871 ThinBackend lto::createWriteIndexesThinBackend(
1872     ThreadPoolStrategy Parallelism, std::string OldPrefix,
1873     std::string NewPrefix, std::string NativeObjectPrefix,
1874     bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile,
1875     IndexWriteCallback OnWrite) {
1876   auto Func =
1877       [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
1878           const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1879           AddStreamFn AddStream, FileCache Cache) {
1880         return std::make_unique<WriteIndexesThinBackend>(
1881             Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
1882             OldPrefix, NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles,
1883             LinkedObjectsFile, OnWrite);
1884       };
1885   return ThinBackend(Func, Parallelism);
1886 }
1887 
runThinLTO(AddStreamFn AddStream,FileCache Cache,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols)1888 Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
1889                       const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
1890   LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
1891   ThinLTO.CombinedIndex.releaseTemporaryMemory();
1892   timeTraceProfilerBegin("ThinLink", StringRef(""));
1893   auto TimeTraceScopeExit = llvm::make_scope_exit([]() {
1894     if (llvm::timeTraceProfilerEnabled())
1895       llvm::timeTraceProfilerEnd();
1896   });
1897   if (ThinLTO.ModuleMap.empty())
1898     return Error::success();
1899 
1900   if (ThinLTO.ModulesToCompile && ThinLTO.ModulesToCompile->empty()) {
1901     llvm::errs() << "warning: [ThinLTO] No module compiled\n";
1902     return Error::success();
1903   }
1904 
1905   if (Conf.CombinedIndexHook &&
1906       !Conf.CombinedIndexHook(ThinLTO.CombinedIndex, GUIDPreservedSymbols))
1907     return Error::success();
1908 
1909   // Collect for each module the list of function it defines (GUID ->
1910   // Summary).
1911   DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries(
1912       ThinLTO.ModuleMap.size());
1913   ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule(
1914       ModuleToDefinedGVSummaries);
1915   // Create entries for any modules that didn't have any GV summaries
1916   // (either they didn't have any GVs to start with, or we suppressed
1917   // generation of the summaries because they e.g. had inline assembly
1918   // uses that couldn't be promoted/renamed on export). This is so
1919   // InProcessThinBackend::start can still launch a backend thread, which
1920   // is passed the map of summaries for the module, without any special
1921   // handling for this case.
1922   for (auto &Mod : ThinLTO.ModuleMap)
1923     if (!ModuleToDefinedGVSummaries.count(Mod.first))
1924       ModuleToDefinedGVSummaries.try_emplace(Mod.first);
1925 
1926   FunctionImporter::ImportListsTy ImportLists(ThinLTO.ModuleMap.size());
1927   DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists(
1928       ThinLTO.ModuleMap.size());
1929   StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
1930 
1931   if (DumpThinCGSCCs)
1932     ThinLTO.CombinedIndex.dumpSCCs(outs());
1933 
1934   std::set<GlobalValue::GUID> ExportedGUIDs;
1935 
1936   bool WholeProgramVisibilityEnabledInLTO =
1937       Conf.HasWholeProgramVisibility &&
1938       // If validation is enabled, upgrade visibility only when all vtables
1939       // have typeinfos.
1940       (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);
1941   if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO))
1942     ThinLTO.CombinedIndex.setWithWholeProgramVisibility();
1943 
1944   // If we're validating, get the vtable symbols that should not be
1945   // upgraded because they correspond to typeIDs outside of index-based
1946   // WPD info.
1947   DenseSet<GlobalValue::GUID> VisibleToRegularObjSymbols;
1948   if (WholeProgramVisibilityEnabledInLTO &&
1949       Conf.ValidateAllVtablesHaveTypeInfos) {
1950     // This returns true when the name is local or not defined. Locals are
1951     // expected to be handled separately.
1952     auto IsVisibleToRegularObj = [&](StringRef name) {
1953       auto It = GlobalResolutions->find(name);
1954       return (It == GlobalResolutions->end() ||
1955               It->second.VisibleOutsideSummary || !It->second.Prevailing);
1956     };
1957 
1958     getVisibleToRegularObjVtableGUIDs(ThinLTO.CombinedIndex,
1959                                       VisibleToRegularObjSymbols,
1960                                       IsVisibleToRegularObj);
1961   }
1962 
1963   // If allowed, upgrade public vcall visibility to linkage unit visibility in
1964   // the summaries before whole program devirtualization below.
1965   updateVCallVisibilityInIndex(
1966       ThinLTO.CombinedIndex, WholeProgramVisibilityEnabledInLTO,
1967       DynamicExportSymbols, VisibleToRegularObjSymbols);
1968 
1969   // Perform index-based WPD. This will return immediately if there are
1970   // no index entries in the typeIdMetadata map (e.g. if we are instead
1971   // performing IR-based WPD in hybrid regular/thin LTO mode).
1972   std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
1973   runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs,
1974                                LocalWPDTargetsMap);
1975 
1976   auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
1977     return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
1978   };
1979   if (EnableMemProfContextDisambiguation) {
1980     MemProfContextDisambiguation ContextDisambiguation;
1981     ContextDisambiguation.run(ThinLTO.CombinedIndex, isPrevailing);
1982   }
1983 
1984   // Figure out which symbols need to be internalized. This also needs to happen
1985   // at -O0 because summary-based DCE is implemented using internalization, and
1986   // we must apply DCE consistently with the full LTO module in order to avoid
1987   // undefined references during the final link.
1988   for (auto &Res : *GlobalResolutions) {
1989     // If the symbol does not have external references or it is not prevailing,
1990     // then not need to mark it as exported from a ThinLTO partition.
1991     if (Res.second.Partition != GlobalResolution::External ||
1992         !Res.second.isPrevailingIRSymbol())
1993       continue;
1994     auto GUID = GlobalValue::getGUIDAssumingExternalLinkage(
1995         GlobalValue::dropLLVMManglingEscape(Res.second.IRName));
1996     // Mark exported unless index-based analysis determined it to be dead.
1997     if (ThinLTO.CombinedIndex.isGUIDLive(GUID))
1998       ExportedGUIDs.insert(GUID);
1999   }
2000 
2001   // Reset the GlobalResolutions to deallocate the associated memory, as there
2002   // are no further accesses. We specifically want to do this before computing
2003   // cross module importing, which adds to peak memory via the computed import
2004   // and export lists.
2005   releaseGlobalResolutionsMemory();
2006 
2007   if (Conf.OptLevel > 0)
2008     ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
2009                              isPrevailing, ImportLists, ExportLists);
2010 
2011   // Any functions referenced by the jump table in the regular LTO object must
2012   // be exported.
2013   auto &Defs = ThinLTO.CombinedIndex.cfiFunctionDefs();
2014   ExportedGUIDs.insert(Defs.guid_begin(), Defs.guid_end());
2015   auto &Decls = ThinLTO.CombinedIndex.cfiFunctionDecls();
2016   ExportedGUIDs.insert(Decls.guid_begin(), Decls.guid_end());
2017 
2018   auto isExported = [&](StringRef ModuleIdentifier, ValueInfo VI) {
2019     const auto &ExportList = ExportLists.find(ModuleIdentifier);
2020     return (ExportList != ExportLists.end() && ExportList->second.count(VI)) ||
2021            ExportedGUIDs.count(VI.getGUID());
2022   };
2023 
2024   // Update local devirtualized targets that were exported by cross-module
2025   // importing or by other devirtualizations marked in the ExportedGUIDs set.
2026   updateIndexWPDForExports(ThinLTO.CombinedIndex, isExported,
2027                            LocalWPDTargetsMap);
2028 
2029   thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported,
2030                                       isPrevailing);
2031 
2032   auto recordNewLinkage = [&](StringRef ModuleIdentifier,
2033                               GlobalValue::GUID GUID,
2034                               GlobalValue::LinkageTypes NewLinkage) {
2035     ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
2036   };
2037   thinLTOResolvePrevailingInIndex(Conf, ThinLTO.CombinedIndex, isPrevailing,
2038                                   recordNewLinkage, GUIDPreservedSymbols);
2039 
2040   thinLTOPropagateFunctionAttrs(ThinLTO.CombinedIndex, isPrevailing);
2041 
2042   generateParamAccessSummary(ThinLTO.CombinedIndex);
2043 
2044   if (llvm::timeTraceProfilerEnabled())
2045     llvm::timeTraceProfilerEnd();
2046 
2047   TimeTraceScopeExit.release();
2048 
2049   auto &ModuleMap =
2050       ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap;
2051 
2052   auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error {
2053     auto ProcessOneModule = [&](int I) -> Error {
2054       auto &Mod = *(ModuleMap.begin() + I);
2055       // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for
2056       // combined module and parallel code generation partitions.
2057       return BackendProcess->start(
2058           RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second,
2059           ImportLists[Mod.first], ExportLists[Mod.first],
2060           ResolvedODR[Mod.first], ThinLTO.ModuleMap);
2061     };
2062 
2063     BackendProcess->setup(ModuleMap.size(),
2064                           RegularLTO.ParallelCodeGenParallelismLevel,
2065                           RegularLTO.CombinedModule->getTargetTriple());
2066 
2067     if (BackendProcess->getThreadCount() == 1 ||
2068         BackendProcess->isSensitiveToInputOrder()) {
2069       // Process the modules in the order they were provided on the
2070       // command-line. It is important for this codepath to be used for
2071       // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists
2072       // ThinLTO objects in the same order as the inputs, which otherwise would
2073       // affect the final link order.
2074       for (int I = 0, E = ModuleMap.size(); I != E; ++I)
2075         if (Error E = ProcessOneModule(I))
2076           return E;
2077     } else {
2078       // When executing in parallel, process largest bitsize modules first to
2079       // improve parallelism, and avoid starving the thread pool near the end.
2080       // This saves about 15 sec on a 36-core machine while link `clang.exe`
2081       // (out of 100 sec).
2082       std::vector<BitcodeModule *> ModulesVec;
2083       ModulesVec.reserve(ModuleMap.size());
2084       for (auto &Mod : ModuleMap)
2085         ModulesVec.push_back(&Mod.second);
2086       for (int I : generateModulesOrdering(ModulesVec))
2087         if (Error E = ProcessOneModule(I))
2088           return E;
2089     }
2090     return BackendProcess->wait();
2091   };
2092 
2093   if (!CodeGenDataThinLTOTwoRounds) {
2094     std::unique_ptr<ThinBackendProc> BackendProc =
2095         ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
2096                         AddStream, Cache);
2097     return RunBackends(BackendProc.get());
2098   }
2099 
2100   // Perform two rounds of code generation for ThinLTO:
2101   // 1. First round: Perform optimization and code generation, outputting to
2102   // temporary scratch objects.
2103   // 2. Merge code generation data extracted from the temporary scratch objects.
2104   // 3. Second round: Execute code generation again using the merged data.
2105   LLVM_DEBUG(dbgs() << "[TwoRounds] Initializing ThinLTO two-codegen rounds\n");
2106 
2107   unsigned MaxTasks = getMaxTasks();
2108   auto Parallelism = ThinLTO.Backend.getParallelism();
2109   // Set up two additional streams and caches for storing temporary scratch
2110   // objects and optimized IRs, using the same cache directory as the original.
2111   cgdata::StreamCacheData CG(MaxTasks, Cache, "CG"), IR(MaxTasks, Cache, "IR");
2112 
2113   // First round: Execute optimization and code generation, outputting to
2114   // temporary scratch objects. Serialize the optimized IRs before initiating
2115   // code generation.
2116   LLVM_DEBUG(dbgs() << "[TwoRounds] Running the first round of codegen\n");
2117   auto FirstRoundLTO = std::make_unique<FirstRoundThinBackend>(
2118       Conf, ThinLTO.CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
2119       CG.AddStream, CG.Cache, IR.AddStream, IR.Cache);
2120   if (Error E = RunBackends(FirstRoundLTO.get()))
2121     return E;
2122 
2123   LLVM_DEBUG(dbgs() << "[TwoRounds] Merging codegen data\n");
2124   auto CombinedHashOrErr = cgdata::mergeCodeGenData(*CG.getResult());
2125   if (Error E = CombinedHashOrErr.takeError())
2126     return E;
2127   auto CombinedHash = *CombinedHashOrErr;
2128   LLVM_DEBUG(dbgs() << "[TwoRounds] CGData hash: " << CombinedHash << "\n");
2129 
2130   // Second round: Read the optimized IRs and execute code generation using the
2131   // merged data.
2132   LLVM_DEBUG(dbgs() << "[TwoRounds] Running the second round of codegen\n");
2133   auto SecondRoundLTO = std::make_unique<SecondRoundThinBackend>(
2134       Conf, ThinLTO.CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
2135       AddStream, Cache, IR.getResult(), CombinedHash);
2136   return RunBackends(SecondRoundLTO.get());
2137 }
2138 
setupLLVMOptimizationRemarks(LLVMContext & Context,StringRef RemarksFilename,StringRef RemarksPasses,StringRef RemarksFormat,bool RemarksWithHotness,std::optional<uint64_t> RemarksHotnessThreshold,int Count)2139 Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks(
2140     LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses,
2141     StringRef RemarksFormat, bool RemarksWithHotness,
2142     std::optional<uint64_t> RemarksHotnessThreshold, int Count) {
2143   std::string Filename = std::string(RemarksFilename);
2144   // For ThinLTO, file.opt.<format> becomes
2145   // file.opt.<format>.thin.<num>.<format>.
2146   if (!Filename.empty() && Count != -1)
2147     Filename =
2148         (Twine(Filename) + ".thin." + llvm::utostr(Count) + "." + RemarksFormat)
2149             .str();
2150 
2151   auto ResultOrErr = llvm::setupLLVMOptimizationRemarks(
2152       Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness,
2153       RemarksHotnessThreshold);
2154   if (Error E = ResultOrErr.takeError())
2155     return std::move(E);
2156 
2157   if (*ResultOrErr)
2158     (*ResultOrErr)->keep();
2159 
2160   return ResultOrErr;
2161 }
2162 
2163 Expected<std::unique_ptr<ToolOutputFile>>
setupStatsFile(StringRef StatsFilename)2164 lto::setupStatsFile(StringRef StatsFilename) {
2165   // Setup output file to emit statistics.
2166   if (StatsFilename.empty())
2167     return nullptr;
2168 
2169   llvm::EnableStatistics(false);
2170   std::error_code EC;
2171   auto StatsFile =
2172       std::make_unique<ToolOutputFile>(StatsFilename, EC, sys::fs::OF_None);
2173   if (EC)
2174     return errorCodeToError(EC);
2175 
2176   StatsFile->keep();
2177   return std::move(StatsFile);
2178 }
2179 
2180 // Compute the ordering we will process the inputs: the rough heuristic here
2181 // is to sort them per size so that the largest module get schedule as soon as
2182 // possible. This is purely a compile-time optimization.
generateModulesOrdering(ArrayRef<BitcodeModule * > R)2183 std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
2184   auto Seq = llvm::seq<int>(0, R.size());
2185   std::vector<int> ModulesOrdering(Seq.begin(), Seq.end());
2186   llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
2187     auto LSize = R[LeftIndex]->getBuffer().size();
2188     auto RSize = R[RightIndex]->getBuffer().size();
2189     return LSize > RSize;
2190   });
2191   return ModulesOrdering;
2192 }
2193 
2194 namespace {
2195 /// This out-of-process backend does not perform code generation when invoked
2196 /// for each task. Instead, it generates the necessary information (e.g., the
2197 /// summary index shard, import list, etc.) to enable code generation to be
2198 /// performed externally, similar to WriteIndexesThinBackend. The backend's
2199 /// `wait` function then invokes an external distributor process to carry out
2200 /// the backend compilations.
2201 class OutOfProcessThinBackend : public CGThinBackend {
2202   using SString = SmallString<128>;
2203 
2204   BumpPtrAllocator Alloc;
2205   StringSaver Saver{Alloc};
2206 
2207   SString LinkerOutputFile;
2208 
2209   SString DistributorPath;
2210   ArrayRef<StringRef> DistributorArgs;
2211 
2212   SString RemoteCompiler;
2213   ArrayRef<StringRef> RemoteCompilerArgs;
2214 
2215   bool SaveTemps;
2216 
2217   SmallVector<StringRef, 0> CodegenOptions;
2218   DenseSet<StringRef> CommonInputs;
2219 
2220   // Information specific to individual backend compilation job.
2221   struct Job {
2222     unsigned Task;
2223     StringRef ModuleID;
2224     StringRef NativeObjectPath;
2225     StringRef SummaryIndexPath;
2226     ImportsFilesContainer ImportsFiles;
2227   };
2228   // The set of backend compilations jobs.
2229   SmallVector<Job> Jobs;
2230 
2231   // A unique string to identify the current link.
2232   SmallString<8> UID;
2233 
2234   // The offset to the first ThinLTO task.
2235   unsigned ThinLTOTaskOffset;
2236 
2237   // The target triple to supply for backend compilations.
2238   llvm::Triple Triple;
2239 
2240 public:
OutOfProcessThinBackend(const Config & Conf,ModuleSummaryIndex & CombinedIndex,ThreadPoolStrategy ThinLTOParallelism,const DenseMap<StringRef,GVSummaryMapTy> & ModuleToDefinedGVSummaries,AddStreamFn AddStream,lto::IndexWriteCallback OnWrite,bool ShouldEmitIndexFiles,bool ShouldEmitImportsFiles,StringRef LinkerOutputFile,StringRef Distributor,ArrayRef<StringRef> DistributorArgs,StringRef RemoteCompiler,ArrayRef<StringRef> RemoteCompilerArgs,bool SaveTemps)2241   OutOfProcessThinBackend(
2242       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
2243       ThreadPoolStrategy ThinLTOParallelism,
2244       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
2245       AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
2246       bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
2247       StringRef LinkerOutputFile, StringRef Distributor,
2248       ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
2249       ArrayRef<StringRef> RemoteCompilerArgs, bool SaveTemps)
2250       : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
2251                       AddStream, OnWrite, ShouldEmitIndexFiles,
2252                       ShouldEmitImportsFiles, ThinLTOParallelism),
2253         LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor),
2254         DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler),
2255         RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps) {}
2256 
setup(unsigned ThinLTONumTasks,unsigned ThinLTOTaskOffset,llvm::Triple Triple)2257   virtual void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset,
2258                      llvm::Triple Triple) override {
2259     UID = itostr(sys::Process::getProcessId());
2260     Jobs.resize((size_t)ThinLTONumTasks);
2261     this->ThinLTOTaskOffset = ThinLTOTaskOffset;
2262     this->Triple = Triple;
2263   }
2264 
start(unsigned Task,BitcodeModule BM,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const std::map<GlobalValue::GUID,GlobalValue::LinkageTypes> & ResolvedODR,MapVector<StringRef,BitcodeModule> & ModuleMap)2265   Error start(
2266       unsigned Task, BitcodeModule BM,
2267       const FunctionImporter::ImportMapTy &ImportList,
2268       const FunctionImporter::ExportSetTy &ExportList,
2269       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
2270       MapVector<StringRef, BitcodeModule> &ModuleMap) override {
2271 
2272     StringRef ModulePath = BM.getModuleIdentifier();
2273 
2274     SString ObjFilePath = sys::path::parent_path(LinkerOutputFile);
2275     sys::path::append(ObjFilePath, sys::path::stem(ModulePath) + "." +
2276                                        itostr(Task) + "." + UID + ".native.o");
2277 
2278     Job &J = Jobs[Task - ThinLTOTaskOffset];
2279     J = {
2280         Task,
2281         ModulePath,
2282         Saver.save(ObjFilePath.str()),
2283         Saver.save(ObjFilePath.str() + ".thinlto.bc"),
2284         {} // Filled in by emitFiles below.
2285     };
2286 
2287     assert(ModuleToDefinedGVSummaries.count(ModulePath));
2288 
2289     // The BackendThreadPool is only used here to write the sharded index files
2290     // (similar to WriteIndexesThinBackend).
2291     BackendThreadPool.async(
2292         [=](Job &J, const FunctionImporter::ImportMapTy &ImportList) {
2293           if (auto E = emitFiles(ImportList, J.ModuleID, J.ModuleID.str(),
2294                                  J.SummaryIndexPath, J.ImportsFiles)) {
2295             std::unique_lock<std::mutex> L(ErrMu);
2296             if (Err)
2297               Err = joinErrors(std::move(*Err), std::move(E));
2298             else
2299               Err = std::move(E);
2300           }
2301         },
2302         std::ref(J), std::ref(ImportList));
2303 
2304     return Error::success();
2305   }
2306 
2307   // Derive a set of Clang options that will be shared/common for all DTLTO
2308   // backend compilations. We are intentionally minimal here as these options
2309   // must remain synchronized with the behavior of Clang. DTLTO does not support
2310   // all the features available with in-process LTO. More features are expected
2311   // to be added over time. Users can specify Clang options directly if a
2312   // feature is not supported. Note that explicitly specified options that imply
2313   // additional input or output file dependencies must be communicated to the
2314   // distribution system, potentially by setting extra options on the
2315   // distributor program.
buildCommonRemoteCompilerOptions()2316   void buildCommonRemoteCompilerOptions() {
2317     const lto::Config &C = Conf;
2318     auto &Ops = CodegenOptions;
2319 
2320     Ops.push_back(Saver.save("-O" + Twine(C.OptLevel)));
2321 
2322     if (C.Options.EmitAddrsig)
2323       Ops.push_back("-faddrsig");
2324     if (C.Options.FunctionSections)
2325       Ops.push_back("-ffunction-sections");
2326     if (C.Options.DataSections)
2327       Ops.push_back("-fdata-sections");
2328 
2329     if (C.RelocModel == Reloc::PIC_)
2330       // Clang doesn't have -fpic for all triples.
2331       if (!Triple.isOSBinFormatCOFF())
2332         Ops.push_back("-fpic");
2333 
2334     // Turn on/off warnings about profile cfg mismatch (default on)
2335     // --lto-pgo-warn-mismatch.
2336     if (!C.PGOWarnMismatch) {
2337       Ops.push_back("-mllvm");
2338       Ops.push_back("-no-pgo-warn-mismatch");
2339     }
2340 
2341     // Enable sample-based profile guided optimizations.
2342     // Sample profile file path --lto-sample-profile=<value>.
2343     if (!C.SampleProfile.empty()) {
2344       Ops.push_back(
2345           Saver.save("-fprofile-sample-use=" + Twine(C.SampleProfile)));
2346       CommonInputs.insert(C.SampleProfile);
2347     }
2348 
2349     // We don't know which of options will be used by Clang.
2350     Ops.push_back("-Wno-unused-command-line-argument");
2351 
2352     // Forward any supplied options.
2353     if (!RemoteCompilerArgs.empty())
2354       for (auto &a : RemoteCompilerArgs)
2355         Ops.push_back(a);
2356   }
2357 
2358   // Generates a JSON file describing the backend compilations, for the
2359   // distributor.
emitDistributorJson(StringRef DistributorJson)2360   bool emitDistributorJson(StringRef DistributorJson) {
2361     using json::Array;
2362     std::error_code EC;
2363     raw_fd_ostream OS(DistributorJson, EC);
2364     if (EC)
2365       return false;
2366 
2367     json::OStream JOS(OS);
2368     JOS.object([&]() {
2369       // Information common to all jobs.
2370       JOS.attributeObject("common", [&]() {
2371         JOS.attribute("linker_output", LinkerOutputFile);
2372 
2373         JOS.attributeArray("args", [&]() {
2374           JOS.value(RemoteCompiler);
2375 
2376           JOS.value("-c");
2377 
2378           JOS.value(Saver.save("--target=" + Triple.str()));
2379 
2380           for (const auto &A : CodegenOptions)
2381             JOS.value(A);
2382         });
2383 
2384         JOS.attribute("inputs", Array(CommonInputs));
2385       });
2386 
2387       // Per-compilation-job information.
2388       JOS.attributeArray("jobs", [&]() {
2389         for (const auto &J : Jobs) {
2390           assert(J.Task != 0);
2391 
2392           SmallVector<StringRef, 2> Inputs;
2393           SmallVector<StringRef, 1> Outputs;
2394 
2395           JOS.object([&]() {
2396             JOS.attributeArray("args", [&]() {
2397               JOS.value(J.ModuleID);
2398               Inputs.push_back(J.ModuleID);
2399 
2400               JOS.value(
2401                   Saver.save("-fthinlto-index=" + Twine(J.SummaryIndexPath)));
2402               Inputs.push_back(J.SummaryIndexPath);
2403 
2404               JOS.value("-o");
2405               JOS.value(J.NativeObjectPath);
2406               Outputs.push_back(J.NativeObjectPath);
2407             });
2408 
2409             // Add the bitcode files from which imports will be made. These do
2410             // not explicitly appear on the backend compilation command lines
2411             // but are recorded in the summary index shards.
2412             llvm::append_range(Inputs, J.ImportsFiles);
2413             JOS.attribute("inputs", Array(Inputs));
2414 
2415             JOS.attribute("outputs", Array(Outputs));
2416           });
2417         }
2418       });
2419     });
2420 
2421     return true;
2422   }
2423 
removeFile(StringRef FileName)2424   void removeFile(StringRef FileName) {
2425     std::error_code EC = sys::fs::remove(FileName, true);
2426     if (EC && EC != std::make_error_code(std::errc::no_such_file_or_directory))
2427       errs() << "warning: could not remove the file '" << FileName
2428              << "': " << EC.message() << "\n";
2429   }
2430 
wait()2431   Error wait() override {
2432     // Wait for the information on the required backend compilations to be
2433     // gathered.
2434     BackendThreadPool.wait();
2435     if (Err)
2436       return std::move(*Err);
2437 
2438     auto CleanPerJobFiles = llvm::make_scope_exit([&] {
2439       if (!SaveTemps)
2440         for (auto &Job : Jobs) {
2441           removeFile(Job.NativeObjectPath);
2442           if (!ShouldEmitIndexFiles)
2443             removeFile(Job.SummaryIndexPath);
2444         }
2445     });
2446 
2447     const StringRef BCError = "DTLTO backend compilation: ";
2448 
2449     buildCommonRemoteCompilerOptions();
2450 
2451     SString JsonFile = sys::path::parent_path(LinkerOutputFile);
2452     sys::path::append(JsonFile, sys::path::stem(LinkerOutputFile) + "." + UID +
2453                                     ".dist-file.json");
2454     if (!emitDistributorJson(JsonFile))
2455       return make_error<StringError>(
2456           BCError + "failed to generate distributor JSON script: " + JsonFile,
2457           inconvertibleErrorCode());
2458     auto CleanJson = llvm::make_scope_exit([&] {
2459       if (!SaveTemps)
2460         removeFile(JsonFile);
2461     });
2462 
2463     SmallVector<StringRef, 3> Args = {DistributorPath};
2464     llvm::append_range(Args, DistributorArgs);
2465     Args.push_back(JsonFile);
2466     std::string ErrMsg;
2467     if (sys::ExecuteAndWait(Args[0], Args,
2468                             /*Env=*/std::nullopt, /*Redirects=*/{},
2469                             /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) {
2470       return make_error<StringError>(
2471           BCError + "distributor execution failed" +
2472               (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
2473           inconvertibleErrorCode());
2474     }
2475 
2476     for (auto &Job : Jobs) {
2477       // Load the native object from a file into a memory buffer
2478       // and store its contents in the output buffer.
2479       auto ObjFileMbOrErr =
2480           MemoryBuffer::getFile(Job.NativeObjectPath, /*IsText=*/false,
2481                                 /*RequiresNullTerminator=*/false);
2482       if (std::error_code EC = ObjFileMbOrErr.getError())
2483         return make_error<StringError>(
2484             BCError + "cannot open native object file: " +
2485                 Job.NativeObjectPath + ": " + EC.message(),
2486             inconvertibleErrorCode());
2487       auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
2488       if (Error Err = StreamOrErr.takeError())
2489         report_fatal_error(std::move(Err));
2490       auto &Stream = *StreamOrErr->get();
2491       *Stream.OS << ObjFileMbOrErr->get()->getMemBufferRef().getBuffer();
2492       if (Error Err = Stream.commit())
2493         report_fatal_error(std::move(Err));
2494     }
2495 
2496     return Error::success();
2497   }
2498 };
2499 } // end anonymous namespace
2500 
createOutOfProcessThinBackend(ThreadPoolStrategy Parallelism,lto::IndexWriteCallback OnWrite,bool ShouldEmitIndexFiles,bool ShouldEmitImportsFiles,StringRef LinkerOutputFile,StringRef Distributor,ArrayRef<StringRef> DistributorArgs,StringRef RemoteCompiler,ArrayRef<StringRef> RemoteCompilerArgs,bool SaveTemps)2501 ThinBackend lto::createOutOfProcessThinBackend(
2502     ThreadPoolStrategy Parallelism, lto::IndexWriteCallback OnWrite,
2503     bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
2504     StringRef LinkerOutputFile, StringRef Distributor,
2505     ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
2506     ArrayRef<StringRef> RemoteCompilerArgs, bool SaveTemps) {
2507   auto Func =
2508       [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
2509           const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
2510           AddStreamFn AddStream, FileCache /*Cache*/) {
2511         return std::make_unique<OutOfProcessThinBackend>(
2512             Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
2513             AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles,
2514             LinkerOutputFile, Distributor, DistributorArgs, RemoteCompiler,
2515             RemoteCompilerArgs, SaveTemps);
2516       };
2517   return ThinBackend(Func, Parallelism);
2518 }
2519