xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionImport.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements Function import based on summaries.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/IPO/FunctionImport.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/IR/AutoUpgrade.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/GlobalAlias.h"
25 #include "llvm/IR/GlobalObject.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/Metadata.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/IR/ModuleSummaryIndex.h"
31 #include "llvm/IRReader/IRReader.h"
32 #include "llvm/Linker/IRMover.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/FileSystem.h"
40 #include "llvm/Support/JSON.h"
41 #include "llvm/Support/SourceMgr.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/IPO/Internalize.h"
44 #include "llvm/Transforms/Utils/Cloning.h"
45 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
46 #include "llvm/Transforms/Utils/ValueMapper.h"
47 #include <cassert>
48 #include <memory>
49 #include <set>
50 #include <string>
51 #include <system_error>
52 #include <tuple>
53 #include <utility>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "function-import"
58 
// Counters for import decisions made during the thin link.
STATISTIC(NumImportedFunctionsThinLink,
          "Number of functions thin link decided to import");
STATISTIC(NumImportedHotFunctionsThinLink,
          "Number of hot functions thin link decided to import");
STATISTIC(NumImportedCriticalFunctionsThinLink,
          "Number of critical functions thin link decided to import");
STATISTIC(NumImportedGlobalVarsThinLink,
          "Number of global variables thin link decided to import");
// Counters for imports actually performed in the backend.
STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
STATISTIC(NumImportedGlobalVars,
          "Number of global variables imported in backend");
STATISTIC(NumImportedModules, "Number of modules imported from");
// Counters for dead/live symbols found in the index.
STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
STATISTIC(NumLiveSymbols, "Number of live symbols in index");
73 
74 /// Limit on instruction count of imported functions.
75 static cl::opt<unsigned> ImportInstrLimit(
76     "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
77     cl::desc("Only import functions with less than N instructions"));
78 
79 static cl::opt<int> ImportCutoff(
80     "import-cutoff", cl::init(-1), cl::Hidden, cl::value_desc("N"),
81     cl::desc("Only import first N functions if N>=0 (default -1)"));
82 
83 static cl::opt<bool>
84     ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
85                    cl::desc("Import functions with noinline attribute"));
86 
87 static cl::opt<float>
88     ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
89                       cl::Hidden, cl::value_desc("x"),
90                       cl::desc("As we import functions, multiply the "
91                                "`import-instr-limit` threshold by this factor "
92                                "before processing newly imported functions"));
93 
94 static cl::opt<float> ImportHotInstrFactor(
95     "import-hot-evolution-factor", cl::init(1.0), cl::Hidden,
96     cl::value_desc("x"),
97     cl::desc("As we import functions called from hot callsite, multiply the "
98              "`import-instr-limit` threshold by this factor "
99              "before processing newly imported functions"));
100 
101 static cl::opt<float> ImportHotMultiplier(
102     "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"),
103     cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));
104 
105 static cl::opt<float> ImportCriticalMultiplier(
106     "import-critical-multiplier", cl::init(100.0), cl::Hidden,
107     cl::value_desc("x"),
108     cl::desc(
109         "Multiply the `import-instr-limit` threshold for critical callsites"));
110 
111 // FIXME: This multiplier was not really tuned up.
112 static cl::opt<float> ImportColdMultiplier(
113     "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
114     cl::desc("Multiply the `import-instr-limit` threshold for cold callsites"));
115 
116 static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
117                                   cl::desc("Print imported functions"));
118 
119 static cl::opt<bool> PrintImportFailures(
120     "print-import-failures", cl::init(false), cl::Hidden,
121     cl::desc("Print information for functions rejected for importing"));
122 
123 static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
124                                  cl::desc("Compute dead symbols"));
125 
126 static cl::opt<bool> EnableImportMetadata(
127     "enable-import-metadata", cl::init(false), cl::Hidden,
128     cl::desc("Enable import metadata like 'thinlto_src_module' and "
129              "'thinlto_src_file'"));
130 
131 /// Summary file to use for function importing when using -function-import from
132 /// the command line.
133 static cl::opt<std::string>
134     SummaryFile("summary-file",
135                 cl::desc("The summary file to use for function importing."));
136 
137 /// Used when testing importing from distributed indexes via opt
138 // -function-import.
139 static cl::opt<bool>
140     ImportAllIndex("import-all-index",
141                    cl::desc("Import all external functions in index."));
142 
143 /// This is a test-only option.
144 /// If this option is enabled, the ThinLTO indexing step will import each
145 /// function declaration as a fallback. In a real build this may increase ram
146 /// usage of the indexing step unnecessarily.
147 /// TODO: Implement selective import (based on combined summary analysis) to
148 /// ensure the imported function has a use case in the postlink pipeline.
149 static cl::opt<bool> ImportDeclaration(
150     "import-declaration", cl::init(false), cl::Hidden,
151     cl::desc("If true, import function declaration as fallback if the function "
152              "definition is not imported."));
153 
154 /// Pass a workload description file - an example of workload would be the
155 /// functions executed to satisfy a RPC request. A workload is defined by a root
156 /// function and the list of functions that are (frequently) needed to satisfy
157 /// it. The module that defines the root will have all those functions imported.
158 /// The file contains a JSON dictionary. The keys are root functions, the values
159 /// are lists of functions to import in the module defining the root. It is
160 /// assumed -funique-internal-linkage-names was used, thus ensuring function
161 /// names are unique even for local linkage ones.
162 static cl::opt<std::string> WorkloadDefinitions(
163     "thinlto-workload-def",
164     cl::desc("Pass a workload definition. This is a file containing a JSON "
165              "dictionary. The keys are root functions, the values are lists of "
166              "functions to import in the module defining the root. It is "
167              "assumed -funique-internal-linkage-names was used, to ensure "
168              "local linkage functions have unique names. For example: \n"
169              "{\n"
170              "  \"rootFunction_1\": [\"function_to_import_1\", "
171              "\"function_to_import_2\"], \n"
172              "  \"rootFunction_2\": [\"function_to_import_3\", "
173              "\"function_to_import_4\"] \n"
174              "}"),
175     cl::Hidden);
176 
namespace llvm {
// Declaration only: this option is defined in another translation unit.
extern cl::opt<bool> EnableMemProfContextDisambiguation;
}
180 
181 // Load lazily a module from \p FileName in \p Context.
loadFile(const std::string & FileName,LLVMContext & Context)182 static std::unique_ptr<Module> loadFile(const std::string &FileName,
183                                         LLVMContext &Context) {
184   SMDiagnostic Err;
185   LLVM_DEBUG(dbgs() << "Loading '" << FileName << "'\n");
186   // Metadata isn't loaded until functions are imported, to minimize
187   // the memory overhead.
188   std::unique_ptr<Module> Result =
189       getLazyIRFileModule(FileName, Err, Context,
190                           /* ShouldLazyLoadMetadata = */ true);
191   if (!Result) {
192     Err.print("function-import", errs());
193     report_fatal_error("Abort");
194   }
195 
196   return Result;
197 }
198 
199 /// Given a list of possible callee implementation for a call site, qualify the
200 /// legality of importing each. The return is a range of pairs. Each pair
201 /// corresponds to a candidate. The first value is the ImportFailureReason for
202 /// that candidate, the second is the candidate.
qualifyCalleeCandidates(const ModuleSummaryIndex & Index,ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,StringRef CallerModulePath)203 static auto qualifyCalleeCandidates(
204     const ModuleSummaryIndex &Index,
205     ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
206     StringRef CallerModulePath) {
207   return llvm::map_range(
208       CalleeSummaryList,
209       [&Index, CalleeSummaryList,
210        CallerModulePath](const std::unique_ptr<GlobalValueSummary> &SummaryPtr)
211           -> std::pair<FunctionImporter::ImportFailureReason,
212                        const GlobalValueSummary *> {
213         auto *GVSummary = SummaryPtr.get();
214         if (!Index.isGlobalValueLive(GVSummary))
215           return {FunctionImporter::ImportFailureReason::NotLive, GVSummary};
216 
217         if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
218           return {FunctionImporter::ImportFailureReason::InterposableLinkage,
219                   GVSummary};
220 
221         auto *Summary = dyn_cast<FunctionSummary>(GVSummary->getBaseObject());
222 
223         // Ignore any callees that aren't actually functions. This could happen
224         // in the case of GUID hash collisions. It could also happen in theory
225         // for SamplePGO profiles collected on old versions of the code after
226         // renaming, since we synthesize edges to any inlined callees appearing
227         // in the profile.
228         if (!Summary)
229           return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
230 
231         // If this is a local function, make sure we import the copy
232         // in the caller's module. The only time a local function can
233         // share an entry in the index is if there is a local with the same name
234         // in another module that had the same source file name (in a different
235         // directory), where each was compiled in their own directory so there
236         // was not distinguishing path.
237         // However, do the import from another module if there is only one
238         // entry in the list - in that case this must be a reference due
239         // to indirect call profile data, since a function pointer can point to
240         // a local in another module.
241         if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
242             CalleeSummaryList.size() > 1 &&
243             Summary->modulePath() != CallerModulePath)
244           return {
245               FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
246               GVSummary};
247 
248         // Skip if it isn't legal to import (e.g. may reference unpromotable
249         // locals).
250         if (Summary->notEligibleToImport())
251           return {FunctionImporter::ImportFailureReason::NotEligible,
252                   GVSummary};
253 
254         return {FunctionImporter::ImportFailureReason::None, GVSummary};
255       });
256 }
257 
258 /// Given a list of possible callee implementation for a call site, select one
259 /// that fits the \p Threshold for function definition import. If none are
260 /// found, the Reason will give the last reason for the failure (last, in the
261 /// order of CalleeSummaryList entries). While looking for a callee definition,
262 /// sets \p TooLargeOrNoInlineSummary to the last seen too-large or noinline
263 /// candidate; other modules may want to know the function summary or
264 /// declaration even if a definition is not needed.
265 ///
266 /// FIXME: select "best" instead of first that fits. But what is "best"?
267 /// - The smallest: more likely to be inlined.
268 /// - The one with the least outgoing edges (already well optimized).
269 /// - One from a module already being imported from in order to reduce the
270 ///   number of source modules parsed/linked.
271 /// - One that has PGO data attached.
272 /// - [insert you fancy metric here]
273 static const GlobalValueSummary *
selectCallee(const ModuleSummaryIndex & Index,ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,unsigned Threshold,StringRef CallerModulePath,const GlobalValueSummary * & TooLargeOrNoInlineSummary,FunctionImporter::ImportFailureReason & Reason)274 selectCallee(const ModuleSummaryIndex &Index,
275              ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
276              unsigned Threshold, StringRef CallerModulePath,
277              const GlobalValueSummary *&TooLargeOrNoInlineSummary,
278              FunctionImporter::ImportFailureReason &Reason) {
279   // Records the last summary with reason noinline or too-large.
280   TooLargeOrNoInlineSummary = nullptr;
281   auto QualifiedCandidates =
282       qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
283   for (auto QualifiedValue : QualifiedCandidates) {
284     Reason = QualifiedValue.first;
285     // Skip a summary if its import is not (proved to be) legal.
286     if (Reason != FunctionImporter::ImportFailureReason::None)
287       continue;
288     auto *Summary =
289         cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
290 
291     // Don't bother importing the definition if the chance of inlining it is
292     // not high enough (except under `--force-import-all`).
293     if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
294         !ForceImportAll) {
295       TooLargeOrNoInlineSummary = Summary;
296       Reason = FunctionImporter::ImportFailureReason::TooLarge;
297       continue;
298     }
299 
300     // Don't bother importing the definition if we can't inline it anyway.
301     if (Summary->fflags().NoInline && !ForceImportAll) {
302       TooLargeOrNoInlineSummary = Summary;
303       Reason = FunctionImporter::ImportFailureReason::NoInline;
304       continue;
305     }
306 
307     return Summary;
308   }
309   return nullptr;
310 }
311 
namespace {

/// A worklist entry: a function summary paired with the unsigned threshold
/// associated with it.
using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */>;

} // anonymous namespace
317 
318 /// Import globals referenced by a function or other globals that are being
319 /// imported, if importing such global is possible.
320 class GlobalsImporter final {
321   const ModuleSummaryIndex &Index;
322   const GVSummaryMapTy &DefinedGVSummaries;
323   function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
324       IsPrevailing;
325   FunctionImporter::ImportMapTy &ImportList;
326   DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
327 
shouldImportGlobal(const ValueInfo & VI)328   bool shouldImportGlobal(const ValueInfo &VI) {
329     const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
330     if (GVS == DefinedGVSummaries.end())
331       return true;
332     // We should not skip import if the module contains a non-prevailing
333     // definition with interposable linkage type. This is required for
334     // correctness in the situation where there is a prevailing def available
335     // for import and marked read-only. In this case, the non-prevailing def
336     // will be converted to a declaration, while the prevailing one becomes
337     // internal, thus no definitions will be available for linking. In order to
338     // prevent undefined symbol link error, the prevailing definition must be
339     // imported.
340     // FIXME: Consider adding a check that the suitable prevailing definition
341     // exists and marked read-only.
342     if (VI.getSummaryList().size() > 1 &&
343         GlobalValue::isInterposableLinkage(GVS->second->linkage()) &&
344         !IsPrevailing(VI.getGUID(), GVS->second))
345       return true;
346 
347     return false;
348   }
349 
350   void
onImportingSummaryImpl(const GlobalValueSummary & Summary,SmallVectorImpl<const GlobalVarSummary * > & Worklist)351   onImportingSummaryImpl(const GlobalValueSummary &Summary,
352                          SmallVectorImpl<const GlobalVarSummary *> &Worklist) {
353     for (const auto &VI : Summary.refs()) {
354       if (!shouldImportGlobal(VI)) {
355         LLVM_DEBUG(
356             dbgs() << "Ref ignored! Target already in destination module.\n");
357         continue;
358       }
359 
360       LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
361 
362       // If this is a local variable, make sure we import the copy
363       // in the caller's module. The only time a local variable can
364       // share an entry in the index is if there is a local with the same name
365       // in another module that had the same source file name (in a different
366       // directory), where each was compiled in their own directory so there
367       // was not distinguishing path.
368       auto LocalNotInModule =
369           [&](const GlobalValueSummary *RefSummary) -> bool {
370         return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
371                RefSummary->modulePath() != Summary.modulePath();
372       };
373 
374       for (const auto &RefSummary : VI.getSummaryList()) {
375         const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
376         // Functions could be referenced by global vars - e.g. a vtable; but we
377         // don't currently imagine a reason those would be imported here, rather
378         // than as part of the logic deciding which functions to import (i.e.
379         // based on profile information). Should we decide to handle them here,
380         // we can refactor accordingly at that time.
381         if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
382             LocalNotInModule(GVS))
383           continue;
384 
385         // If there isn't an entry for GUID, insert <GUID, Definition> pair.
386         // Otherwise, definition should take precedence over declaration.
387         auto [Iter, Inserted] =
388             ImportList[RefSummary->modulePath()].try_emplace(
389                 VI.getGUID(), GlobalValueSummary::Definition);
390         // Only update stat and exports if we haven't already imported this
391         // variable.
392         if (!Inserted) {
393           // Set the value to 'std::min(existing-value, new-value)' to make
394           // sure a definition takes precedence over a declaration.
395           Iter->second = std::min(GlobalValueSummary::Definition, Iter->second);
396           break;
397         }
398         NumImportedGlobalVarsThinLink++;
399         // Any references made by this variable will be marked exported
400         // later, in ComputeCrossModuleImport, after import decisions are
401         // complete, which is more efficient than adding them here.
402         if (ExportLists)
403           (*ExportLists)[RefSummary->modulePath()].insert(VI);
404 
405         // If variable is not writeonly we attempt to recursively analyze
406         // its references in order to import referenced constants.
407         if (!Index.isWriteOnly(GVS))
408           Worklist.emplace_back(GVS);
409         break;
410       }
411     }
412   }
413 
414 public:
GlobalsImporter(const ModuleSummaryIndex & Index,const GVSummaryMapTy & DefinedGVSummaries,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> IsPrevailing,FunctionImporter::ImportMapTy & ImportList,DenseMap<StringRef,FunctionImporter::ExportSetTy> * ExportLists)415   GlobalsImporter(
416       const ModuleSummaryIndex &Index, const GVSummaryMapTy &DefinedGVSummaries,
417       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
418           IsPrevailing,
419       FunctionImporter::ImportMapTy &ImportList,
420       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
421       : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
422         IsPrevailing(IsPrevailing), ImportList(ImportList),
423         ExportLists(ExportLists) {}
424 
onImportingSummary(const GlobalValueSummary & Summary)425   void onImportingSummary(const GlobalValueSummary &Summary) {
426     SmallVector<const GlobalVarSummary *, 128> Worklist;
427     onImportingSummaryImpl(Summary, Worklist);
428     while (!Worklist.empty())
429       onImportingSummaryImpl(*Worklist.pop_back_val(), Worklist);
430   }
431 };
432 
433 static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);
434 
/// Determine the list of imports and exports for each module.
///
/// Instances are obtained through create(); the constructor is protected so
/// that only derived classes (and create()) can build one directly.
class ModuleImportsManager {
protected:
  // Callback taking a GUID and one of its summaries; presumably reports
  // whether that summary is the prevailing copy - confirm against callers.
  function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
      IsPrevailing;
  // The summary index that import decisions are based on.
  const ModuleSummaryIndex &Index;
  // Optional per-module export sets; may be null (see the default argument
  // of the constructor).
  DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;

  ModuleImportsManager(
      function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
          IsPrevailing,
      const ModuleSummaryIndex &Index,
      DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
      : IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}

public:
  virtual ~ModuleImportsManager() = default;

  /// Given the list of globals defined in a module, compute the list of imports
  /// as well as the list of "exports", i.e. the list of symbols referenced from
  /// another module (that may require promotion).
  virtual void
  computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
                         StringRef ModName,
                         FunctionImporter::ImportMapTy &ImportList);

  /// Factory returning an owning pointer to a manager. The definition is not
  /// next to this declaration.
  /// NOTE(review): presumably returns a WorkloadImportsManager when
  /// -thinlto-workload-def is given - confirm against the definition.
  static std::unique_ptr<ModuleImportsManager>
  create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
             IsPrevailing,
         const ModuleSummaryIndex &Index,
         DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
             nullptr);
};
468 
469 /// A ModuleImportsManager that operates based on a workload definition (see
470 /// -thinlto-workload-def). For modules that do not define workload roots, it
471 /// applies the base ModuleImportsManager import policy.
472 class WorkloadImportsManager : public ModuleImportsManager {
473   // Keep a module name -> value infos to import association. We use it to
474   // determine if a module's import list should be done by the base
475   // ModuleImportsManager or by us.
476   StringMap<DenseSet<ValueInfo>> Workloads;
477 
478   void
computeImportForModule(const GVSummaryMapTy & DefinedGVSummaries,StringRef ModName,FunctionImporter::ImportMapTy & ImportList)479   computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
480                          StringRef ModName,
481                          FunctionImporter::ImportMapTy &ImportList) override {
482     auto SetIter = Workloads.find(ModName);
483     if (SetIter == Workloads.end()) {
484       LLVM_DEBUG(dbgs() << "[Workload] " << ModName
485                         << " does not contain the root of any context.\n");
486       return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
487                                                           ModName, ImportList);
488     }
489     LLVM_DEBUG(dbgs() << "[Workload] " << ModName
490                       << " contains the root(s) of context(s).\n");
491 
492     GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
493                         ExportLists);
494     auto &ValueInfos = SetIter->second;
495     SmallVector<EdgeInfo, 128> GlobWorklist;
496     for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
497       auto It = DefinedGVSummaries.find(VI.getGUID());
498       if (It != DefinedGVSummaries.end() &&
499           IsPrevailing(VI.getGUID(), It->second)) {
500         LLVM_DEBUG(
501             dbgs() << "[Workload] " << VI.name()
502                    << " has the prevailing variant already in the module "
503                    << ModName << ". No need to import\n");
504         continue;
505       }
506       auto Candidates =
507           qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
508 
509       const GlobalValueSummary *GVS = nullptr;
510       auto PotentialCandidates = llvm::map_range(
511           llvm::make_filter_range(
512               Candidates,
513               [&](const auto &Candidate) {
514                 LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name()
515                                   << " from " << Candidate.second->modulePath()
516                                   << " ImportFailureReason: "
517                                   << getFailureName(Candidate.first) << "\n");
518                 return Candidate.first ==
519                         FunctionImporter::ImportFailureReason::None;
520               }),
521           [](const auto &Candidate) { return Candidate.second; });
522       if (PotentialCandidates.empty()) {
523         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
524                           << " because can't find eligible Callee. Guid is: "
525                           << Function::getGUID(VI.name()) << "\n");
526         continue;
527       }
528       /// We will prefer importing the prevailing candidate, if not, we'll
529       /// still pick the first available candidate. The reason we want to make
530       /// sure we do import the prevailing candidate is because the goal of
531       /// workload-awareness is to enable optimizations specializing the call
532       /// graph of that workload. Suppose a function is already defined in the
533       /// module, but it's not the prevailing variant. Suppose also we do not
534       /// inline it (in fact, if it were interposable, we can't inline it),
535       /// but we could specialize it to the workload in other ways. However,
536       /// the linker would drop it in the favor of the prevailing copy.
537       /// Instead, by importing the prevailing variant (assuming also the use
538       /// of `-avail-extern-to-local`), we keep the specialization. We could
539       /// alteranatively make the non-prevailing variant local, but the
540       /// prevailing one is also the one for which we would have previously
541       /// collected profiles, making it preferrable.
542       auto PrevailingCandidates = llvm::make_filter_range(
543           PotentialCandidates, [&](const auto *Candidate) {
544             return IsPrevailing(VI.getGUID(), Candidate);
545           });
546       if (PrevailingCandidates.empty()) {
547         GVS = *PotentialCandidates.begin();
548         if (!llvm::hasSingleElement(PotentialCandidates) &&
549             GlobalValue::isLocalLinkage(GVS->linkage()))
550           LLVM_DEBUG(
551               dbgs()
552               << "[Workload] Found multiple non-prevailing candidates for "
553               << VI.name()
554               << ". This is unexpected. Are module paths passed to the "
555                  "compiler unique for the modules passed to the linker?");
556         // We could in theory have multiple (interposable) copies of a symbol
557         // when there is no prevailing candidate, if say the prevailing copy was
558         // in a native object being linked in. However, we should in theory be
559         // marking all of these non-prevailing IR copies dead in that case, in
560         // which case they won't be candidates.
561         assert(GVS->isLive());
562       } else {
563         assert(llvm::hasSingleElement(PrevailingCandidates));
564         GVS = *PrevailingCandidates.begin();
565       }
566 
567       auto ExportingModule = GVS->modulePath();
568       // We checked that for the prevailing case, but if we happen to have for
569       // example an internal that's defined in this module, it'd have no
570       // PrevailingCandidates.
571       if (ExportingModule == ModName) {
572         LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
573                           << " because its defining module is the same as the "
574                              "current module\n");
575         continue;
576       }
577       LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
578                         << ExportingModule << " : "
579                         << Function::getGUID(VI.name()) << "\n");
580       ImportList[ExportingModule][VI.getGUID()] =
581           GlobalValueSummary::Definition;
582       GVI.onImportingSummary(*GVS);
583       if (ExportLists)
584         (*ExportLists)[ExportingModule].insert(VI);
585     }
586     LLVM_DEBUG(dbgs() << "[Workload] Done\n");
587   }
588 
589 public:
WorkloadImportsManager(function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> IsPrevailing,const ModuleSummaryIndex & Index,DenseMap<StringRef,FunctionImporter::ExportSetTy> * ExportLists)590   WorkloadImportsManager(
591       function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
592           IsPrevailing,
593       const ModuleSummaryIndex &Index,
594       DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
595       : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
596     // Since the workload def uses names, we need a quick lookup
597     // name->ValueInfo.
598     StringMap<ValueInfo> NameToValueInfo;
599     StringSet<> AmbiguousNames;
600     for (auto &I : Index) {
601       ValueInfo VI = Index.getValueInfo(I);
602       if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
603         LLVM_DEBUG(AmbiguousNames.insert(VI.name()));
604     }
605     auto DbgReportIfAmbiguous = [&](StringRef Name) {
606       LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) {
607         dbgs() << "[Workload] Function name " << Name
608                << " present in the workload definition is ambiguous. Consider "
609                   "compiling with -funique-internal-linkage-names.";
610       });
611     };
612     std::error_code EC;
613     auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
614     if (std::error_code EC = BufferOrErr.getError()) {
615       report_fatal_error("Failed to open context file");
616       return;
617     }
618     auto Buffer = std::move(BufferOrErr.get());
619     std::map<std::string, std::vector<std::string>> WorkloadDefs;
620     json::Path::Root NullRoot;
621     // The JSON is supposed to contain a dictionary matching the type of
622     // WorkloadDefs. For example:
623     // {
624     //   "rootFunction_1": ["function_to_import_1", "function_to_import_2"],
625     //   "rootFunction_2": ["function_to_import_3", "function_to_import_4"]
626     // }
627     auto Parsed = json::parse(Buffer->getBuffer());
628     if (!Parsed)
629       report_fatal_error(Parsed.takeError());
630     if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
631       report_fatal_error("Invalid thinlto contextual profile format.");
632     for (const auto &Workload : WorkloadDefs) {
633       const auto &Root = Workload.first;
634       DbgReportIfAmbiguous(Root);
635       LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
636       const auto &AllCallees = Workload.second;
637       auto RootIt = NameToValueInfo.find(Root);
638       if (RootIt == NameToValueInfo.end()) {
639         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
640                           << " not found in this linkage unit.\n");
641         continue;
642       }
643       auto RootVI = RootIt->second;
644       if (RootVI.getSummaryList().size() != 1) {
645         LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
646                           << " should have exactly one summary, but has "
647                           << RootVI.getSummaryList().size() << ". Skipping.\n");
648         continue;
649       }
650       StringRef RootDefiningModule =
651           RootVI.getSummaryList().front()->modulePath();
652       LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
653                         << " is : " << RootDefiningModule << "\n");
654       auto &Set = Workloads[RootDefiningModule];
655       for (const auto &Callee : AllCallees) {
656         LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
657         DbgReportIfAmbiguous(Callee);
658         auto ElemIt = NameToValueInfo.find(Callee);
659         if (ElemIt == NameToValueInfo.end()) {
660           LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
661           continue;
662         }
663         Set.insert(ElemIt->second);
664       }
665       LLVM_DEBUG({
666         dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
667                << " distinct callees.\n";
668         for (const auto &VI : Set) {
669           dbgs() << "[Workload] Root: " << Root
670                  << " Would include: " << VI.getGUID() << "\n";
671         }
672       });
673     }
674   }
675 };
676 
create(function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> IsPrevailing,const ModuleSummaryIndex & Index,DenseMap<StringRef,FunctionImporter::ExportSetTy> * ExportLists)677 std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
678     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
679         IsPrevailing,
680     const ModuleSummaryIndex &Index,
681     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
682   if (WorkloadDefinitions.empty()) {
683     LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
684     return std::unique_ptr<ModuleImportsManager>(
685         new ModuleImportsManager(IsPrevailing, Index, ExportLists));
686   }
687   LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
688   return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
689                                                   ExportLists);
690 }
691 
692 static const char *
getFailureName(FunctionImporter::ImportFailureReason Reason)693 getFailureName(FunctionImporter::ImportFailureReason Reason) {
694   switch (Reason) {
695   case FunctionImporter::ImportFailureReason::None:
696     return "None";
697   case FunctionImporter::ImportFailureReason::GlobalVar:
698     return "GlobalVar";
699   case FunctionImporter::ImportFailureReason::NotLive:
700     return "NotLive";
701   case FunctionImporter::ImportFailureReason::TooLarge:
702     return "TooLarge";
703   case FunctionImporter::ImportFailureReason::InterposableLinkage:
704     return "InterposableLinkage";
705   case FunctionImporter::ImportFailureReason::LocalLinkageNotInModule:
706     return "LocalLinkageNotInModule";
707   case FunctionImporter::ImportFailureReason::NotEligible:
708     return "NotEligible";
709   case FunctionImporter::ImportFailureReason::NoInline:
710     return "NoInline";
711   }
712   llvm_unreachable("invalid reason");
713 }
714 
715 /// Compute the list of functions to import for a given caller. Mark these
716 /// imported functions and the symbols they reference in their source module as
717 /// exported from their source module.
computeImportForFunction(const FunctionSummary & Summary,const ModuleSummaryIndex & Index,const unsigned Threshold,const GVSummaryMapTy & DefinedGVSummaries,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing,SmallVectorImpl<EdgeInfo> & Worklist,GlobalsImporter & GVImporter,FunctionImporter::ImportMapTy & ImportList,DenseMap<StringRef,FunctionImporter::ExportSetTy> * ExportLists,FunctionImporter::ImportThresholdsTy & ImportThresholds)718 static void computeImportForFunction(
719     const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
720     const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
721     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
722         isPrevailing,
723     SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
724     FunctionImporter::ImportMapTy &ImportList,
725     DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
726     FunctionImporter::ImportThresholdsTy &ImportThresholds) {
727   GVImporter.onImportingSummary(Summary);
728   static int ImportCount = 0;
729   for (const auto &Edge : Summary.calls()) {
730     ValueInfo VI = Edge.first;
731     LLVM_DEBUG(dbgs() << " edge -> " << VI << " Threshold:" << Threshold
732                       << "\n");
733 
734     if (ImportCutoff >= 0 && ImportCount >= ImportCutoff) {
735       LLVM_DEBUG(dbgs() << "ignored! import-cutoff value of " << ImportCutoff
736                         << " reached.\n");
737       continue;
738     }
739 
740     if (DefinedGVSummaries.count(VI.getGUID())) {
741       // FIXME: Consider not skipping import if the module contains
742       // a non-prevailing def with interposable linkage. The prevailing copy
743       // can safely be imported (see shouldImportGlobal()).
744       LLVM_DEBUG(dbgs() << "ignored! Target already in destination module.\n");
745       continue;
746     }
747 
748     auto GetBonusMultiplier = [](CalleeInfo::HotnessType Hotness) -> float {
749       if (Hotness == CalleeInfo::HotnessType::Hot)
750         return ImportHotMultiplier;
751       if (Hotness == CalleeInfo::HotnessType::Cold)
752         return ImportColdMultiplier;
753       if (Hotness == CalleeInfo::HotnessType::Critical)
754         return ImportCriticalMultiplier;
755       return 1.0;
756     };
757 
758     const auto NewThreshold =
759         Threshold * GetBonusMultiplier(Edge.second.getHotness());
760 
761     auto IT = ImportThresholds.insert(std::make_pair(
762         VI.getGUID(), std::make_tuple(NewThreshold, nullptr, nullptr)));
763     bool PreviouslyVisited = !IT.second;
764     auto &ProcessedThreshold = std::get<0>(IT.first->second);
765     auto &CalleeSummary = std::get<1>(IT.first->second);
766     auto &FailureInfo = std::get<2>(IT.first->second);
767 
768     bool IsHotCallsite =
769         Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
770     bool IsCriticalCallsite =
771         Edge.second.getHotness() == CalleeInfo::HotnessType::Critical;
772 
773     const FunctionSummary *ResolvedCalleeSummary = nullptr;
774     if (CalleeSummary) {
775       assert(PreviouslyVisited);
776       // Since the traversal of the call graph is DFS, we can revisit a function
777       // a second time with a higher threshold. In this case, it is added back
778       // to the worklist with the new threshold (so that its own callee chains
779       // can be considered with the higher threshold).
780       if (NewThreshold <= ProcessedThreshold) {
781         LLVM_DEBUG(
782             dbgs() << "ignored! Target was already imported with Threshold "
783                    << ProcessedThreshold << "\n");
784         continue;
785       }
786       // Update with new larger threshold.
787       ProcessedThreshold = NewThreshold;
788       ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
789     } else {
790       // If we already rejected importing a callee at the same or higher
791       // threshold, don't waste time calling selectCallee.
792       if (PreviouslyVisited && NewThreshold <= ProcessedThreshold) {
793         LLVM_DEBUG(
794             dbgs() << "ignored! Target was already rejected with Threshold "
795             << ProcessedThreshold << "\n");
796         if (PrintImportFailures) {
797           assert(FailureInfo &&
798                  "Expected FailureInfo for previously rejected candidate");
799           FailureInfo->Attempts++;
800         }
801         continue;
802       }
803 
804       FunctionImporter::ImportFailureReason Reason{};
805 
806       // `SummaryForDeclImport` is an summary eligible for declaration import.
807       const GlobalValueSummary *SummaryForDeclImport = nullptr;
808       CalleeSummary =
809           selectCallee(Index, VI.getSummaryList(), NewThreshold,
810                        Summary.modulePath(), SummaryForDeclImport, Reason);
811       if (!CalleeSummary) {
812         // There isn't a callee for definition import but one for declaration
813         // import.
814         if (ImportDeclaration && SummaryForDeclImport) {
815           StringRef DeclSourceModule = SummaryForDeclImport->modulePath();
816 
817           // Since definition takes precedence over declaration for the same VI,
818           // try emplace <VI, declaration> pair without checking insert result.
819           // If insert doesn't happen, there must be an existing entry keyed by
820           // VI. Note `ExportLists` only keeps track of exports due to imported
821           // definitions.
822           ImportList[DeclSourceModule].try_emplace(
823               VI.getGUID(), GlobalValueSummary::Declaration);
824         }
825         // Update with new larger threshold if this was a retry (otherwise
826         // we would have already inserted with NewThreshold above). Also
827         // update failure info if requested.
828         if (PreviouslyVisited) {
829           ProcessedThreshold = NewThreshold;
830           if (PrintImportFailures) {
831             assert(FailureInfo &&
832                    "Expected FailureInfo for previously rejected candidate");
833             FailureInfo->Reason = Reason;
834             FailureInfo->Attempts++;
835             FailureInfo->MaxHotness =
836                 std::max(FailureInfo->MaxHotness, Edge.second.getHotness());
837           }
838         } else if (PrintImportFailures) {
839           assert(!FailureInfo &&
840                  "Expected no FailureInfo for newly rejected candidate");
841           FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
842               VI, Edge.second.getHotness(), Reason, 1);
843         }
844         if (ForceImportAll) {
845           std::string Msg = std::string("Failed to import function ") +
846                             VI.name().str() + " due to " +
847                             getFailureName(Reason);
848           auto Error = make_error<StringError>(
849               Msg, make_error_code(errc::not_supported));
850           logAllUnhandledErrors(std::move(Error), errs(),
851                                 "Error importing module: ");
852           break;
853         } else {
854           LLVM_DEBUG(dbgs()
855                      << "ignored! No qualifying callee with summary found.\n");
856           continue;
857         }
858       }
859 
860       // "Resolve" the summary
861       CalleeSummary = CalleeSummary->getBaseObject();
862       ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
863 
864       assert((ResolvedCalleeSummary->fflags().AlwaysInline || ForceImportAll ||
865               (ResolvedCalleeSummary->instCount() <= NewThreshold)) &&
866              "selectCallee() didn't honor the threshold");
867 
868       auto ExportModulePath = ResolvedCalleeSummary->modulePath();
869 
870       // Try emplace the definition entry, and update stats based on insertion
871       // status.
872       auto [Iter, Inserted] = ImportList[ExportModulePath].try_emplace(
873           VI.getGUID(), GlobalValueSummary::Definition);
874 
875       // We previously decided to import this GUID definition if it was already
876       // inserted in the set of imports from the exporting module.
877       if (Inserted || Iter->second == GlobalValueSummary::Declaration) {
878         NumImportedFunctionsThinLink++;
879         if (IsHotCallsite)
880           NumImportedHotFunctionsThinLink++;
881         if (IsCriticalCallsite)
882           NumImportedCriticalFunctionsThinLink++;
883       }
884 
885       if (Iter->second == GlobalValueSummary::Declaration)
886         Iter->second = GlobalValueSummary::Definition;
887 
888       // Any calls/references made by this function will be marked exported
889       // later, in ComputeCrossModuleImport, after import decisions are
890       // complete, which is more efficient than adding them here.
891       if (ExportLists)
892         (*ExportLists)[ExportModulePath].insert(VI);
893     }
894 
895     auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
896       // Adjust the threshold for next level of imported functions.
897       // The threshold is different for hot callsites because we can then
898       // inline chains of hot calls.
899       if (IsHotCallsite)
900         return Threshold * ImportHotInstrFactor;
901       return Threshold * ImportInstrFactor;
902     };
903 
904     const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
905 
906     ImportCount++;
907 
908     // Insert the newly imported function to the worklist.
909     Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
910   }
911 }
912 
computeImportForModule(const GVSummaryMapTy & DefinedGVSummaries,StringRef ModName,FunctionImporter::ImportMapTy & ImportList)913 void ModuleImportsManager::computeImportForModule(
914     const GVSummaryMapTy &DefinedGVSummaries, StringRef ModName,
915     FunctionImporter::ImportMapTy &ImportList) {
916   // Worklist contains the list of function imported in this module, for which
917   // we will analyse the callees and may import further down the callgraph.
918   SmallVector<EdgeInfo, 128> Worklist;
919   GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
920                       ExportLists);
921   FunctionImporter::ImportThresholdsTy ImportThresholds;
922 
923   // Populate the worklist with the import for the functions in the current
924   // module
925   for (const auto &GVSummary : DefinedGVSummaries) {
926 #ifndef NDEBUG
927     // FIXME: Change the GVSummaryMapTy to hold ValueInfo instead of GUID
928     // so this map look up (and possibly others) can be avoided.
929     auto VI = Index.getValueInfo(GVSummary.first);
930 #endif
931     if (!Index.isGlobalValueLive(GVSummary.second)) {
932       LLVM_DEBUG(dbgs() << "Ignores Dead GUID: " << VI << "\n");
933       continue;
934     }
935     auto *FuncSummary =
936         dyn_cast<FunctionSummary>(GVSummary.second->getBaseObject());
937     if (!FuncSummary)
938       // Skip import for global variables
939       continue;
940     LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
941     computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
942                              DefinedGVSummaries, IsPrevailing, Worklist, GVI,
943                              ImportList, ExportLists, ImportThresholds);
944   }
945 
946   // Process the newly imported functions and add callees to the worklist.
947   while (!Worklist.empty()) {
948     auto GVInfo = Worklist.pop_back_val();
949     auto *Summary = std::get<0>(GVInfo);
950     auto Threshold = std::get<1>(GVInfo);
951 
952     if (auto *FS = dyn_cast<FunctionSummary>(Summary))
953       computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
954                                IsPrevailing, Worklist, GVI, ImportList,
955                                ExportLists, ImportThresholds);
956   }
957 
958   // Print stats about functions considered but rejected for importing
959   // when requested.
960   if (PrintImportFailures) {
961     dbgs() << "Missed imports into module " << ModName << "\n";
962     for (auto &I : ImportThresholds) {
963       auto &ProcessedThreshold = std::get<0>(I.second);
964       auto &CalleeSummary = std::get<1>(I.second);
965       auto &FailureInfo = std::get<2>(I.second);
966       if (CalleeSummary)
967         continue; // We are going to import.
968       assert(FailureInfo);
969       FunctionSummary *FS = nullptr;
970       if (!FailureInfo->VI.getSummaryList().empty())
971         FS = dyn_cast<FunctionSummary>(
972             FailureInfo->VI.getSummaryList()[0]->getBaseObject());
973       dbgs() << FailureInfo->VI
974              << ": Reason = " << getFailureName(FailureInfo->Reason)
975              << ", Threshold = " << ProcessedThreshold
976              << ", Size = " << (FS ? (int)FS->instCount() : -1)
977              << ", MaxHotness = " << getHotnessName(FailureInfo->MaxHotness)
978              << ", Attempts = " << FailureInfo->Attempts << "\n";
979     }
980   }
981 }
982 
983 #ifndef NDEBUG
isGlobalVarSummary(const ModuleSummaryIndex & Index,ValueInfo VI)984 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, ValueInfo VI) {
985   auto SL = VI.getSummaryList();
986   return SL.empty()
987              ? false
988              : SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
989 }
990 
isGlobalVarSummary(const ModuleSummaryIndex & Index,GlobalValue::GUID G)991 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
992                                GlobalValue::GUID G) {
993   if (const auto &VI = Index.getValueInfo(G))
994     return isGlobalVarSummary(Index, VI);
995   return false;
996 }
997 
998 // Return the number of global variable summaries in ExportSet.
999 static unsigned
numGlobalVarSummaries(const ModuleSummaryIndex & Index,FunctionImporter::ExportSetTy & ExportSet)1000 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1001                       FunctionImporter::ExportSetTy &ExportSet) {
1002   unsigned NumGVS = 0;
1003   for (auto &VI : ExportSet)
1004     if (isGlobalVarSummary(Index, VI.getGUID()))
1005       ++NumGVS;
1006   return NumGVS;
1007 }
1008 
1009 // Given ImportMap, return the number of global variable summaries and record
1010 // the number of defined function summaries as output parameter.
1011 static unsigned
numGlobalVarSummaries(const ModuleSummaryIndex & Index,FunctionImporter::FunctionsToImportTy & ImportMap,unsigned & DefinedFS)1012 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1013                       FunctionImporter::FunctionsToImportTy &ImportMap,
1014                       unsigned &DefinedFS) {
1015   unsigned NumGVS = 0;
1016   DefinedFS = 0;
1017   for (auto &[GUID, Type] : ImportMap) {
1018     if (isGlobalVarSummary(Index, GUID))
1019       ++NumGVS;
1020     else if (Type == GlobalValueSummary::Definition)
1021       ++DefinedFS;
1022   }
1023   return NumGVS;
1024 }
1025 #endif
1026 
1027 #ifndef NDEBUG
checkVariableImport(const ModuleSummaryIndex & Index,DenseMap<StringRef,FunctionImporter::ImportMapTy> & ImportLists,DenseMap<StringRef,FunctionImporter::ExportSetTy> & ExportLists)1028 static bool checkVariableImport(
1029     const ModuleSummaryIndex &Index,
1030     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1031     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1032   DenseSet<GlobalValue::GUID> FlattenedImports;
1033 
1034   for (auto &ImportPerModule : ImportLists)
1035     for (auto &ExportPerModule : ImportPerModule.second)
1036       for (auto &[GUID, Type] : ExportPerModule.second)
1037         FlattenedImports.insert(GUID);
1038 
1039   // Checks that all GUIDs of read/writeonly vars we see in export lists
1040   // are also in the import lists. Otherwise we my face linker undefs,
1041   // because readonly and writeonly vars are internalized in their
1042   // source modules. The exception would be if it has a linkage type indicating
1043   // that there may have been a copy existing in the importing module (e.g.
1044   // linkonce_odr). In that case we cannot accurately do this checking.
1045   auto IsReadOrWriteOnlyVarNeedingImporting = [&](StringRef ModulePath,
1046                                                   const ValueInfo &VI) {
1047     auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
1048         Index.findSummaryInModule(VI, ModulePath));
1049     return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS)) &&
1050            !(GVS->linkage() == GlobalValue::AvailableExternallyLinkage ||
1051              GVS->linkage() == GlobalValue::WeakODRLinkage ||
1052              GVS->linkage() == GlobalValue::LinkOnceODRLinkage);
1053   };
1054 
1055   for (auto &ExportPerModule : ExportLists)
1056     for (auto &VI : ExportPerModule.second)
1057       if (!FlattenedImports.count(VI.getGUID()) &&
1058           IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
1059         return false;
1060 
1061   return true;
1062 }
1063 #endif
1064 
1065 /// Compute all the import and export for every module using the Index.
ComputeCrossModuleImport(const ModuleSummaryIndex & Index,const DenseMap<StringRef,GVSummaryMapTy> & ModuleToDefinedGVSummaries,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing,DenseMap<StringRef,FunctionImporter::ImportMapTy> & ImportLists,DenseMap<StringRef,FunctionImporter::ExportSetTy> & ExportLists)1066 void llvm::ComputeCrossModuleImport(
1067     const ModuleSummaryIndex &Index,
1068     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1069     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1070         isPrevailing,
1071     DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1072     DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1073   auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
1074   // For each module that has function defined, compute the import/export lists.
1075   for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
1076     auto &ImportList = ImportLists[DefinedGVSummaries.first];
1077     LLVM_DEBUG(dbgs() << "Computing import for Module '"
1078                       << DefinedGVSummaries.first << "'\n");
1079     MIS->computeImportForModule(DefinedGVSummaries.second,
1080                                 DefinedGVSummaries.first, ImportList);
1081   }
1082 
1083   // When computing imports we only added the variables and functions being
1084   // imported to the export list. We also need to mark any references and calls
1085   // they make as exported as well. We do this here, as it is more efficient
1086   // since we may import the same values multiple times into different modules
1087   // during the import computation.
1088   for (auto &ELI : ExportLists) {
1089     // `NewExports` tracks the VI that gets exported because the full definition
1090     // of its user/referencer gets exported.
1091     FunctionImporter::ExportSetTy NewExports;
1092     const auto &DefinedGVSummaries =
1093         ModuleToDefinedGVSummaries.lookup(ELI.first);
1094     for (auto &EI : ELI.second) {
1095       // Find the copy defined in the exporting module so that we can mark the
1096       // values it references in that specific definition as exported.
1097       // Below we will add all references and called values, without regard to
1098       // whether they are also defined in this module. We subsequently prune the
1099       // list to only include those defined in the exporting module, see comment
1100       // there as to why.
1101       auto DS = DefinedGVSummaries.find(EI.getGUID());
1102       // Anything marked exported during the import computation must have been
1103       // defined in the exporting module.
1104       assert(DS != DefinedGVSummaries.end());
1105       auto *S = DS->getSecond();
1106       S = S->getBaseObject();
1107       if (auto *GVS = dyn_cast<GlobalVarSummary>(S)) {
1108         // Export referenced functions and variables. We don't export/promote
1109         // objects referenced by writeonly variable initializer, because
1110         // we convert such variables initializers to "zeroinitializer".
1111         // See processGlobalForThinLTO.
1112         if (!Index.isWriteOnly(GVS))
1113           for (const auto &VI : GVS->refs())
1114             NewExports.insert(VI);
1115       } else {
1116         auto *FS = cast<FunctionSummary>(S);
1117         for (const auto &Edge : FS->calls())
1118           NewExports.insert(Edge.first);
1119         for (const auto &Ref : FS->refs())
1120           NewExports.insert(Ref);
1121       }
1122     }
1123     // Prune list computed above to only include values defined in the
1124     // exporting module. We do this after the above insertion since we may hit
1125     // the same ref/call target multiple times in above loop, and it is more
1126     // efficient to avoid a set lookup each time.
1127     for (auto EI = NewExports.begin(); EI != NewExports.end();) {
1128       if (!DefinedGVSummaries.count(EI->getGUID()))
1129         NewExports.erase(EI++);
1130       else
1131         ++EI;
1132     }
1133     ELI.second.insert(NewExports.begin(), NewExports.end());
1134   }
1135 
1136   assert(checkVariableImport(Index, ImportLists, ExportLists));
1137 #ifndef NDEBUG
1138   LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
1139                     << " modules:\n");
1140   for (auto &ModuleImports : ImportLists) {
1141     auto ModName = ModuleImports.first;
1142     auto &Exports = ExportLists[ModName];
1143     unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
1144     LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports "
1145                       << Exports.size() - NumGVS << " functions and " << NumGVS
1146                       << " vars. Imports from " << ModuleImports.second.size()
1147                       << " modules.\n");
1148     for (auto &Src : ModuleImports.second) {
1149       auto SrcModName = Src.first;
1150       unsigned DefinedFS = 0;
1151       unsigned NumGVSPerMod =
1152           numGlobalVarSummaries(Index, Src.second, DefinedFS);
1153       LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1154                         << Src.second.size() - NumGVSPerMod - DefinedFS
1155                         << " function declarations imported from " << SrcModName
1156                         << "\n");
1157       LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod
1158                         << " global vars imported from " << SrcModName << "\n");
1159     }
1160   }
1161 #endif
1162 }
1163 
1164 #ifndef NDEBUG
dumpImportListForModule(const ModuleSummaryIndex & Index,StringRef ModulePath,FunctionImporter::ImportMapTy & ImportList)1165 static void dumpImportListForModule(const ModuleSummaryIndex &Index,
1166                                     StringRef ModulePath,
1167                                     FunctionImporter::ImportMapTy &ImportList) {
1168   LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
1169                     << ImportList.size() << " modules.\n");
1170   for (auto &Src : ImportList) {
1171     auto SrcModName = Src.first;
1172     unsigned DefinedFS = 0;
1173     unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second, DefinedFS);
1174     LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1175                       << Src.second.size() - DefinedFS - NumGVSPerMod
1176                       << " function declarations imported from " << SrcModName
1177                       << "\n");
1178     LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod << " vars imported from "
1179                       << SrcModName << "\n");
1180   }
1181 }
1182 #endif
1183 
1184 /// Compute all the imports for the given module using the Index.
1185 ///
1186 /// \p isPrevailing is a callback that will be called with a global value's GUID
1187 /// and summary and should return whether the module corresponding to the
1188 /// summary contains the linker-prevailing copy of that value.
1189 ///
1190 /// \p ImportList will be populated with a map that can be passed to
1191 /// FunctionImporter::importFunctions() above (see description there).
ComputeCrossModuleImportForModuleForTest(StringRef ModulePath,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing,const ModuleSummaryIndex & Index,FunctionImporter::ImportMapTy & ImportList)1192 static void ComputeCrossModuleImportForModuleForTest(
1193     StringRef ModulePath,
1194     function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1195         isPrevailing,
1196     const ModuleSummaryIndex &Index,
1197     FunctionImporter::ImportMapTy &ImportList) {
1198   // Collect the list of functions this module defines.
1199   // GUID -> Summary
1200   GVSummaryMapTy FunctionSummaryMap;
1201   Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap);
1202 
1203   // Compute the import list for this module.
1204   LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
1205   auto MIS = ModuleImportsManager::create(isPrevailing, Index);
1206   MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
1207 
1208 #ifndef NDEBUG
1209   dumpImportListForModule(Index, ModulePath, ImportList);
1210 #endif
1211 }
1212 
1213 /// Mark all external summaries in \p Index for import into the given module.
1214 /// Used for testing the case of distributed builds using a distributed index.
1215 ///
1216 /// \p ImportList will be populated with a map that can be passed to
1217 /// FunctionImporter::importFunctions() above (see description there).
ComputeCrossModuleImportForModuleFromIndexForTest(StringRef ModulePath,const ModuleSummaryIndex & Index,FunctionImporter::ImportMapTy & ImportList)1218 static void ComputeCrossModuleImportForModuleFromIndexForTest(
1219     StringRef ModulePath, const ModuleSummaryIndex &Index,
1220     FunctionImporter::ImportMapTy &ImportList) {
1221   for (const auto &GlobalList : Index) {
1222     // Ignore entries for undefined references.
1223     if (GlobalList.second.SummaryList.empty())
1224       continue;
1225 
1226     auto GUID = GlobalList.first;
1227     assert(GlobalList.second.SummaryList.size() == 1 &&
1228            "Expected individual combined index to have one summary per GUID");
1229     auto &Summary = GlobalList.second.SummaryList[0];
1230     // Skip the summaries for the importing module. These are included to
1231     // e.g. record required linkage changes.
1232     if (Summary->modulePath() == ModulePath)
1233       continue;
1234     // Add an entry to provoke importing by thinBackend.
1235     auto [Iter, Inserted] = ImportList[Summary->modulePath()].try_emplace(
1236         GUID, Summary->importType());
1237     if (!Inserted) {
1238       // Use 'std::min' to make sure definition (with enum value 0) takes
1239       // precedence over declaration (with enum value 1).
1240       Iter->second = std::min(Iter->second, Summary->importType());
1241     }
1242   }
1243 #ifndef NDEBUG
1244   dumpImportListForModule(Index, ModulePath, ImportList);
1245 #endif
1246 }
1247 
1248 // For SamplePGO, the indirect call targets for local functions will
1249 // have its original name annotated in profile. We try to find the
1250 // corresponding PGOFuncName as the GUID, and fix up the edges
1251 // accordingly.
updateValueInfoForIndirectCalls(ModuleSummaryIndex & Index,FunctionSummary * FS)1252 void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
1253                                      FunctionSummary *FS) {
1254   for (auto &EI : FS->mutableCalls()) {
1255     if (!EI.first.getSummaryList().empty())
1256       continue;
1257     auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
1258     if (GUID == 0)
1259       continue;
1260     // Update the edge to point directly to the correct GUID.
1261     auto VI = Index.getValueInfo(GUID);
1262     if (llvm::any_of(
1263             VI.getSummaryList(),
1264             [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
1265               // The mapping from OriginalId to GUID may return a GUID
1266               // that corresponds to a static variable. Filter it out here.
1267               // This can happen when
1268               // 1) There is a call to a library function which is not defined
1269               // in the index.
1270               // 2) There is a static variable with the  OriginalGUID identical
1271               // to the GUID of the library function in 1);
1272               // When this happens the static variable in 2) will be found,
1273               // which needs to be filtered out.
1274               return SummaryPtr->getSummaryKind() ==
1275                      GlobalValueSummary::GlobalVarKind;
1276             }))
1277       continue;
1278     EI.first = VI;
1279   }
1280 }
1281 
updateIndirectCalls(ModuleSummaryIndex & Index)1282 void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
1283   for (const auto &Entry : Index) {
1284     for (const auto &S : Entry.second.SummaryList) {
1285       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1286         updateValueInfoForIndirectCalls(Index, FS);
1287     }
1288   }
1289 }
1290 
computeDeadSymbolsAndUpdateIndirectCalls(ModuleSummaryIndex & Index,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols,function_ref<PrevailingType (GlobalValue::GUID)> isPrevailing)1291 void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
1292     ModuleSummaryIndex &Index,
1293     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1294     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
1295   assert(!Index.withGlobalValueDeadStripping());
1296   if (!ComputeDead ||
1297       // Don't do anything when nothing is live, this is friendly with tests.
1298       GUIDPreservedSymbols.empty()) {
1299     // Still need to update indirect calls.
1300     updateIndirectCalls(Index);
1301     return;
1302   }
1303   unsigned LiveSymbols = 0;
1304   SmallVector<ValueInfo, 128> Worklist;
1305   Worklist.reserve(GUIDPreservedSymbols.size() * 2);
1306   for (auto GUID : GUIDPreservedSymbols) {
1307     ValueInfo VI = Index.getValueInfo(GUID);
1308     if (!VI)
1309       continue;
1310     for (const auto &S : VI.getSummaryList())
1311       S->setLive(true);
1312   }
1313 
1314   // Add values flagged in the index as live roots to the worklist.
1315   for (const auto &Entry : Index) {
1316     auto VI = Index.getValueInfo(Entry);
1317     for (const auto &S : Entry.second.SummaryList) {
1318       if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1319         updateValueInfoForIndirectCalls(Index, FS);
1320       if (S->isLive()) {
1321         LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
1322         Worklist.push_back(VI);
1323         ++LiveSymbols;
1324         break;
1325       }
1326     }
1327   }
1328 
1329   // Make value live and add it to the worklist if it was not live before.
1330   auto visit = [&](ValueInfo VI, bool IsAliasee) {
1331     // FIXME: If we knew which edges were created for indirect call profiles,
1332     // we could skip them here. Any that are live should be reached via
1333     // other edges, e.g. reference edges. Otherwise, using a profile collected
1334     // on a slightly different binary might provoke preserving, importing
1335     // and ultimately promoting calls to functions not linked into this
1336     // binary, which increases the binary size unnecessarily. Note that
1337     // if this code changes, the importer needs to change so that edges
1338     // to functions marked dead are skipped.
1339 
1340     if (llvm::any_of(VI.getSummaryList(),
1341                      [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
1342                        return S->isLive();
1343                      }))
1344       return;
1345 
1346     // We only keep live symbols that are known to be non-prevailing if any are
1347     // available_externally, linkonceodr, weakodr. Those symbols are discarded
1348     // later in the EliminateAvailableExternally pass and setting them to
1349     // not-live could break downstreams users of liveness information (PR36483)
1350     // or limit optimization opportunities.
1351     if (isPrevailing(VI.getGUID()) == PrevailingType::No) {
1352       bool KeepAliveLinkage = false;
1353       bool Interposable = false;
1354       for (const auto &S : VI.getSummaryList()) {
1355         if (S->linkage() == GlobalValue::AvailableExternallyLinkage ||
1356             S->linkage() == GlobalValue::WeakODRLinkage ||
1357             S->linkage() == GlobalValue::LinkOnceODRLinkage)
1358           KeepAliveLinkage = true;
1359         else if (GlobalValue::isInterposableLinkage(S->linkage()))
1360           Interposable = true;
1361       }
1362 
1363       if (!IsAliasee) {
1364         if (!KeepAliveLinkage)
1365           return;
1366 
1367         if (Interposable)
1368           report_fatal_error(
1369               "Interposable and available_externally/linkonce_odr/weak_odr "
1370               "symbol");
1371       }
1372     }
1373 
1374     for (const auto &S : VI.getSummaryList())
1375       S->setLive(true);
1376     ++LiveSymbols;
1377     Worklist.push_back(VI);
1378   };
1379 
1380   while (!Worklist.empty()) {
1381     auto VI = Worklist.pop_back_val();
1382     for (const auto &Summary : VI.getSummaryList()) {
1383       if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
1384         // If this is an alias, visit the aliasee VI to ensure that all copies
1385         // are marked live and it is added to the worklist for further
1386         // processing of its references.
1387         visit(AS->getAliaseeVI(), true);
1388         continue;
1389       }
1390       for (auto Ref : Summary->refs())
1391         visit(Ref, false);
1392       if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
1393         for (auto Call : FS->calls())
1394           visit(Call.first, false);
1395     }
1396   }
1397   Index.setWithGlobalValueDeadStripping();
1398 
1399   unsigned DeadSymbols = Index.size() - LiveSymbols;
1400   LLVM_DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
1401                     << " symbols Dead \n");
1402   NumDeadSymbols += DeadSymbols;
1403   NumLiveSymbols += LiveSymbols;
1404 }
1405 
1406 // Compute dead symbols and propagate constants in combined index.
computeDeadSymbolsWithConstProp(ModuleSummaryIndex & Index,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols,function_ref<PrevailingType (GlobalValue::GUID)> isPrevailing,bool ImportEnabled)1407 void llvm::computeDeadSymbolsWithConstProp(
1408     ModuleSummaryIndex &Index,
1409     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1410     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
1411     bool ImportEnabled) {
1412   computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
1413                                            isPrevailing);
1414   if (ImportEnabled)
1415     Index.propagateAttributes(GUIDPreservedSymbols);
1416 }
1417 
1418 /// Compute the set of summaries needed for a ThinLTO backend compilation of
1419 /// \p ModulePath.
gatherImportedSummariesForModule(StringRef ModulePath,const DenseMap<StringRef,GVSummaryMapTy> & ModuleToDefinedGVSummaries,const FunctionImporter::ImportMapTy & ImportList,std::map<std::string,GVSummaryMapTy> & ModuleToSummariesForIndex,GVSummaryPtrSet & DecSummaries)1420 void llvm::gatherImportedSummariesForModule(
1421     StringRef ModulePath,
1422     const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1423     const FunctionImporter::ImportMapTy &ImportList,
1424     std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
1425     GVSummaryPtrSet &DecSummaries) {
1426   // Include all summaries from the importing module.
1427   ModuleToSummariesForIndex[std::string(ModulePath)] =
1428       ModuleToDefinedGVSummaries.lookup(ModulePath);
1429   // Include summaries for imports.
1430   for (const auto &ILI : ImportList) {
1431     auto &SummariesForIndex = ModuleToSummariesForIndex[std::string(ILI.first)];
1432 
1433     const auto &DefinedGVSummaries =
1434         ModuleToDefinedGVSummaries.lookup(ILI.first);
1435     for (const auto &[GUID, Type] : ILI.second) {
1436       const auto &DS = DefinedGVSummaries.find(GUID);
1437       assert(DS != DefinedGVSummaries.end() &&
1438              "Expected a defined summary for imported global value");
1439       if (Type == GlobalValueSummary::Declaration)
1440         DecSummaries.insert(DS->second);
1441 
1442       SummariesForIndex[GUID] = DS->second;
1443     }
1444   }
1445 }
1446 
1447 /// Emit the files \p ModulePath will import from into \p OutputFilename.
EmitImportsFiles(StringRef ModulePath,StringRef OutputFilename,const std::map<std::string,GVSummaryMapTy> & ModuleToSummariesForIndex)1448 std::error_code llvm::EmitImportsFiles(
1449     StringRef ModulePath, StringRef OutputFilename,
1450     const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
1451   std::error_code EC;
1452   raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_Text);
1453   if (EC)
1454     return EC;
1455   for (const auto &ILI : ModuleToSummariesForIndex)
1456     // The ModuleToSummariesForIndex map includes an entry for the current
1457     // Module (needed for writing out the index files). We don't want to
1458     // include it in the imports file, however, so filter it out.
1459     if (ILI.first != ModulePath)
1460       ImportsOS << ILI.first << "\n";
1461   return std::error_code();
1462 }
1463 
convertToDeclaration(GlobalValue & GV)1464 bool llvm::convertToDeclaration(GlobalValue &GV) {
1465   LLVM_DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName()
1466                     << "\n");
1467   if (Function *F = dyn_cast<Function>(&GV)) {
1468     F->deleteBody();
1469     F->clearMetadata();
1470     F->setComdat(nullptr);
1471   } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
1472     V->setInitializer(nullptr);
1473     V->setLinkage(GlobalValue::ExternalLinkage);
1474     V->clearMetadata();
1475     V->setComdat(nullptr);
1476   } else {
1477     GlobalValue *NewGV;
1478     if (GV.getValueType()->isFunctionTy())
1479       NewGV =
1480           Function::Create(cast<FunctionType>(GV.getValueType()),
1481                            GlobalValue::ExternalLinkage, GV.getAddressSpace(),
1482                            "", GV.getParent());
1483     else
1484       NewGV =
1485           new GlobalVariable(*GV.getParent(), GV.getValueType(),
1486                              /*isConstant*/ false, GlobalValue::ExternalLinkage,
1487                              /*init*/ nullptr, "",
1488                              /*insertbefore*/ nullptr, GV.getThreadLocalMode(),
1489                              GV.getType()->getAddressSpace());
1490     NewGV->takeName(&GV);
1491     GV.replaceAllUsesWith(NewGV);
1492     return false;
1493   }
1494   if (!GV.isImplicitDSOLocal())
1495     GV.setDSOLocal(false);
1496   return true;
1497 }
1498 
thinLTOFinalizeInModule(Module & TheModule,const GVSummaryMapTy & DefinedGlobals,bool PropagateAttrs)1499 void llvm::thinLTOFinalizeInModule(Module &TheModule,
1500                                    const GVSummaryMapTy &DefinedGlobals,
1501                                    bool PropagateAttrs) {
1502   DenseSet<Comdat *> NonPrevailingComdats;
1503   auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
1504     // See if the global summary analysis computed a new resolved linkage.
1505     const auto &GS = DefinedGlobals.find(GV.getGUID());
1506     if (GS == DefinedGlobals.end())
1507       return;
1508 
1509     if (Propagate)
1510       if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
1511         if (Function *F = dyn_cast<Function>(&GV)) {
1512           // TODO: propagate ReadNone and ReadOnly.
1513           if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
1514             F->setDoesNotAccessMemory();
1515 
1516           if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
1517             F->setOnlyReadsMemory();
1518 
1519           if (FS->fflags().NoRecurse && !F->doesNotRecurse())
1520             F->setDoesNotRecurse();
1521 
1522           if (FS->fflags().NoUnwind && !F->doesNotThrow())
1523             F->setDoesNotThrow();
1524         }
1525       }
1526 
1527     auto NewLinkage = GS->second->linkage();
1528     if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
1529         // Don't internalize anything here, because the code below
1530         // lacks necessary correctness checks. Leave this job to
1531         // LLVM 'internalize' pass.
1532         GlobalValue::isLocalLinkage(NewLinkage) ||
1533         // In case it was dead and already converted to declaration.
1534         GV.isDeclaration())
1535       return;
1536 
1537     // Set the potentially more constraining visibility computed from summaries.
1538     // The DefaultVisibility condition is because older GlobalValueSummary does
1539     // not record DefaultVisibility and we don't want to change protected/hidden
1540     // to default.
1541     if (GS->second->getVisibility() != GlobalValue::DefaultVisibility)
1542       GV.setVisibility(GS->second->getVisibility());
1543 
1544     if (NewLinkage == GV.getLinkage())
1545       return;
1546 
1547     // Check for a non-prevailing def that has interposable linkage
1548     // (e.g. non-odr weak or linkonce). In that case we can't simply
1549     // convert to available_externally, since it would lose the
1550     // interposable property and possibly get inlined. Simply drop
1551     // the definition in that case.
1552     if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
1553         GlobalValue::isInterposableLinkage(GV.getLinkage())) {
1554       if (!convertToDeclaration(GV))
1555         // FIXME: Change this to collect replaced GVs and later erase
1556         // them from the parent module once thinLTOResolvePrevailingGUID is
1557         // changed to enable this for aliases.
1558         llvm_unreachable("Expected GV to be converted");
1559     } else {
1560       // If all copies of the original symbol had global unnamed addr and
1561       // linkonce_odr linkage, or if all of them had local unnamed addr linkage
1562       // and are constants, then it should be an auto hide symbol. In that case
1563       // the thin link would have marked it as CanAutoHide. Add hidden
1564       // visibility to the symbol to preserve the property.
1565       if (NewLinkage == GlobalValue::WeakODRLinkage &&
1566           GS->second->canAutoHide()) {
1567         assert(GV.canBeOmittedFromSymbolTable());
1568         GV.setVisibility(GlobalValue::HiddenVisibility);
1569       }
1570 
1571       LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
1572                         << "` from " << GV.getLinkage() << " to " << NewLinkage
1573                         << "\n");
1574       GV.setLinkage(NewLinkage);
1575     }
1576     // Remove declarations from comdats, including available_externally
1577     // as this is a declaration for the linker, and will be dropped eventually.
1578     // It is illegal for comdats to contain declarations.
1579     auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
1580     if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
1581       if (GO->getComdat()->getName() == GO->getName())
1582         NonPrevailingComdats.insert(GO->getComdat());
1583       GO->setComdat(nullptr);
1584     }
1585   };
1586 
1587   // Process functions and global now
1588   for (auto &GV : TheModule)
1589     FinalizeInModule(GV, PropagateAttrs);
1590   for (auto &GV : TheModule.globals())
1591     FinalizeInModule(GV);
1592   for (auto &GV : TheModule.aliases())
1593     FinalizeInModule(GV);
1594 
1595   // For a non-prevailing comdat, all its members must be available_externally.
1596   // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle
1597   // local linkage GlobalValues.
1598   if (NonPrevailingComdats.empty())
1599     return;
1600   for (auto &GO : TheModule.global_objects()) {
1601     if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) {
1602       GO.setComdat(nullptr);
1603       GO.setLinkage(GlobalValue::AvailableExternallyLinkage);
1604     }
1605   }
1606   bool Changed;
1607   do {
1608     Changed = false;
1609     // If an alias references a GlobalValue in a non-prevailing comdat, change
1610     // it to available_externally. For simplicity we only handle GlobalValue and
1611     // ConstantExpr with a base object. ConstantExpr without a base object is
1612     // unlikely used in a COMDAT.
1613     for (auto &GA : TheModule.aliases()) {
1614       if (GA.hasAvailableExternallyLinkage())
1615         continue;
1616       GlobalObject *Obj = GA.getAliaseeObject();
1617       assert(Obj && "aliasee without an base object is unimplemented");
1618       if (Obj->hasAvailableExternallyLinkage()) {
1619         GA.setLinkage(GlobalValue::AvailableExternallyLinkage);
1620         Changed = true;
1621       }
1622     }
1623   } while (Changed);
1624 }
1625 
1626 /// Run internalization on \p TheModule based on symmary analysis.
thinLTOInternalizeModule(Module & TheModule,const GVSummaryMapTy & DefinedGlobals)1627 void llvm::thinLTOInternalizeModule(Module &TheModule,
1628                                     const GVSummaryMapTy &DefinedGlobals) {
1629   // Declare a callback for the internalize pass that will ask for every
1630   // candidate GlobalValue if it can be internalized or not.
1631   auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
1632     // It may be the case that GV is on a chain of an ifunc, its alias and
1633     // subsequent aliases. In this case, the summary for the value is not
1634     // available.
1635     if (isa<GlobalIFunc>(&GV) ||
1636         (isa<GlobalAlias>(&GV) &&
1637          isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject())))
1638       return true;
1639 
1640     // Lookup the linkage recorded in the summaries during global analysis.
1641     auto GS = DefinedGlobals.find(GV.getGUID());
1642     if (GS == DefinedGlobals.end()) {
1643       // Must have been promoted (possibly conservatively). Find original
1644       // name so that we can access the correct summary and see if it can
1645       // be internalized again.
1646       // FIXME: Eventually we should control promotion instead of promoting
1647       // and internalizing again.
1648       StringRef OrigName =
1649           ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName());
1650       std::string OrigId = GlobalValue::getGlobalIdentifier(
1651           OrigName, GlobalValue::InternalLinkage,
1652           TheModule.getSourceFileName());
1653       GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
1654       if (GS == DefinedGlobals.end()) {
1655         // Also check the original non-promoted non-globalized name. In some
1656         // cases a preempted weak value is linked in as a local copy because
1657         // it is referenced by an alias (IRLinker::linkGlobalValueProto).
1658         // In that case, since it was originally not a local value, it was
1659         // recorded in the index using the original name.
1660         // FIXME: This may not be needed once PR27866 is fixed.
1661         GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
1662         assert(GS != DefinedGlobals.end());
1663       }
1664     }
1665     return !GlobalValue::isLocalLinkage(GS->second->linkage());
1666   };
1667 
1668   // FIXME: See if we can just internalize directly here via linkage changes
1669   // based on the index, rather than invoking internalizeModule.
1670   internalizeModule(TheModule, MustPreserveGV);
1671 }
1672 
1673 /// Make alias a clone of its aliasee.
replaceAliasWithAliasee(Module * SrcModule,GlobalAlias * GA)1674 static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
1675   Function *Fn = cast<Function>(GA->getAliaseeObject());
1676 
1677   ValueToValueMapTy VMap;
1678   Function *NewFn = CloneFunction(Fn, VMap);
1679   // Clone should use the original alias's linkage, visibility and name, and we
1680   // ensure all uses of alias instead use the new clone (casted if necessary).
1681   NewFn->setLinkage(GA->getLinkage());
1682   NewFn->setVisibility(GA->getVisibility());
1683   GA->replaceAllUsesWith(NewFn);
1684   NewFn->takeName(GA);
1685   return NewFn;
1686 }
1687 
1688 // Internalize values that we marked with specific attribute
1689 // in processGlobalForThinLTO.
internalizeGVsAfterImport(Module & M)1690 static void internalizeGVsAfterImport(Module &M) {
1691   for (auto &GV : M.globals())
1692     // Skip GVs which have been converted to declarations
1693     // by dropDeadSymbols.
1694     if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize")) {
1695       GV.setLinkage(GlobalValue::InternalLinkage);
1696       GV.setVisibility(GlobalValue::DefaultVisibility);
1697     }
1698 }
1699 
1700 // Automatically import functions in Module \p DestModule based on the summaries
1701 // index.
importFunctions(Module & DestModule,const FunctionImporter::ImportMapTy & ImportList)1702 Expected<bool> FunctionImporter::importFunctions(
1703     Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
1704   LLVM_DEBUG(dbgs() << "Starting import for Module "
1705                     << DestModule.getModuleIdentifier() << "\n");
1706   unsigned ImportedCount = 0, ImportedGVCount = 0;
1707 
1708   IRMover Mover(DestModule);
1709   // Do the actual import of functions now, one Module at a time
1710   std::set<StringRef> ModuleNameOrderedList;
1711   for (const auto &FunctionsToImportPerModule : ImportList) {
1712     ModuleNameOrderedList.insert(FunctionsToImportPerModule.first);
1713   }
1714 
1715   auto getImportType = [&](const FunctionsToImportTy &GUIDToImportType,
1716                            GlobalValue::GUID GUID)
1717       -> std::optional<GlobalValueSummary::ImportKind> {
1718     auto Iter = GUIDToImportType.find(GUID);
1719     if (Iter == GUIDToImportType.end())
1720       return std::nullopt;
1721     return Iter->second;
1722   };
1723 
1724   for (const auto &Name : ModuleNameOrderedList) {
1725     // Get the module for the import
1726     const auto &FunctionsToImportPerModule = ImportList.find(Name);
1727     assert(FunctionsToImportPerModule != ImportList.end());
1728     Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(Name);
1729     if (!SrcModuleOrErr)
1730       return SrcModuleOrErr.takeError();
1731     std::unique_ptr<Module> SrcModule = std::move(*SrcModuleOrErr);
1732     assert(&DestModule.getContext() == &SrcModule->getContext() &&
1733            "Context mismatch");
1734 
1735     // If modules were created with lazy metadata loading, materialize it
1736     // now, before linking it (otherwise this will be a noop).
1737     if (Error Err = SrcModule->materializeMetadata())
1738       return std::move(Err);
1739 
1740     auto &ImportGUIDs = FunctionsToImportPerModule->second;
1741 
1742     // Find the globals to import
1743     SetVector<GlobalValue *> GlobalsToImport;
1744     for (Function &F : *SrcModule) {
1745       if (!F.hasName())
1746         continue;
1747       auto GUID = F.getGUID();
1748       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1749 
1750       bool ImportDefinition =
1751           (MaybeImportType &&
1752            (*MaybeImportType == GlobalValueSummary::Definition));
1753 
1754       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1755                         << " importing function"
1756                         << (ImportDefinition
1757                                 ? " definition "
1758                                 : (MaybeImportType ? " declaration " : " "))
1759                         << GUID << " " << F.getName() << " from "
1760                         << SrcModule->getSourceFileName() << "\n");
1761       if (ImportDefinition) {
1762         if (Error Err = F.materialize())
1763           return std::move(Err);
1764         // MemProf should match function's definition and summary,
1765         // 'thinlto_src_module' is needed.
1766         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1767           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1768           // statistics and debugging.
1769           F.setMetadata(
1770               "thinlto_src_module",
1771               MDNode::get(DestModule.getContext(),
1772                           {MDString::get(DestModule.getContext(),
1773                                          SrcModule->getModuleIdentifier())}));
1774           F.setMetadata(
1775               "thinlto_src_file",
1776               MDNode::get(DestModule.getContext(),
1777                           {MDString::get(DestModule.getContext(),
1778                                          SrcModule->getSourceFileName())}));
1779         }
1780         GlobalsToImport.insert(&F);
1781       }
1782     }
1783     for (GlobalVariable &GV : SrcModule->globals()) {
1784       if (!GV.hasName())
1785         continue;
1786       auto GUID = GV.getGUID();
1787       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1788 
1789       bool ImportDefinition =
1790           (MaybeImportType &&
1791            (*MaybeImportType == GlobalValueSummary::Definition));
1792 
1793       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1794                         << " importing global"
1795                         << (ImportDefinition
1796                                 ? " definition "
1797                                 : (MaybeImportType ? " declaration " : " "))
1798                         << GUID << " " << GV.getName() << " from "
1799                         << SrcModule->getSourceFileName() << "\n");
1800       if (ImportDefinition) {
1801         if (Error Err = GV.materialize())
1802           return std::move(Err);
1803         ImportedGVCount += GlobalsToImport.insert(&GV);
1804       }
1805     }
1806     for (GlobalAlias &GA : SrcModule->aliases()) {
1807       if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
1808         continue;
1809       auto GUID = GA.getGUID();
1810       auto MaybeImportType = getImportType(ImportGUIDs, GUID);
1811 
1812       bool ImportDefinition =
1813           (MaybeImportType &&
1814            (*MaybeImportType == GlobalValueSummary::Definition));
1815 
1816       LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
1817                         << " importing alias"
1818                         << (ImportDefinition
1819                                 ? " definition "
1820                                 : (MaybeImportType ? " declaration " : " "))
1821                         << GUID << " " << GA.getName() << " from "
1822                         << SrcModule->getSourceFileName() << "\n");
1823       if (ImportDefinition) {
1824         if (Error Err = GA.materialize())
1825           return std::move(Err);
1826         // Import alias as a copy of its aliasee.
1827         GlobalObject *GO = GA.getAliaseeObject();
1828         if (Error Err = GO->materialize())
1829           return std::move(Err);
1830         auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
1831         LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
1832                           << GO->getName() << " from "
1833                           << SrcModule->getSourceFileName() << "\n");
1834         if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
1835           // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
1836           // statistics and debugging.
1837           Fn->setMetadata(
1838               "thinlto_src_module",
1839               MDNode::get(DestModule.getContext(),
1840                           {MDString::get(DestModule.getContext(),
1841                                          SrcModule->getModuleIdentifier())}));
1842           Fn->setMetadata(
1843               "thinlto_src_file",
1844               MDNode::get(DestModule.getContext(),
1845                           {MDString::get(DestModule.getContext(),
1846                                          SrcModule->getSourceFileName())}));
1847         }
1848         GlobalsToImport.insert(Fn);
1849       }
1850     }
1851 
1852     // Upgrade debug info after we're done materializing all the globals and we
1853     // have loaded all the required metadata!
1854     UpgradeDebugInfo(*SrcModule);
1855 
1856     // Set the partial sample profile ratio in the profile summary module flag
1857     // of the imported source module, if applicable, so that the profile summary
1858     // module flag will match with that of the destination module when it's
1859     // imported.
1860     SrcModule->setPartialSampleProfileRatio(Index);
1861 
1862     // Link in the specified functions.
1863     if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
1864                                &GlobalsToImport))
1865       return true;
1866 
1867     if (PrintImports) {
1868       for (const auto *GV : GlobalsToImport)
1869         dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
1870                << " from " << SrcModule->getSourceFileName() << "\n";
1871     }
1872 
1873     if (Error Err = Mover.move(std::move(SrcModule),
1874                                GlobalsToImport.getArrayRef(), nullptr,
1875                                /*IsPerformingImport=*/true))
1876       return createStringError(errc::invalid_argument,
1877                                Twine("Function Import: link error: ") +
1878                                    toString(std::move(Err)));
1879 
1880     ImportedCount += GlobalsToImport.size();
1881     NumImportedModules++;
1882   }
1883 
1884   internalizeGVsAfterImport(DestModule);
1885 
1886   NumImportedFunctions += (ImportedCount - ImportedGVCount);
1887   NumImportedGlobalVars += ImportedGVCount;
1888 
1889   // TODO: Print counters for definitions and declarations in the debugging log.
1890   LLVM_DEBUG(dbgs() << "Imported " << ImportedCount - ImportedGVCount
1891                     << " functions for Module "
1892                     << DestModule.getModuleIdentifier() << "\n");
1893   LLVM_DEBUG(dbgs() << "Imported " << ImportedGVCount
1894                     << " global variables for Module "
1895                     << DestModule.getModuleIdentifier() << "\n");
1896   return ImportedCount;
1897 }
1898 
doImportingForModuleForTest(Module & M,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing)1899 static bool doImportingForModuleForTest(
1900     Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1901                    isPrevailing) {
1902   if (SummaryFile.empty())
1903     report_fatal_error("error: -function-import requires -summary-file\n");
1904   Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr =
1905       getModuleSummaryIndexForFile(SummaryFile);
1906   if (!IndexPtrOrErr) {
1907     logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(),
1908                           "Error loading file '" + SummaryFile + "': ");
1909     return false;
1910   }
1911   std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr);
1912 
1913   // First step is collecting the import list.
1914   FunctionImporter::ImportMapTy ImportList;
1915   // If requested, simply import all functions in the index. This is used
1916   // when testing distributed backend handling via the opt tool, when
1917   // we have distributed indexes containing exactly the summaries to import.
1918   if (ImportAllIndex)
1919     ComputeCrossModuleImportForModuleFromIndexForTest(M.getModuleIdentifier(),
1920                                                       *Index, ImportList);
1921   else
1922     ComputeCrossModuleImportForModuleForTest(M.getModuleIdentifier(),
1923                                              isPrevailing, *Index, ImportList);
1924 
1925   // Conservatively mark all internal values as promoted. This interface is
1926   // only used when doing importing via the function importing pass. The pass
1927   // is only enabled when testing importing via the 'opt' tool, which does
1928   // not do the ThinLink that would normally determine what values to promote.
1929   for (auto &I : *Index) {
1930     for (auto &S : I.second.SummaryList) {
1931       if (GlobalValue::isLocalLinkage(S->linkage()))
1932         S->setLinkage(GlobalValue::ExternalLinkage);
1933     }
1934   }
1935 
1936   // Next we need to promote to global scope and rename any local values that
1937   // are potentially exported to other modules.
1938   if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
1939                              /*GlobalsToImport=*/nullptr)) {
1940     errs() << "Error renaming module\n";
1941     return true;
1942   }
1943 
1944   // Perform the import now.
1945   auto ModuleLoader = [&M](StringRef Identifier) {
1946     return loadFile(std::string(Identifier), M.getContext());
1947   };
1948   FunctionImporter Importer(*Index, ModuleLoader,
1949                             /*ClearDSOLocalOnDeclarations=*/false);
1950   Expected<bool> Result = Importer.importFunctions(M, ImportList);
1951 
1952   // FIXME: Probably need to propagate Errors through the pass manager.
1953   if (!Result) {
1954     logAllUnhandledErrors(Result.takeError(), errs(),
1955                           "Error importing module: ");
1956     return true;
1957   }
1958 
1959   return true;
1960 }
1961 
run(Module & M,ModuleAnalysisManager & AM)1962 PreservedAnalyses FunctionImportPass::run(Module &M,
1963                                           ModuleAnalysisManager &AM) {
1964   // This is only used for testing the function import pass via opt, where we
1965   // don't have prevailing information from the LTO context available, so just
1966   // conservatively assume everything is prevailing (which is fine for the very
1967   // limited use of prevailing checking in this pass).
1968   auto isPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
1969     return true;
1970   };
1971   if (!doImportingForModuleForTest(M, isPrevailing))
1972     return PreservedAnalyses::all();
1973 
1974   return PreservedAnalyses::none();
1975 }
1976