1 //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements Function import based on summaries.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/Transforms/IPO/FunctionImport.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/IR/AutoUpgrade.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/GlobalAlias.h"
25 #include "llvm/IR/GlobalObject.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/Metadata.h"
29 #include "llvm/IR/Module.h"
30 #include "llvm/IR/ModuleSummaryIndex.h"
31 #include "llvm/IRReader/IRReader.h"
32 #include "llvm/Linker/IRMover.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Errc.h"
37 #include "llvm/Support/Error.h"
38 #include "llvm/Support/ErrorHandling.h"
39 #include "llvm/Support/FileSystem.h"
40 #include "llvm/Support/JSON.h"
41 #include "llvm/Support/SourceMgr.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/IPO/Internalize.h"
44 #include "llvm/Transforms/Utils/Cloning.h"
45 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
46 #include "llvm/Transforms/Utils/ValueMapper.h"
47 #include <cassert>
48 #include <memory>
49 #include <set>
50 #include <string>
51 #include <system_error>
52 #include <tuple>
53 #include <utility>
54
55 using namespace llvm;
56
57 #define DEBUG_TYPE "function-import"
58
59 STATISTIC(NumImportedFunctionsThinLink,
60 "Number of functions thin link decided to import");
61 STATISTIC(NumImportedHotFunctionsThinLink,
62 "Number of hot functions thin link decided to import");
63 STATISTIC(NumImportedCriticalFunctionsThinLink,
64 "Number of critical functions thin link decided to import");
65 STATISTIC(NumImportedGlobalVarsThinLink,
66 "Number of global variables thin link decided to import");
67 STATISTIC(NumImportedFunctions, "Number of functions imported in backend");
68 STATISTIC(NumImportedGlobalVars,
69 "Number of global variables imported in backend");
70 STATISTIC(NumImportedModules, "Number of modules imported from");
71 STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index");
72 STATISTIC(NumLiveSymbols, "Number of live symbols in index");
73
74 /// Limit on instruction count of imported functions.
75 static cl::opt<unsigned> ImportInstrLimit(
76 "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
77 cl::desc("Only import functions with less than N instructions"));
78
79 static cl::opt<int> ImportCutoff(
80 "import-cutoff", cl::init(-1), cl::Hidden, cl::value_desc("N"),
81 cl::desc("Only import first N functions if N>=0 (default -1)"));
82
83 static cl::opt<bool>
84 ForceImportAll("force-import-all", cl::init(false), cl::Hidden,
85 cl::desc("Import functions with noinline attribute"));
86
87 static cl::opt<float>
88 ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
89 cl::Hidden, cl::value_desc("x"),
90 cl::desc("As we import functions, multiply the "
91 "`import-instr-limit` threshold by this factor "
92 "before processing newly imported functions"));
93
94 static cl::opt<float> ImportHotInstrFactor(
95 "import-hot-evolution-factor", cl::init(1.0), cl::Hidden,
96 cl::value_desc("x"),
97 cl::desc("As we import functions called from hot callsite, multiply the "
98 "`import-instr-limit` threshold by this factor "
99 "before processing newly imported functions"));
100
101 static cl::opt<float> ImportHotMultiplier(
102 "import-hot-multiplier", cl::init(10.0), cl::Hidden, cl::value_desc("x"),
103 cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));
104
105 static cl::opt<float> ImportCriticalMultiplier(
106 "import-critical-multiplier", cl::init(100.0), cl::Hidden,
107 cl::value_desc("x"),
108 cl::desc(
109 "Multiply the `import-instr-limit` threshold for critical callsites"));
110
111 // FIXME: This multiplier was not really tuned up.
112 static cl::opt<float> ImportColdMultiplier(
113 "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
114 cl::desc("Multiply the `import-instr-limit` threshold for cold callsites"));
115
116 static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
117 cl::desc("Print imported functions"));
118
119 static cl::opt<bool> PrintImportFailures(
120 "print-import-failures", cl::init(false), cl::Hidden,
121 cl::desc("Print information for functions rejected for importing"));
122
123 static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
124 cl::desc("Compute dead symbols"));
125
126 static cl::opt<bool> EnableImportMetadata(
127 "enable-import-metadata", cl::init(false), cl::Hidden,
128 cl::desc("Enable import metadata like 'thinlto_src_module' and "
129 "'thinlto_src_file'"));
130
131 /// Summary file to use for function importing when using -function-import from
132 /// the command line.
133 static cl::opt<std::string>
134 SummaryFile("summary-file",
135 cl::desc("The summary file to use for function importing."));
136
137 /// Used when testing importing from distributed indexes via opt
138 // -function-import.
139 static cl::opt<bool>
140 ImportAllIndex("import-all-index",
141 cl::desc("Import all external functions in index."));
142
143 /// This is a test-only option.
144 /// If this option is enabled, the ThinLTO indexing step will import each
145 /// function declaration as a fallback. In a real build this may increase ram
146 /// usage of the indexing step unnecessarily.
147 /// TODO: Implement selective import (based on combined summary analysis) to
148 /// ensure the imported function has a use case in the postlink pipeline.
149 static cl::opt<bool> ImportDeclaration(
150 "import-declaration", cl::init(false), cl::Hidden,
151 cl::desc("If true, import function declaration as fallback if the function "
152 "definition is not imported."));
153
154 /// Pass a workload description file - an example of workload would be the
155 /// functions executed to satisfy a RPC request. A workload is defined by a root
156 /// function and the list of functions that are (frequently) needed to satisfy
157 /// it. The module that defines the root will have all those functions imported.
158 /// The file contains a JSON dictionary. The keys are root functions, the values
159 /// are lists of functions to import in the module defining the root. It is
160 /// assumed -funique-internal-linkage-names was used, thus ensuring function
161 /// names are unique even for local linkage ones.
162 static cl::opt<std::string> WorkloadDefinitions(
163 "thinlto-workload-def",
164 cl::desc("Pass a workload definition. This is a file containing a JSON "
165 "dictionary. The keys are root functions, the values are lists of "
166 "functions to import in the module defining the root. It is "
167 "assumed -funique-internal-linkage-names was used, to ensure "
168 "local linkage functions have unique names. For example: \n"
169 "{\n"
170 " \"rootFunction_1\": [\"function_to_import_1\", "
171 "\"function_to_import_2\"], \n"
172 " \"rootFunction_2\": [\"function_to_import_3\", "
173 "\"function_to_import_4\"] \n"
174 "}"),
175 cl::Hidden);
176
177 namespace llvm {
178 extern cl::opt<bool> EnableMemProfContextDisambiguation;
179 }
180
181 // Load lazily a module from \p FileName in \p Context.
loadFile(const std::string & FileName,LLVMContext & Context)182 static std::unique_ptr<Module> loadFile(const std::string &FileName,
183 LLVMContext &Context) {
184 SMDiagnostic Err;
185 LLVM_DEBUG(dbgs() << "Loading '" << FileName << "'\n");
186 // Metadata isn't loaded until functions are imported, to minimize
187 // the memory overhead.
188 std::unique_ptr<Module> Result =
189 getLazyIRFileModule(FileName, Err, Context,
190 /* ShouldLazyLoadMetadata = */ true);
191 if (!Result) {
192 Err.print("function-import", errs());
193 report_fatal_error("Abort");
194 }
195
196 return Result;
197 }
198
199 /// Given a list of possible callee implementation for a call site, qualify the
200 /// legality of importing each. The return is a range of pairs. Each pair
201 /// corresponds to a candidate. The first value is the ImportFailureReason for
202 /// that candidate, the second is the candidate.
qualifyCalleeCandidates(const ModuleSummaryIndex & Index,ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,StringRef CallerModulePath)203 static auto qualifyCalleeCandidates(
204 const ModuleSummaryIndex &Index,
205 ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
206 StringRef CallerModulePath) {
207 return llvm::map_range(
208 CalleeSummaryList,
209 [&Index, CalleeSummaryList,
210 CallerModulePath](const std::unique_ptr<GlobalValueSummary> &SummaryPtr)
211 -> std::pair<FunctionImporter::ImportFailureReason,
212 const GlobalValueSummary *> {
213 auto *GVSummary = SummaryPtr.get();
214 if (!Index.isGlobalValueLive(GVSummary))
215 return {FunctionImporter::ImportFailureReason::NotLive, GVSummary};
216
217 if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
218 return {FunctionImporter::ImportFailureReason::InterposableLinkage,
219 GVSummary};
220
221 auto *Summary = dyn_cast<FunctionSummary>(GVSummary->getBaseObject());
222
223 // Ignore any callees that aren't actually functions. This could happen
224 // in the case of GUID hash collisions. It could also happen in theory
225 // for SamplePGO profiles collected on old versions of the code after
226 // renaming, since we synthesize edges to any inlined callees appearing
227 // in the profile.
228 if (!Summary)
229 return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
230
231 // If this is a local function, make sure we import the copy
232 // in the caller's module. The only time a local function can
233 // share an entry in the index is if there is a local with the same name
234 // in another module that had the same source file name (in a different
235 // directory), where each was compiled in their own directory so there
236 // was not distinguishing path.
237 // However, do the import from another module if there is only one
238 // entry in the list - in that case this must be a reference due
239 // to indirect call profile data, since a function pointer can point to
240 // a local in another module.
241 if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
242 CalleeSummaryList.size() > 1 &&
243 Summary->modulePath() != CallerModulePath)
244 return {
245 FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
246 GVSummary};
247
248 // Skip if it isn't legal to import (e.g. may reference unpromotable
249 // locals).
250 if (Summary->notEligibleToImport())
251 return {FunctionImporter::ImportFailureReason::NotEligible,
252 GVSummary};
253
254 return {FunctionImporter::ImportFailureReason::None, GVSummary};
255 });
256 }
257
258 /// Given a list of possible callee implementation for a call site, select one
259 /// that fits the \p Threshold for function definition import. If none are
260 /// found, the Reason will give the last reason for the failure (last, in the
261 /// order of CalleeSummaryList entries). While looking for a callee definition,
262 /// sets \p TooLargeOrNoInlineSummary to the last seen too-large or noinline
263 /// candidate; other modules may want to know the function summary or
264 /// declaration even if a definition is not needed.
265 ///
266 /// FIXME: select "best" instead of first that fits. But what is "best"?
267 /// - The smallest: more likely to be inlined.
268 /// - The one with the least outgoing edges (already well optimized).
269 /// - One from a module already being imported from in order to reduce the
270 /// number of source modules parsed/linked.
271 /// - One that has PGO data attached.
272 /// - [insert you fancy metric here]
273 static const GlobalValueSummary *
selectCallee(const ModuleSummaryIndex & Index,ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,unsigned Threshold,StringRef CallerModulePath,const GlobalValueSummary * & TooLargeOrNoInlineSummary,FunctionImporter::ImportFailureReason & Reason)274 selectCallee(const ModuleSummaryIndex &Index,
275 ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
276 unsigned Threshold, StringRef CallerModulePath,
277 const GlobalValueSummary *&TooLargeOrNoInlineSummary,
278 FunctionImporter::ImportFailureReason &Reason) {
279 // Records the last summary with reason noinline or too-large.
280 TooLargeOrNoInlineSummary = nullptr;
281 auto QualifiedCandidates =
282 qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
283 for (auto QualifiedValue : QualifiedCandidates) {
284 Reason = QualifiedValue.first;
285 // Skip a summary if its import is not (proved to be) legal.
286 if (Reason != FunctionImporter::ImportFailureReason::None)
287 continue;
288 auto *Summary =
289 cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
290
291 // Don't bother importing the definition if the chance of inlining it is
292 // not high enough (except under `--force-import-all`).
293 if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
294 !ForceImportAll) {
295 TooLargeOrNoInlineSummary = Summary;
296 Reason = FunctionImporter::ImportFailureReason::TooLarge;
297 continue;
298 }
299
300 // Don't bother importing the definition if we can't inline it anyway.
301 if (Summary->fflags().NoInline && !ForceImportAll) {
302 TooLargeOrNoInlineSummary = Summary;
303 Reason = FunctionImporter::ImportFailureReason::NoInline;
304 continue;
305 }
306
307 return Summary;
308 }
309 return nullptr;
310 }
311
312 namespace {
313
314 using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */>;
315
316 } // anonymous namespace
317
318 /// Import globals referenced by a function or other globals that are being
319 /// imported, if importing such global is possible.
320 class GlobalsImporter final {
321 const ModuleSummaryIndex &Index;
322 const GVSummaryMapTy &DefinedGVSummaries;
323 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
324 IsPrevailing;
325 FunctionImporter::ImportMapTy &ImportList;
326 DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
327
shouldImportGlobal(const ValueInfo & VI)328 bool shouldImportGlobal(const ValueInfo &VI) {
329 const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
330 if (GVS == DefinedGVSummaries.end())
331 return true;
332 // We should not skip import if the module contains a non-prevailing
333 // definition with interposable linkage type. This is required for
334 // correctness in the situation where there is a prevailing def available
335 // for import and marked read-only. In this case, the non-prevailing def
336 // will be converted to a declaration, while the prevailing one becomes
337 // internal, thus no definitions will be available for linking. In order to
338 // prevent undefined symbol link error, the prevailing definition must be
339 // imported.
340 // FIXME: Consider adding a check that the suitable prevailing definition
341 // exists and marked read-only.
342 if (VI.getSummaryList().size() > 1 &&
343 GlobalValue::isInterposableLinkage(GVS->second->linkage()) &&
344 !IsPrevailing(VI.getGUID(), GVS->second))
345 return true;
346
347 return false;
348 }
349
350 void
onImportingSummaryImpl(const GlobalValueSummary & Summary,SmallVectorImpl<const GlobalVarSummary * > & Worklist)351 onImportingSummaryImpl(const GlobalValueSummary &Summary,
352 SmallVectorImpl<const GlobalVarSummary *> &Worklist) {
353 for (const auto &VI : Summary.refs()) {
354 if (!shouldImportGlobal(VI)) {
355 LLVM_DEBUG(
356 dbgs() << "Ref ignored! Target already in destination module.\n");
357 continue;
358 }
359
360 LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
361
362 // If this is a local variable, make sure we import the copy
363 // in the caller's module. The only time a local variable can
364 // share an entry in the index is if there is a local with the same name
365 // in another module that had the same source file name (in a different
366 // directory), where each was compiled in their own directory so there
367 // was not distinguishing path.
368 auto LocalNotInModule =
369 [&](const GlobalValueSummary *RefSummary) -> bool {
370 return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
371 RefSummary->modulePath() != Summary.modulePath();
372 };
373
374 for (const auto &RefSummary : VI.getSummaryList()) {
375 const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
376 // Functions could be referenced by global vars - e.g. a vtable; but we
377 // don't currently imagine a reason those would be imported here, rather
378 // than as part of the logic deciding which functions to import (i.e.
379 // based on profile information). Should we decide to handle them here,
380 // we can refactor accordingly at that time.
381 if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
382 LocalNotInModule(GVS))
383 continue;
384
385 // If there isn't an entry for GUID, insert <GUID, Definition> pair.
386 // Otherwise, definition should take precedence over declaration.
387 auto [Iter, Inserted] =
388 ImportList[RefSummary->modulePath()].try_emplace(
389 VI.getGUID(), GlobalValueSummary::Definition);
390 // Only update stat and exports if we haven't already imported this
391 // variable.
392 if (!Inserted) {
393 // Set the value to 'std::min(existing-value, new-value)' to make
394 // sure a definition takes precedence over a declaration.
395 Iter->second = std::min(GlobalValueSummary::Definition, Iter->second);
396 break;
397 }
398 NumImportedGlobalVarsThinLink++;
399 // Any references made by this variable will be marked exported
400 // later, in ComputeCrossModuleImport, after import decisions are
401 // complete, which is more efficient than adding them here.
402 if (ExportLists)
403 (*ExportLists)[RefSummary->modulePath()].insert(VI);
404
405 // If variable is not writeonly we attempt to recursively analyze
406 // its references in order to import referenced constants.
407 if (!Index.isWriteOnly(GVS))
408 Worklist.emplace_back(GVS);
409 break;
410 }
411 }
412 }
413
414 public:
GlobalsImporter(const ModuleSummaryIndex & Index,const GVSummaryMapTy & DefinedGVSummaries,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> IsPrevailing,FunctionImporter::ImportMapTy & ImportList,DenseMap<StringRef,FunctionImporter::ExportSetTy> * ExportLists)415 GlobalsImporter(
416 const ModuleSummaryIndex &Index, const GVSummaryMapTy &DefinedGVSummaries,
417 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
418 IsPrevailing,
419 FunctionImporter::ImportMapTy &ImportList,
420 DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
421 : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
422 IsPrevailing(IsPrevailing), ImportList(ImportList),
423 ExportLists(ExportLists) {}
424
onImportingSummary(const GlobalValueSummary & Summary)425 void onImportingSummary(const GlobalValueSummary &Summary) {
426 SmallVector<const GlobalVarSummary *, 128> Worklist;
427 onImportingSummaryImpl(Summary, Worklist);
428 while (!Worklist.empty())
429 onImportingSummaryImpl(*Worklist.pop_back_val(), Worklist);
430 }
431 };
432
433 static const char *getFailureName(FunctionImporter::ImportFailureReason Reason);
434
435 /// Determine the list of imports and exports for each module.
436 class ModuleImportsManager {
437 protected:
438 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
439 IsPrevailing;
440 const ModuleSummaryIndex &Index;
441 DenseMap<StringRef, FunctionImporter::ExportSetTy> *const ExportLists;
442
ModuleImportsManager(function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> IsPrevailing,const ModuleSummaryIndex & Index,DenseMap<StringRef,FunctionImporter::ExportSetTy> * ExportLists=nullptr)443 ModuleImportsManager(
444 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
445 IsPrevailing,
446 const ModuleSummaryIndex &Index,
447 DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists = nullptr)
448 : IsPrevailing(IsPrevailing), Index(Index), ExportLists(ExportLists) {}
449
450 public:
451 virtual ~ModuleImportsManager() = default;
452
453 /// Given the list of globals defined in a module, compute the list of imports
454 /// as well as the list of "exports", i.e. the list of symbols referenced from
455 /// another module (that may require promotion).
456 virtual void
457 computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
458 StringRef ModName,
459 FunctionImporter::ImportMapTy &ImportList);
460
461 static std::unique_ptr<ModuleImportsManager>
462 create(function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
463 IsPrevailing,
464 const ModuleSummaryIndex &Index,
465 DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists =
466 nullptr);
467 };
468
469 /// A ModuleImportsManager that operates based on a workload definition (see
470 /// -thinlto-workload-def). For modules that do not define workload roots, it
471 /// applies the base ModuleImportsManager import policy.
472 class WorkloadImportsManager : public ModuleImportsManager {
473 // Keep a module name -> value infos to import association. We use it to
474 // determine if a module's import list should be done by the base
475 // ModuleImportsManager or by us.
476 StringMap<DenseSet<ValueInfo>> Workloads;
477
478 void
computeImportForModule(const GVSummaryMapTy & DefinedGVSummaries,StringRef ModName,FunctionImporter::ImportMapTy & ImportList)479 computeImportForModule(const GVSummaryMapTy &DefinedGVSummaries,
480 StringRef ModName,
481 FunctionImporter::ImportMapTy &ImportList) override {
482 auto SetIter = Workloads.find(ModName);
483 if (SetIter == Workloads.end()) {
484 LLVM_DEBUG(dbgs() << "[Workload] " << ModName
485 << " does not contain the root of any context.\n");
486 return ModuleImportsManager::computeImportForModule(DefinedGVSummaries,
487 ModName, ImportList);
488 }
489 LLVM_DEBUG(dbgs() << "[Workload] " << ModName
490 << " contains the root(s) of context(s).\n");
491
492 GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
493 ExportLists);
494 auto &ValueInfos = SetIter->second;
495 SmallVector<EdgeInfo, 128> GlobWorklist;
496 for (auto &VI : llvm::make_early_inc_range(ValueInfos)) {
497 auto It = DefinedGVSummaries.find(VI.getGUID());
498 if (It != DefinedGVSummaries.end() &&
499 IsPrevailing(VI.getGUID(), It->second)) {
500 LLVM_DEBUG(
501 dbgs() << "[Workload] " << VI.name()
502 << " has the prevailing variant already in the module "
503 << ModName << ". No need to import\n");
504 continue;
505 }
506 auto Candidates =
507 qualifyCalleeCandidates(Index, VI.getSummaryList(), ModName);
508
509 const GlobalValueSummary *GVS = nullptr;
510 auto PotentialCandidates = llvm::map_range(
511 llvm::make_filter_range(
512 Candidates,
513 [&](const auto &Candidate) {
514 LLVM_DEBUG(dbgs() << "[Workflow] Candidate for " << VI.name()
515 << " from " << Candidate.second->modulePath()
516 << " ImportFailureReason: "
517 << getFailureName(Candidate.first) << "\n");
518 return Candidate.first ==
519 FunctionImporter::ImportFailureReason::None;
520 }),
521 [](const auto &Candidate) { return Candidate.second; });
522 if (PotentialCandidates.empty()) {
523 LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
524 << " because can't find eligible Callee. Guid is: "
525 << Function::getGUID(VI.name()) << "\n");
526 continue;
527 }
528 /// We will prefer importing the prevailing candidate, if not, we'll
529 /// still pick the first available candidate. The reason we want to make
530 /// sure we do import the prevailing candidate is because the goal of
531 /// workload-awareness is to enable optimizations specializing the call
532 /// graph of that workload. Suppose a function is already defined in the
533 /// module, but it's not the prevailing variant. Suppose also we do not
534 /// inline it (in fact, if it were interposable, we can't inline it),
535 /// but we could specialize it to the workload in other ways. However,
536 /// the linker would drop it in the favor of the prevailing copy.
537 /// Instead, by importing the prevailing variant (assuming also the use
538 /// of `-avail-extern-to-local`), we keep the specialization. We could
539 /// alteranatively make the non-prevailing variant local, but the
540 /// prevailing one is also the one for which we would have previously
541 /// collected profiles, making it preferrable.
542 auto PrevailingCandidates = llvm::make_filter_range(
543 PotentialCandidates, [&](const auto *Candidate) {
544 return IsPrevailing(VI.getGUID(), Candidate);
545 });
546 if (PrevailingCandidates.empty()) {
547 GVS = *PotentialCandidates.begin();
548 if (!llvm::hasSingleElement(PotentialCandidates) &&
549 GlobalValue::isLocalLinkage(GVS->linkage()))
550 LLVM_DEBUG(
551 dbgs()
552 << "[Workload] Found multiple non-prevailing candidates for "
553 << VI.name()
554 << ". This is unexpected. Are module paths passed to the "
555 "compiler unique for the modules passed to the linker?");
556 // We could in theory have multiple (interposable) copies of a symbol
557 // when there is no prevailing candidate, if say the prevailing copy was
558 // in a native object being linked in. However, we should in theory be
559 // marking all of these non-prevailing IR copies dead in that case, in
560 // which case they won't be candidates.
561 assert(GVS->isLive());
562 } else {
563 assert(llvm::hasSingleElement(PrevailingCandidates));
564 GVS = *PrevailingCandidates.begin();
565 }
566
567 auto ExportingModule = GVS->modulePath();
568 // We checked that for the prevailing case, but if we happen to have for
569 // example an internal that's defined in this module, it'd have no
570 // PrevailingCandidates.
571 if (ExportingModule == ModName) {
572 LLVM_DEBUG(dbgs() << "[Workload] Not importing " << VI.name()
573 << " because its defining module is the same as the "
574 "current module\n");
575 continue;
576 }
577 LLVM_DEBUG(dbgs() << "[Workload][Including]" << VI.name() << " from "
578 << ExportingModule << " : "
579 << Function::getGUID(VI.name()) << "\n");
580 ImportList[ExportingModule][VI.getGUID()] =
581 GlobalValueSummary::Definition;
582 GVI.onImportingSummary(*GVS);
583 if (ExportLists)
584 (*ExportLists)[ExportingModule].insert(VI);
585 }
586 LLVM_DEBUG(dbgs() << "[Workload] Done\n");
587 }
588
589 public:
WorkloadImportsManager(function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> IsPrevailing,const ModuleSummaryIndex & Index,DenseMap<StringRef,FunctionImporter::ExportSetTy> * ExportLists)590 WorkloadImportsManager(
591 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
592 IsPrevailing,
593 const ModuleSummaryIndex &Index,
594 DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
595 : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
596 // Since the workload def uses names, we need a quick lookup
597 // name->ValueInfo.
598 StringMap<ValueInfo> NameToValueInfo;
599 StringSet<> AmbiguousNames;
600 for (auto &I : Index) {
601 ValueInfo VI = Index.getValueInfo(I);
602 if (!NameToValueInfo.insert(std::make_pair(VI.name(), VI)).second)
603 LLVM_DEBUG(AmbiguousNames.insert(VI.name()));
604 }
605 auto DbgReportIfAmbiguous = [&](StringRef Name) {
606 LLVM_DEBUG(if (AmbiguousNames.count(Name) > 0) {
607 dbgs() << "[Workload] Function name " << Name
608 << " present in the workload definition is ambiguous. Consider "
609 "compiling with -funique-internal-linkage-names.";
610 });
611 };
612 std::error_code EC;
613 auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(WorkloadDefinitions);
614 if (std::error_code EC = BufferOrErr.getError()) {
615 report_fatal_error("Failed to open context file");
616 return;
617 }
618 auto Buffer = std::move(BufferOrErr.get());
619 std::map<std::string, std::vector<std::string>> WorkloadDefs;
620 json::Path::Root NullRoot;
621 // The JSON is supposed to contain a dictionary matching the type of
622 // WorkloadDefs. For example:
623 // {
624 // "rootFunction_1": ["function_to_import_1", "function_to_import_2"],
625 // "rootFunction_2": ["function_to_import_3", "function_to_import_4"]
626 // }
627 auto Parsed = json::parse(Buffer->getBuffer());
628 if (!Parsed)
629 report_fatal_error(Parsed.takeError());
630 if (!json::fromJSON(*Parsed, WorkloadDefs, NullRoot))
631 report_fatal_error("Invalid thinlto contextual profile format.");
632 for (const auto &Workload : WorkloadDefs) {
633 const auto &Root = Workload.first;
634 DbgReportIfAmbiguous(Root);
635 LLVM_DEBUG(dbgs() << "[Workload] Root: " << Root << "\n");
636 const auto &AllCallees = Workload.second;
637 auto RootIt = NameToValueInfo.find(Root);
638 if (RootIt == NameToValueInfo.end()) {
639 LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
640 << " not found in this linkage unit.\n");
641 continue;
642 }
643 auto RootVI = RootIt->second;
644 if (RootVI.getSummaryList().size() != 1) {
645 LLVM_DEBUG(dbgs() << "[Workload] Root " << Root
646 << " should have exactly one summary, but has "
647 << RootVI.getSummaryList().size() << ". Skipping.\n");
648 continue;
649 }
650 StringRef RootDefiningModule =
651 RootVI.getSummaryList().front()->modulePath();
652 LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << Root
653 << " is : " << RootDefiningModule << "\n");
654 auto &Set = Workloads[RootDefiningModule];
655 for (const auto &Callee : AllCallees) {
656 LLVM_DEBUG(dbgs() << "[Workload] " << Callee << "\n");
657 DbgReportIfAmbiguous(Callee);
658 auto ElemIt = NameToValueInfo.find(Callee);
659 if (ElemIt == NameToValueInfo.end()) {
660 LLVM_DEBUG(dbgs() << "[Workload] " << Callee << " not found\n");
661 continue;
662 }
663 Set.insert(ElemIt->second);
664 }
665 LLVM_DEBUG({
666 dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
667 << " distinct callees.\n";
668 for (const auto &VI : Set) {
669 dbgs() << "[Workload] Root: " << Root
670 << " Would include: " << VI.getGUID() << "\n";
671 }
672 });
673 }
674 }
675 };
676
create(function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> IsPrevailing,const ModuleSummaryIndex & Index,DenseMap<StringRef,FunctionImporter::ExportSetTy> * ExportLists)677 std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
678 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
679 IsPrevailing,
680 const ModuleSummaryIndex &Index,
681 DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
682 if (WorkloadDefinitions.empty()) {
683 LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
684 return std::unique_ptr<ModuleImportsManager>(
685 new ModuleImportsManager(IsPrevailing, Index, ExportLists));
686 }
687 LLVM_DEBUG(dbgs() << "[Workload] Using the contextual imports manager.\n");
688 return std::make_unique<WorkloadImportsManager>(IsPrevailing, Index,
689 ExportLists);
690 }
691
692 static const char *
getFailureName(FunctionImporter::ImportFailureReason Reason)693 getFailureName(FunctionImporter::ImportFailureReason Reason) {
694 switch (Reason) {
695 case FunctionImporter::ImportFailureReason::None:
696 return "None";
697 case FunctionImporter::ImportFailureReason::GlobalVar:
698 return "GlobalVar";
699 case FunctionImporter::ImportFailureReason::NotLive:
700 return "NotLive";
701 case FunctionImporter::ImportFailureReason::TooLarge:
702 return "TooLarge";
703 case FunctionImporter::ImportFailureReason::InterposableLinkage:
704 return "InterposableLinkage";
705 case FunctionImporter::ImportFailureReason::LocalLinkageNotInModule:
706 return "LocalLinkageNotInModule";
707 case FunctionImporter::ImportFailureReason::NotEligible:
708 return "NotEligible";
709 case FunctionImporter::ImportFailureReason::NoInline:
710 return "NoInline";
711 }
712 llvm_unreachable("invalid reason");
713 }
714
715 /// Compute the list of functions to import for a given caller. Mark these
716 /// imported functions and the symbols they reference in their source module as
717 /// exported from their source module.
computeImportForFunction(const FunctionSummary & Summary,const ModuleSummaryIndex & Index,const unsigned Threshold,const GVSummaryMapTy & DefinedGVSummaries,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing,SmallVectorImpl<EdgeInfo> & Worklist,GlobalsImporter & GVImporter,FunctionImporter::ImportMapTy & ImportList,DenseMap<StringRef,FunctionImporter::ExportSetTy> * ExportLists,FunctionImporter::ImportThresholdsTy & ImportThresholds)718 static void computeImportForFunction(
719 const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
720 const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
721 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
722 isPrevailing,
723 SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
724 FunctionImporter::ImportMapTy &ImportList,
725 DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists,
726 FunctionImporter::ImportThresholdsTy &ImportThresholds) {
727 GVImporter.onImportingSummary(Summary);
728 static int ImportCount = 0;
729 for (const auto &Edge : Summary.calls()) {
730 ValueInfo VI = Edge.first;
731 LLVM_DEBUG(dbgs() << " edge -> " << VI << " Threshold:" << Threshold
732 << "\n");
733
734 if (ImportCutoff >= 0 && ImportCount >= ImportCutoff) {
735 LLVM_DEBUG(dbgs() << "ignored! import-cutoff value of " << ImportCutoff
736 << " reached.\n");
737 continue;
738 }
739
740 if (DefinedGVSummaries.count(VI.getGUID())) {
741 // FIXME: Consider not skipping import if the module contains
742 // a non-prevailing def with interposable linkage. The prevailing copy
743 // can safely be imported (see shouldImportGlobal()).
744 LLVM_DEBUG(dbgs() << "ignored! Target already in destination module.\n");
745 continue;
746 }
747
748 auto GetBonusMultiplier = [](CalleeInfo::HotnessType Hotness) -> float {
749 if (Hotness == CalleeInfo::HotnessType::Hot)
750 return ImportHotMultiplier;
751 if (Hotness == CalleeInfo::HotnessType::Cold)
752 return ImportColdMultiplier;
753 if (Hotness == CalleeInfo::HotnessType::Critical)
754 return ImportCriticalMultiplier;
755 return 1.0;
756 };
757
758 const auto NewThreshold =
759 Threshold * GetBonusMultiplier(Edge.second.getHotness());
760
761 auto IT = ImportThresholds.insert(std::make_pair(
762 VI.getGUID(), std::make_tuple(NewThreshold, nullptr, nullptr)));
763 bool PreviouslyVisited = !IT.second;
764 auto &ProcessedThreshold = std::get<0>(IT.first->second);
765 auto &CalleeSummary = std::get<1>(IT.first->second);
766 auto &FailureInfo = std::get<2>(IT.first->second);
767
768 bool IsHotCallsite =
769 Edge.second.getHotness() == CalleeInfo::HotnessType::Hot;
770 bool IsCriticalCallsite =
771 Edge.second.getHotness() == CalleeInfo::HotnessType::Critical;
772
773 const FunctionSummary *ResolvedCalleeSummary = nullptr;
774 if (CalleeSummary) {
775 assert(PreviouslyVisited);
776 // Since the traversal of the call graph is DFS, we can revisit a function
777 // a second time with a higher threshold. In this case, it is added back
778 // to the worklist with the new threshold (so that its own callee chains
779 // can be considered with the higher threshold).
780 if (NewThreshold <= ProcessedThreshold) {
781 LLVM_DEBUG(
782 dbgs() << "ignored! Target was already imported with Threshold "
783 << ProcessedThreshold << "\n");
784 continue;
785 }
786 // Update with new larger threshold.
787 ProcessedThreshold = NewThreshold;
788 ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
789 } else {
790 // If we already rejected importing a callee at the same or higher
791 // threshold, don't waste time calling selectCallee.
792 if (PreviouslyVisited && NewThreshold <= ProcessedThreshold) {
793 LLVM_DEBUG(
794 dbgs() << "ignored! Target was already rejected with Threshold "
795 << ProcessedThreshold << "\n");
796 if (PrintImportFailures) {
797 assert(FailureInfo &&
798 "Expected FailureInfo for previously rejected candidate");
799 FailureInfo->Attempts++;
800 }
801 continue;
802 }
803
804 FunctionImporter::ImportFailureReason Reason{};
805
806 // `SummaryForDeclImport` is an summary eligible for declaration import.
807 const GlobalValueSummary *SummaryForDeclImport = nullptr;
808 CalleeSummary =
809 selectCallee(Index, VI.getSummaryList(), NewThreshold,
810 Summary.modulePath(), SummaryForDeclImport, Reason);
811 if (!CalleeSummary) {
812 // There isn't a callee for definition import but one for declaration
813 // import.
814 if (ImportDeclaration && SummaryForDeclImport) {
815 StringRef DeclSourceModule = SummaryForDeclImport->modulePath();
816
817 // Since definition takes precedence over declaration for the same VI,
818 // try emplace <VI, declaration> pair without checking insert result.
819 // If insert doesn't happen, there must be an existing entry keyed by
820 // VI. Note `ExportLists` only keeps track of exports due to imported
821 // definitions.
822 ImportList[DeclSourceModule].try_emplace(
823 VI.getGUID(), GlobalValueSummary::Declaration);
824 }
825 // Update with new larger threshold if this was a retry (otherwise
826 // we would have already inserted with NewThreshold above). Also
827 // update failure info if requested.
828 if (PreviouslyVisited) {
829 ProcessedThreshold = NewThreshold;
830 if (PrintImportFailures) {
831 assert(FailureInfo &&
832 "Expected FailureInfo for previously rejected candidate");
833 FailureInfo->Reason = Reason;
834 FailureInfo->Attempts++;
835 FailureInfo->MaxHotness =
836 std::max(FailureInfo->MaxHotness, Edge.second.getHotness());
837 }
838 } else if (PrintImportFailures) {
839 assert(!FailureInfo &&
840 "Expected no FailureInfo for newly rejected candidate");
841 FailureInfo = std::make_unique<FunctionImporter::ImportFailureInfo>(
842 VI, Edge.second.getHotness(), Reason, 1);
843 }
844 if (ForceImportAll) {
845 std::string Msg = std::string("Failed to import function ") +
846 VI.name().str() + " due to " +
847 getFailureName(Reason);
848 auto Error = make_error<StringError>(
849 Msg, make_error_code(errc::not_supported));
850 logAllUnhandledErrors(std::move(Error), errs(),
851 "Error importing module: ");
852 break;
853 } else {
854 LLVM_DEBUG(dbgs()
855 << "ignored! No qualifying callee with summary found.\n");
856 continue;
857 }
858 }
859
860 // "Resolve" the summary
861 CalleeSummary = CalleeSummary->getBaseObject();
862 ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
863
864 assert((ResolvedCalleeSummary->fflags().AlwaysInline || ForceImportAll ||
865 (ResolvedCalleeSummary->instCount() <= NewThreshold)) &&
866 "selectCallee() didn't honor the threshold");
867
868 auto ExportModulePath = ResolvedCalleeSummary->modulePath();
869
870 // Try emplace the definition entry, and update stats based on insertion
871 // status.
872 auto [Iter, Inserted] = ImportList[ExportModulePath].try_emplace(
873 VI.getGUID(), GlobalValueSummary::Definition);
874
875 // We previously decided to import this GUID definition if it was already
876 // inserted in the set of imports from the exporting module.
877 if (Inserted || Iter->second == GlobalValueSummary::Declaration) {
878 NumImportedFunctionsThinLink++;
879 if (IsHotCallsite)
880 NumImportedHotFunctionsThinLink++;
881 if (IsCriticalCallsite)
882 NumImportedCriticalFunctionsThinLink++;
883 }
884
885 if (Iter->second == GlobalValueSummary::Declaration)
886 Iter->second = GlobalValueSummary::Definition;
887
888 // Any calls/references made by this function will be marked exported
889 // later, in ComputeCrossModuleImport, after import decisions are
890 // complete, which is more efficient than adding them here.
891 if (ExportLists)
892 (*ExportLists)[ExportModulePath].insert(VI);
893 }
894
895 auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
896 // Adjust the threshold for next level of imported functions.
897 // The threshold is different for hot callsites because we can then
898 // inline chains of hot calls.
899 if (IsHotCallsite)
900 return Threshold * ImportHotInstrFactor;
901 return Threshold * ImportInstrFactor;
902 };
903
904 const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
905
906 ImportCount++;
907
908 // Insert the newly imported function to the worklist.
909 Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold);
910 }
911 }
912
computeImportForModule(const GVSummaryMapTy & DefinedGVSummaries,StringRef ModName,FunctionImporter::ImportMapTy & ImportList)913 void ModuleImportsManager::computeImportForModule(
914 const GVSummaryMapTy &DefinedGVSummaries, StringRef ModName,
915 FunctionImporter::ImportMapTy &ImportList) {
916 // Worklist contains the list of function imported in this module, for which
917 // we will analyse the callees and may import further down the callgraph.
918 SmallVector<EdgeInfo, 128> Worklist;
919 GlobalsImporter GVI(Index, DefinedGVSummaries, IsPrevailing, ImportList,
920 ExportLists);
921 FunctionImporter::ImportThresholdsTy ImportThresholds;
922
923 // Populate the worklist with the import for the functions in the current
924 // module
925 for (const auto &GVSummary : DefinedGVSummaries) {
926 #ifndef NDEBUG
927 // FIXME: Change the GVSummaryMapTy to hold ValueInfo instead of GUID
928 // so this map look up (and possibly others) can be avoided.
929 auto VI = Index.getValueInfo(GVSummary.first);
930 #endif
931 if (!Index.isGlobalValueLive(GVSummary.second)) {
932 LLVM_DEBUG(dbgs() << "Ignores Dead GUID: " << VI << "\n");
933 continue;
934 }
935 auto *FuncSummary =
936 dyn_cast<FunctionSummary>(GVSummary.second->getBaseObject());
937 if (!FuncSummary)
938 // Skip import for global variables
939 continue;
940 LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
941 computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
942 DefinedGVSummaries, IsPrevailing, Worklist, GVI,
943 ImportList, ExportLists, ImportThresholds);
944 }
945
946 // Process the newly imported functions and add callees to the worklist.
947 while (!Worklist.empty()) {
948 auto GVInfo = Worklist.pop_back_val();
949 auto *Summary = std::get<0>(GVInfo);
950 auto Threshold = std::get<1>(GVInfo);
951
952 if (auto *FS = dyn_cast<FunctionSummary>(Summary))
953 computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
954 IsPrevailing, Worklist, GVI, ImportList,
955 ExportLists, ImportThresholds);
956 }
957
958 // Print stats about functions considered but rejected for importing
959 // when requested.
960 if (PrintImportFailures) {
961 dbgs() << "Missed imports into module " << ModName << "\n";
962 for (auto &I : ImportThresholds) {
963 auto &ProcessedThreshold = std::get<0>(I.second);
964 auto &CalleeSummary = std::get<1>(I.second);
965 auto &FailureInfo = std::get<2>(I.second);
966 if (CalleeSummary)
967 continue; // We are going to import.
968 assert(FailureInfo);
969 FunctionSummary *FS = nullptr;
970 if (!FailureInfo->VI.getSummaryList().empty())
971 FS = dyn_cast<FunctionSummary>(
972 FailureInfo->VI.getSummaryList()[0]->getBaseObject());
973 dbgs() << FailureInfo->VI
974 << ": Reason = " << getFailureName(FailureInfo->Reason)
975 << ", Threshold = " << ProcessedThreshold
976 << ", Size = " << (FS ? (int)FS->instCount() : -1)
977 << ", MaxHotness = " << getHotnessName(FailureInfo->MaxHotness)
978 << ", Attempts = " << FailureInfo->Attempts << "\n";
979 }
980 }
981 }
982
983 #ifndef NDEBUG
isGlobalVarSummary(const ModuleSummaryIndex & Index,ValueInfo VI)984 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index, ValueInfo VI) {
985 auto SL = VI.getSummaryList();
986 return SL.empty()
987 ? false
988 : SL[0]->getSummaryKind() == GlobalValueSummary::GlobalVarKind;
989 }
990
isGlobalVarSummary(const ModuleSummaryIndex & Index,GlobalValue::GUID G)991 static bool isGlobalVarSummary(const ModuleSummaryIndex &Index,
992 GlobalValue::GUID G) {
993 if (const auto &VI = Index.getValueInfo(G))
994 return isGlobalVarSummary(Index, VI);
995 return false;
996 }
997
998 // Return the number of global variable summaries in ExportSet.
999 static unsigned
numGlobalVarSummaries(const ModuleSummaryIndex & Index,FunctionImporter::ExportSetTy & ExportSet)1000 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1001 FunctionImporter::ExportSetTy &ExportSet) {
1002 unsigned NumGVS = 0;
1003 for (auto &VI : ExportSet)
1004 if (isGlobalVarSummary(Index, VI.getGUID()))
1005 ++NumGVS;
1006 return NumGVS;
1007 }
1008
1009 // Given ImportMap, return the number of global variable summaries and record
1010 // the number of defined function summaries as output parameter.
1011 static unsigned
numGlobalVarSummaries(const ModuleSummaryIndex & Index,FunctionImporter::FunctionsToImportTy & ImportMap,unsigned & DefinedFS)1012 numGlobalVarSummaries(const ModuleSummaryIndex &Index,
1013 FunctionImporter::FunctionsToImportTy &ImportMap,
1014 unsigned &DefinedFS) {
1015 unsigned NumGVS = 0;
1016 DefinedFS = 0;
1017 for (auto &[GUID, Type] : ImportMap) {
1018 if (isGlobalVarSummary(Index, GUID))
1019 ++NumGVS;
1020 else if (Type == GlobalValueSummary::Definition)
1021 ++DefinedFS;
1022 }
1023 return NumGVS;
1024 }
1025 #endif
1026
1027 #ifndef NDEBUG
checkVariableImport(const ModuleSummaryIndex & Index,DenseMap<StringRef,FunctionImporter::ImportMapTy> & ImportLists,DenseMap<StringRef,FunctionImporter::ExportSetTy> & ExportLists)1028 static bool checkVariableImport(
1029 const ModuleSummaryIndex &Index,
1030 DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1031 DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1032 DenseSet<GlobalValue::GUID> FlattenedImports;
1033
1034 for (auto &ImportPerModule : ImportLists)
1035 for (auto &ExportPerModule : ImportPerModule.second)
1036 for (auto &[GUID, Type] : ExportPerModule.second)
1037 FlattenedImports.insert(GUID);
1038
1039 // Checks that all GUIDs of read/writeonly vars we see in export lists
1040 // are also in the import lists. Otherwise we my face linker undefs,
1041 // because readonly and writeonly vars are internalized in their
1042 // source modules. The exception would be if it has a linkage type indicating
1043 // that there may have been a copy existing in the importing module (e.g.
1044 // linkonce_odr). In that case we cannot accurately do this checking.
1045 auto IsReadOrWriteOnlyVarNeedingImporting = [&](StringRef ModulePath,
1046 const ValueInfo &VI) {
1047 auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
1048 Index.findSummaryInModule(VI, ModulePath));
1049 return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS)) &&
1050 !(GVS->linkage() == GlobalValue::AvailableExternallyLinkage ||
1051 GVS->linkage() == GlobalValue::WeakODRLinkage ||
1052 GVS->linkage() == GlobalValue::LinkOnceODRLinkage);
1053 };
1054
1055 for (auto &ExportPerModule : ExportLists)
1056 for (auto &VI : ExportPerModule.second)
1057 if (!FlattenedImports.count(VI.getGUID()) &&
1058 IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
1059 return false;
1060
1061 return true;
1062 }
1063 #endif
1064
1065 /// Compute all the import and export for every module using the Index.
ComputeCrossModuleImport(const ModuleSummaryIndex & Index,const DenseMap<StringRef,GVSummaryMapTy> & ModuleToDefinedGVSummaries,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing,DenseMap<StringRef,FunctionImporter::ImportMapTy> & ImportLists,DenseMap<StringRef,FunctionImporter::ExportSetTy> & ExportLists)1066 void llvm::ComputeCrossModuleImport(
1067 const ModuleSummaryIndex &Index,
1068 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1069 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1070 isPrevailing,
1071 DenseMap<StringRef, FunctionImporter::ImportMapTy> &ImportLists,
1072 DenseMap<StringRef, FunctionImporter::ExportSetTy> &ExportLists) {
1073 auto MIS = ModuleImportsManager::create(isPrevailing, Index, &ExportLists);
1074 // For each module that has function defined, compute the import/export lists.
1075 for (const auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
1076 auto &ImportList = ImportLists[DefinedGVSummaries.first];
1077 LLVM_DEBUG(dbgs() << "Computing import for Module '"
1078 << DefinedGVSummaries.first << "'\n");
1079 MIS->computeImportForModule(DefinedGVSummaries.second,
1080 DefinedGVSummaries.first, ImportList);
1081 }
1082
1083 // When computing imports we only added the variables and functions being
1084 // imported to the export list. We also need to mark any references and calls
1085 // they make as exported as well. We do this here, as it is more efficient
1086 // since we may import the same values multiple times into different modules
1087 // during the import computation.
1088 for (auto &ELI : ExportLists) {
1089 // `NewExports` tracks the VI that gets exported because the full definition
1090 // of its user/referencer gets exported.
1091 FunctionImporter::ExportSetTy NewExports;
1092 const auto &DefinedGVSummaries =
1093 ModuleToDefinedGVSummaries.lookup(ELI.first);
1094 for (auto &EI : ELI.second) {
1095 // Find the copy defined in the exporting module so that we can mark the
1096 // values it references in that specific definition as exported.
1097 // Below we will add all references and called values, without regard to
1098 // whether they are also defined in this module. We subsequently prune the
1099 // list to only include those defined in the exporting module, see comment
1100 // there as to why.
1101 auto DS = DefinedGVSummaries.find(EI.getGUID());
1102 // Anything marked exported during the import computation must have been
1103 // defined in the exporting module.
1104 assert(DS != DefinedGVSummaries.end());
1105 auto *S = DS->getSecond();
1106 S = S->getBaseObject();
1107 if (auto *GVS = dyn_cast<GlobalVarSummary>(S)) {
1108 // Export referenced functions and variables. We don't export/promote
1109 // objects referenced by writeonly variable initializer, because
1110 // we convert such variables initializers to "zeroinitializer".
1111 // See processGlobalForThinLTO.
1112 if (!Index.isWriteOnly(GVS))
1113 for (const auto &VI : GVS->refs())
1114 NewExports.insert(VI);
1115 } else {
1116 auto *FS = cast<FunctionSummary>(S);
1117 for (const auto &Edge : FS->calls())
1118 NewExports.insert(Edge.first);
1119 for (const auto &Ref : FS->refs())
1120 NewExports.insert(Ref);
1121 }
1122 }
1123 // Prune list computed above to only include values defined in the
1124 // exporting module. We do this after the above insertion since we may hit
1125 // the same ref/call target multiple times in above loop, and it is more
1126 // efficient to avoid a set lookup each time.
1127 for (auto EI = NewExports.begin(); EI != NewExports.end();) {
1128 if (!DefinedGVSummaries.count(EI->getGUID()))
1129 NewExports.erase(EI++);
1130 else
1131 ++EI;
1132 }
1133 ELI.second.insert(NewExports.begin(), NewExports.end());
1134 }
1135
1136 assert(checkVariableImport(Index, ImportLists, ExportLists));
1137 #ifndef NDEBUG
1138 LLVM_DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
1139 << " modules:\n");
1140 for (auto &ModuleImports : ImportLists) {
1141 auto ModName = ModuleImports.first;
1142 auto &Exports = ExportLists[ModName];
1143 unsigned NumGVS = numGlobalVarSummaries(Index, Exports);
1144 LLVM_DEBUG(dbgs() << "* Module " << ModName << " exports "
1145 << Exports.size() - NumGVS << " functions and " << NumGVS
1146 << " vars. Imports from " << ModuleImports.second.size()
1147 << " modules.\n");
1148 for (auto &Src : ModuleImports.second) {
1149 auto SrcModName = Src.first;
1150 unsigned DefinedFS = 0;
1151 unsigned NumGVSPerMod =
1152 numGlobalVarSummaries(Index, Src.second, DefinedFS);
1153 LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1154 << Src.second.size() - NumGVSPerMod - DefinedFS
1155 << " function declarations imported from " << SrcModName
1156 << "\n");
1157 LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod
1158 << " global vars imported from " << SrcModName << "\n");
1159 }
1160 }
1161 #endif
1162 }
1163
1164 #ifndef NDEBUG
dumpImportListForModule(const ModuleSummaryIndex & Index,StringRef ModulePath,FunctionImporter::ImportMapTy & ImportList)1165 static void dumpImportListForModule(const ModuleSummaryIndex &Index,
1166 StringRef ModulePath,
1167 FunctionImporter::ImportMapTy &ImportList) {
1168 LLVM_DEBUG(dbgs() << "* Module " << ModulePath << " imports from "
1169 << ImportList.size() << " modules.\n");
1170 for (auto &Src : ImportList) {
1171 auto SrcModName = Src.first;
1172 unsigned DefinedFS = 0;
1173 unsigned NumGVSPerMod = numGlobalVarSummaries(Index, Src.second, DefinedFS);
1174 LLVM_DEBUG(dbgs() << " - " << DefinedFS << " function definitions and "
1175 << Src.second.size() - DefinedFS - NumGVSPerMod
1176 << " function declarations imported from " << SrcModName
1177 << "\n");
1178 LLVM_DEBUG(dbgs() << " - " << NumGVSPerMod << " vars imported from "
1179 << SrcModName << "\n");
1180 }
1181 }
1182 #endif
1183
1184 /// Compute all the imports for the given module using the Index.
1185 ///
1186 /// \p isPrevailing is a callback that will be called with a global value's GUID
1187 /// and summary and should return whether the module corresponding to the
1188 /// summary contains the linker-prevailing copy of that value.
1189 ///
1190 /// \p ImportList will be populated with a map that can be passed to
1191 /// FunctionImporter::importFunctions() above (see description there).
ComputeCrossModuleImportForModuleForTest(StringRef ModulePath,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing,const ModuleSummaryIndex & Index,FunctionImporter::ImportMapTy & ImportList)1192 static void ComputeCrossModuleImportForModuleForTest(
1193 StringRef ModulePath,
1194 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1195 isPrevailing,
1196 const ModuleSummaryIndex &Index,
1197 FunctionImporter::ImportMapTy &ImportList) {
1198 // Collect the list of functions this module defines.
1199 // GUID -> Summary
1200 GVSummaryMapTy FunctionSummaryMap;
1201 Index.collectDefinedFunctionsForModule(ModulePath, FunctionSummaryMap);
1202
1203 // Compute the import list for this module.
1204 LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
1205 auto MIS = ModuleImportsManager::create(isPrevailing, Index);
1206 MIS->computeImportForModule(FunctionSummaryMap, ModulePath, ImportList);
1207
1208 #ifndef NDEBUG
1209 dumpImportListForModule(Index, ModulePath, ImportList);
1210 #endif
1211 }
1212
1213 /// Mark all external summaries in \p Index for import into the given module.
1214 /// Used for testing the case of distributed builds using a distributed index.
1215 ///
1216 /// \p ImportList will be populated with a map that can be passed to
1217 /// FunctionImporter::importFunctions() above (see description there).
ComputeCrossModuleImportForModuleFromIndexForTest(StringRef ModulePath,const ModuleSummaryIndex & Index,FunctionImporter::ImportMapTy & ImportList)1218 static void ComputeCrossModuleImportForModuleFromIndexForTest(
1219 StringRef ModulePath, const ModuleSummaryIndex &Index,
1220 FunctionImporter::ImportMapTy &ImportList) {
1221 for (const auto &GlobalList : Index) {
1222 // Ignore entries for undefined references.
1223 if (GlobalList.second.SummaryList.empty())
1224 continue;
1225
1226 auto GUID = GlobalList.first;
1227 assert(GlobalList.second.SummaryList.size() == 1 &&
1228 "Expected individual combined index to have one summary per GUID");
1229 auto &Summary = GlobalList.second.SummaryList[0];
1230 // Skip the summaries for the importing module. These are included to
1231 // e.g. record required linkage changes.
1232 if (Summary->modulePath() == ModulePath)
1233 continue;
1234 // Add an entry to provoke importing by thinBackend.
1235 auto [Iter, Inserted] = ImportList[Summary->modulePath()].try_emplace(
1236 GUID, Summary->importType());
1237 if (!Inserted) {
1238 // Use 'std::min' to make sure definition (with enum value 0) takes
1239 // precedence over declaration (with enum value 1).
1240 Iter->second = std::min(Iter->second, Summary->importType());
1241 }
1242 }
1243 #ifndef NDEBUG
1244 dumpImportListForModule(Index, ModulePath, ImportList);
1245 #endif
1246 }
1247
1248 // For SamplePGO, the indirect call targets for local functions will
1249 // have its original name annotated in profile. We try to find the
1250 // corresponding PGOFuncName as the GUID, and fix up the edges
1251 // accordingly.
updateValueInfoForIndirectCalls(ModuleSummaryIndex & Index,FunctionSummary * FS)1252 void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
1253 FunctionSummary *FS) {
1254 for (auto &EI : FS->mutableCalls()) {
1255 if (!EI.first.getSummaryList().empty())
1256 continue;
1257 auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
1258 if (GUID == 0)
1259 continue;
1260 // Update the edge to point directly to the correct GUID.
1261 auto VI = Index.getValueInfo(GUID);
1262 if (llvm::any_of(
1263 VI.getSummaryList(),
1264 [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
1265 // The mapping from OriginalId to GUID may return a GUID
1266 // that corresponds to a static variable. Filter it out here.
1267 // This can happen when
1268 // 1) There is a call to a library function which is not defined
1269 // in the index.
1270 // 2) There is a static variable with the OriginalGUID identical
1271 // to the GUID of the library function in 1);
1272 // When this happens the static variable in 2) will be found,
1273 // which needs to be filtered out.
1274 return SummaryPtr->getSummaryKind() ==
1275 GlobalValueSummary::GlobalVarKind;
1276 }))
1277 continue;
1278 EI.first = VI;
1279 }
1280 }
1281
updateIndirectCalls(ModuleSummaryIndex & Index)1282 void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
1283 for (const auto &Entry : Index) {
1284 for (const auto &S : Entry.second.SummaryList) {
1285 if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1286 updateValueInfoForIndirectCalls(Index, FS);
1287 }
1288 }
1289 }
1290
computeDeadSymbolsAndUpdateIndirectCalls(ModuleSummaryIndex & Index,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols,function_ref<PrevailingType (GlobalValue::GUID)> isPrevailing)1291 void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
1292 ModuleSummaryIndex &Index,
1293 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1294 function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
1295 assert(!Index.withGlobalValueDeadStripping());
1296 if (!ComputeDead ||
1297 // Don't do anything when nothing is live, this is friendly with tests.
1298 GUIDPreservedSymbols.empty()) {
1299 // Still need to update indirect calls.
1300 updateIndirectCalls(Index);
1301 return;
1302 }
1303 unsigned LiveSymbols = 0;
1304 SmallVector<ValueInfo, 128> Worklist;
1305 Worklist.reserve(GUIDPreservedSymbols.size() * 2);
1306 for (auto GUID : GUIDPreservedSymbols) {
1307 ValueInfo VI = Index.getValueInfo(GUID);
1308 if (!VI)
1309 continue;
1310 for (const auto &S : VI.getSummaryList())
1311 S->setLive(true);
1312 }
1313
1314 // Add values flagged in the index as live roots to the worklist.
1315 for (const auto &Entry : Index) {
1316 auto VI = Index.getValueInfo(Entry);
1317 for (const auto &S : Entry.second.SummaryList) {
1318 if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
1319 updateValueInfoForIndirectCalls(Index, FS);
1320 if (S->isLive()) {
1321 LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
1322 Worklist.push_back(VI);
1323 ++LiveSymbols;
1324 break;
1325 }
1326 }
1327 }
1328
1329 // Make value live and add it to the worklist if it was not live before.
1330 auto visit = [&](ValueInfo VI, bool IsAliasee) {
1331 // FIXME: If we knew which edges were created for indirect call profiles,
1332 // we could skip them here. Any that are live should be reached via
1333 // other edges, e.g. reference edges. Otherwise, using a profile collected
1334 // on a slightly different binary might provoke preserving, importing
1335 // and ultimately promoting calls to functions not linked into this
1336 // binary, which increases the binary size unnecessarily. Note that
1337 // if this code changes, the importer needs to change so that edges
1338 // to functions marked dead are skipped.
1339
1340 if (llvm::any_of(VI.getSummaryList(),
1341 [](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
1342 return S->isLive();
1343 }))
1344 return;
1345
1346 // We only keep live symbols that are known to be non-prevailing if any are
1347 // available_externally, linkonceodr, weakodr. Those symbols are discarded
1348 // later in the EliminateAvailableExternally pass and setting them to
1349 // not-live could break downstreams users of liveness information (PR36483)
1350 // or limit optimization opportunities.
1351 if (isPrevailing(VI.getGUID()) == PrevailingType::No) {
1352 bool KeepAliveLinkage = false;
1353 bool Interposable = false;
1354 for (const auto &S : VI.getSummaryList()) {
1355 if (S->linkage() == GlobalValue::AvailableExternallyLinkage ||
1356 S->linkage() == GlobalValue::WeakODRLinkage ||
1357 S->linkage() == GlobalValue::LinkOnceODRLinkage)
1358 KeepAliveLinkage = true;
1359 else if (GlobalValue::isInterposableLinkage(S->linkage()))
1360 Interposable = true;
1361 }
1362
1363 if (!IsAliasee) {
1364 if (!KeepAliveLinkage)
1365 return;
1366
1367 if (Interposable)
1368 report_fatal_error(
1369 "Interposable and available_externally/linkonce_odr/weak_odr "
1370 "symbol");
1371 }
1372 }
1373
1374 for (const auto &S : VI.getSummaryList())
1375 S->setLive(true);
1376 ++LiveSymbols;
1377 Worklist.push_back(VI);
1378 };
1379
1380 while (!Worklist.empty()) {
1381 auto VI = Worklist.pop_back_val();
1382 for (const auto &Summary : VI.getSummaryList()) {
1383 if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
1384 // If this is an alias, visit the aliasee VI to ensure that all copies
1385 // are marked live and it is added to the worklist for further
1386 // processing of its references.
1387 visit(AS->getAliaseeVI(), true);
1388 continue;
1389 }
1390 for (auto Ref : Summary->refs())
1391 visit(Ref, false);
1392 if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
1393 for (auto Call : FS->calls())
1394 visit(Call.first, false);
1395 }
1396 }
1397 Index.setWithGlobalValueDeadStripping();
1398
1399 unsigned DeadSymbols = Index.size() - LiveSymbols;
1400 LLVM_DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
1401 << " symbols Dead \n");
1402 NumDeadSymbols += DeadSymbols;
1403 NumLiveSymbols += LiveSymbols;
1404 }
1405
1406 // Compute dead symbols and propagate constants in combined index.
computeDeadSymbolsWithConstProp(ModuleSummaryIndex & Index,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols,function_ref<PrevailingType (GlobalValue::GUID)> isPrevailing,bool ImportEnabled)1407 void llvm::computeDeadSymbolsWithConstProp(
1408 ModuleSummaryIndex &Index,
1409 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
1410 function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
1411 bool ImportEnabled) {
1412 computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
1413 isPrevailing);
1414 if (ImportEnabled)
1415 Index.propagateAttributes(GUIDPreservedSymbols);
1416 }
1417
1418 /// Compute the set of summaries needed for a ThinLTO backend compilation of
1419 /// \p ModulePath.
gatherImportedSummariesForModule(StringRef ModulePath,const DenseMap<StringRef,GVSummaryMapTy> & ModuleToDefinedGVSummaries,const FunctionImporter::ImportMapTy & ImportList,std::map<std::string,GVSummaryMapTy> & ModuleToSummariesForIndex,GVSummaryPtrSet & DecSummaries)1420 void llvm::gatherImportedSummariesForModule(
1421 StringRef ModulePath,
1422 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
1423 const FunctionImporter::ImportMapTy &ImportList,
1424 std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex,
1425 GVSummaryPtrSet &DecSummaries) {
1426 // Include all summaries from the importing module.
1427 ModuleToSummariesForIndex[std::string(ModulePath)] =
1428 ModuleToDefinedGVSummaries.lookup(ModulePath);
1429 // Include summaries for imports.
1430 for (const auto &ILI : ImportList) {
1431 auto &SummariesForIndex = ModuleToSummariesForIndex[std::string(ILI.first)];
1432
1433 const auto &DefinedGVSummaries =
1434 ModuleToDefinedGVSummaries.lookup(ILI.first);
1435 for (const auto &[GUID, Type] : ILI.second) {
1436 const auto &DS = DefinedGVSummaries.find(GUID);
1437 assert(DS != DefinedGVSummaries.end() &&
1438 "Expected a defined summary for imported global value");
1439 if (Type == GlobalValueSummary::Declaration)
1440 DecSummaries.insert(DS->second);
1441
1442 SummariesForIndex[GUID] = DS->second;
1443 }
1444 }
1445 }
1446
1447 /// Emit the files \p ModulePath will import from into \p OutputFilename.
EmitImportsFiles(StringRef ModulePath,StringRef OutputFilename,const std::map<std::string,GVSummaryMapTy> & ModuleToSummariesForIndex)1448 std::error_code llvm::EmitImportsFiles(
1449 StringRef ModulePath, StringRef OutputFilename,
1450 const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
1451 std::error_code EC;
1452 raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::OF_Text);
1453 if (EC)
1454 return EC;
1455 for (const auto &ILI : ModuleToSummariesForIndex)
1456 // The ModuleToSummariesForIndex map includes an entry for the current
1457 // Module (needed for writing out the index files). We don't want to
1458 // include it in the imports file, however, so filter it out.
1459 if (ILI.first != ModulePath)
1460 ImportsOS << ILI.first << "\n";
1461 return std::error_code();
1462 }
1463
convertToDeclaration(GlobalValue & GV)1464 bool llvm::convertToDeclaration(GlobalValue &GV) {
1465 LLVM_DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName()
1466 << "\n");
1467 if (Function *F = dyn_cast<Function>(&GV)) {
1468 F->deleteBody();
1469 F->clearMetadata();
1470 F->setComdat(nullptr);
1471 } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
1472 V->setInitializer(nullptr);
1473 V->setLinkage(GlobalValue::ExternalLinkage);
1474 V->clearMetadata();
1475 V->setComdat(nullptr);
1476 } else {
1477 GlobalValue *NewGV;
1478 if (GV.getValueType()->isFunctionTy())
1479 NewGV =
1480 Function::Create(cast<FunctionType>(GV.getValueType()),
1481 GlobalValue::ExternalLinkage, GV.getAddressSpace(),
1482 "", GV.getParent());
1483 else
1484 NewGV =
1485 new GlobalVariable(*GV.getParent(), GV.getValueType(),
1486 /*isConstant*/ false, GlobalValue::ExternalLinkage,
1487 /*init*/ nullptr, "",
1488 /*insertbefore*/ nullptr, GV.getThreadLocalMode(),
1489 GV.getType()->getAddressSpace());
1490 NewGV->takeName(&GV);
1491 GV.replaceAllUsesWith(NewGV);
1492 return false;
1493 }
1494 if (!GV.isImplicitDSOLocal())
1495 GV.setDSOLocal(false);
1496 return true;
1497 }
1498
/// Apply the results recorded in the summaries of \p DefinedGlobals to the
/// matching global values of \p TheModule.
///
/// For every function, global variable and alias that has an entry in
/// \p DefinedGlobals this updates visibility and linkage from the summary,
/// or drops the definition via convertToDeclaration when the summary asks for
/// available_externally on an interposable definition. When \p PropagateAttrs
/// is true, the ReadNone / ReadOnly / NoRecurse / NoUnwind function flags from
/// the summary are added to functions that do not already have them.
/// Afterwards, members of comdats recorded as non-prevailing are removed from
/// their comdat and made available_externally, and aliases whose base object
/// is available_externally are made available_externally as well.
void llvm::thinLTOFinalizeInModule(Module &TheModule,
                                   const GVSummaryMapTy &DefinedGlobals,
                                   bool PropagateAttrs) {
  // Comdats collected by FinalizeInModule whose remaining members are fixed up
  // after all global values have been visited.
  DenseSet<Comdat *> NonPrevailingComdats;
  auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
    // See if the global summary analysis computed a new resolved linkage.
    const auto &GS = DefinedGlobals.find(GV.getGUID());
    if (GS == DefinedGlobals.end())
      return;

    // Attribute propagation applies only when both the summary and the IR
    // value are functions.
    if (Propagate)
      if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
        if (Function *F = dyn_cast<Function>(&GV)) {
          // TODO: propagate ReadNone and ReadOnly.
          if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
            F->setDoesNotAccessMemory();

          if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
            F->setOnlyReadsMemory();

          if (FS->fflags().NoRecurse && !F->doesNotRecurse())
            F->setDoesNotRecurse();

          if (FS->fflags().NoUnwind && !F->doesNotThrow())
            F->setDoesNotThrow();
        }
      }

    auto NewLinkage = GS->second->linkage();
    if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
        // Don't internalize anything here, because the code below
        // lacks necessary correctness checks. Leave this job to
        // LLVM 'internalize' pass.
        GlobalValue::isLocalLinkage(NewLinkage) ||
        // In case it was dead and already converted to declaration.
        GV.isDeclaration())
      return;

    // Set the potentially more constraining visibility computed from summaries.
    // The DefaultVisibility condition is because older GlobalValueSummary does
    // not record DefaultVisibility and we don't want to change protected/hidden
    // to default.
    if (GS->second->getVisibility() != GlobalValue::DefaultVisibility)
      GV.setVisibility(GS->second->getVisibility());

    // Nothing further to do when the linkage is already the resolved one. Note
    // that the visibility update above has already been applied at this point.
    if (NewLinkage == GV.getLinkage())
      return;

    // Check for a non-prevailing def that has interposable linkage
    // (e.g. non-odr weak or linkonce). In that case we can't simply
    // convert to available_externally, since it would lose the
    // interposable property and possibly get inlined. Simply drop
    // the definition in that case.
    if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
        GlobalValue::isInterposableLinkage(GV.getLinkage())) {
      if (!convertToDeclaration(GV))
        // FIXME: Change this to collect replaced GVs and later erase
        // them from the parent module once thinLTOResolvePrevailingGUID is
        // changed to enable this for aliases.
        llvm_unreachable("Expected GV to be converted");
    } else {
      // If all copies of the original symbol had global unnamed addr and
      // linkonce_odr linkage, or if all of them had local unnamed addr linkage
      // and are constants, then it should be an auto hide symbol. In that case
      // the thin link would have marked it as CanAutoHide. Add hidden
      // visibility to the symbol to preserve the property.
      if (NewLinkage == GlobalValue::WeakODRLinkage &&
          GS->second->canAutoHide()) {
        assert(GV.canBeOmittedFromSymbolTable());
        GV.setVisibility(GlobalValue::HiddenVisibility);
      }

      LLVM_DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName()
                        << "` from " << GV.getLinkage() << " to " << NewLinkage
                        << "\n");
      GV.setLinkage(NewLinkage);
    }
    // Remove declarations from comdats, including available_externally
    // as this is a declaration for the linker, and will be dropped eventually.
    // It is illegal for comdats to contain declarations.
    auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
    if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
      // Only a comdat that shares its name with this object is recorded as
      // non-prevailing; the object is removed from its comdat either way.
      if (GO->getComdat()->getName() == GO->getName())
        NonPrevailingComdats.insert(GO->getComdat());
      GO->setComdat(nullptr);
    }
  };

  // Process functions and global now
  // Only the function walk forwards PropagateAttrs; variables and aliases use
  // the lambda's default of no propagation.
  for (auto &GV : TheModule)
    FinalizeInModule(GV, PropagateAttrs);
  for (auto &GV : TheModule.globals())
    FinalizeInModule(GV);
  for (auto &GV : TheModule.aliases())
    FinalizeInModule(GV);

  // For a non-prevailing comdat, all its members must be available_externally.
  // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle
  // local linkage GlobalValues.
  if (NonPrevailingComdats.empty())
    return;
  for (auto &GO : TheModule.global_objects()) {
    if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) {
      GO.setComdat(nullptr);
      GO.setLinkage(GlobalValue::AvailableExternallyLinkage);
    }
  }
  // Iterate to a fixed point: each pass may turn further aliases into
  // available_externally, and the loop repeats until a pass changes nothing.
  bool Changed;
  do {
    Changed = false;
    // If an alias references a GlobalValue in a non-prevailing comdat, change
    // it to available_externally. For simplicity we only handle GlobalValue and
    // ConstantExpr with a base object. ConstantExpr without a base object is
    // unlikely used in a COMDAT.
    for (auto &GA : TheModule.aliases()) {
      if (GA.hasAvailableExternallyLinkage())
        continue;
      GlobalObject *Obj = GA.getAliaseeObject();
      assert(Obj && "aliasee without an base object is unimplemented");
      if (Obj->hasAvailableExternallyLinkage()) {
        GA.setLinkage(GlobalValue::AvailableExternallyLinkage);
        Changed = true;
      }
    }
  } while (Changed);
}
1625
1626 /// Run internalization on \p TheModule based on symmary analysis.
thinLTOInternalizeModule(Module & TheModule,const GVSummaryMapTy & DefinedGlobals)1627 void llvm::thinLTOInternalizeModule(Module &TheModule,
1628 const GVSummaryMapTy &DefinedGlobals) {
1629 // Declare a callback for the internalize pass that will ask for every
1630 // candidate GlobalValue if it can be internalized or not.
1631 auto MustPreserveGV = [&](const GlobalValue &GV) -> bool {
1632 // It may be the case that GV is on a chain of an ifunc, its alias and
1633 // subsequent aliases. In this case, the summary for the value is not
1634 // available.
1635 if (isa<GlobalIFunc>(&GV) ||
1636 (isa<GlobalAlias>(&GV) &&
1637 isa<GlobalIFunc>(cast<GlobalAlias>(&GV)->getAliaseeObject())))
1638 return true;
1639
1640 // Lookup the linkage recorded in the summaries during global analysis.
1641 auto GS = DefinedGlobals.find(GV.getGUID());
1642 if (GS == DefinedGlobals.end()) {
1643 // Must have been promoted (possibly conservatively). Find original
1644 // name so that we can access the correct summary and see if it can
1645 // be internalized again.
1646 // FIXME: Eventually we should control promotion instead of promoting
1647 // and internalizing again.
1648 StringRef OrigName =
1649 ModuleSummaryIndex::getOriginalNameBeforePromote(GV.getName());
1650 std::string OrigId = GlobalValue::getGlobalIdentifier(
1651 OrigName, GlobalValue::InternalLinkage,
1652 TheModule.getSourceFileName());
1653 GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId));
1654 if (GS == DefinedGlobals.end()) {
1655 // Also check the original non-promoted non-globalized name. In some
1656 // cases a preempted weak value is linked in as a local copy because
1657 // it is referenced by an alias (IRLinker::linkGlobalValueProto).
1658 // In that case, since it was originally not a local value, it was
1659 // recorded in the index using the original name.
1660 // FIXME: This may not be needed once PR27866 is fixed.
1661 GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName));
1662 assert(GS != DefinedGlobals.end());
1663 }
1664 }
1665 return !GlobalValue::isLocalLinkage(GS->second->linkage());
1666 };
1667
1668 // FIXME: See if we can just internalize directly here via linkage changes
1669 // based on the index, rather than invoking internalizeModule.
1670 internalizeModule(TheModule, MustPreserveGV);
1671 }
1672
1673 /// Make alias a clone of its aliasee.
replaceAliasWithAliasee(Module * SrcModule,GlobalAlias * GA)1674 static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
1675 Function *Fn = cast<Function>(GA->getAliaseeObject());
1676
1677 ValueToValueMapTy VMap;
1678 Function *NewFn = CloneFunction(Fn, VMap);
1679 // Clone should use the original alias's linkage, visibility and name, and we
1680 // ensure all uses of alias instead use the new clone (casted if necessary).
1681 NewFn->setLinkage(GA->getLinkage());
1682 NewFn->setVisibility(GA->getVisibility());
1683 GA->replaceAllUsesWith(NewFn);
1684 NewFn->takeName(GA);
1685 return NewFn;
1686 }
1687
1688 // Internalize values that we marked with specific attribute
1689 // in processGlobalForThinLTO.
internalizeGVsAfterImport(Module & M)1690 static void internalizeGVsAfterImport(Module &M) {
1691 for (auto &GV : M.globals())
1692 // Skip GVs which have been converted to declarations
1693 // by dropDeadSymbols.
1694 if (!GV.isDeclaration() && GV.hasAttribute("thinlto-internalize")) {
1695 GV.setLinkage(GlobalValue::InternalLinkage);
1696 GV.setVisibility(GlobalValue::DefaultVisibility);
1697 }
1698 }
1699
// Automatically import functions in Module \p DestModule based on the summaries
// index.
//
// \p ImportList maps each source module name to the GUIDs to import from it
// together with their import kind. Source modules are loaded through
// ModuleLoader and processed in sorted name order. Within each module the
// functions, global variables and aliases whose import kind is
// GlobalValueSummary::Definition are materialized and then moved into
// \p DestModule with an IRMover; an alias is imported as a clone of the
// function it aliases.
//
// Returns an Error if loading, materializing or moving fails. Returns true
// right away if renameModuleForThinLTO reports an error for a source module.
// Otherwise returns the number of imported globals converted to bool.
Expected<bool> FunctionImporter::importFunctions(
    Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
  LLVM_DEBUG(dbgs() << "Starting import for Module "
                    << DestModule.getModuleIdentifier() << "\n");
  // ImportedCount counts every imported global (functions, variables and
  // alias clones); ImportedGVCount counts only the global variables.
  unsigned ImportedCount = 0, ImportedGVCount = 0;

  IRMover Mover(DestModule);
  // Do the actual import of functions now, one Module at a time
  // Collect the module names into an ordered set so the modules are visited in
  // sorted name order rather than in ImportList's own iteration order.
  std::set<StringRef> ModuleNameOrderedList;
  for (const auto &FunctionsToImportPerModule : ImportList) {
    ModuleNameOrderedList.insert(FunctionsToImportPerModule.first);
  }

  // Returns the import kind recorded for GUID, or std::nullopt when GUID is
  // not in the given per-module import set.
  auto getImportType = [&](const FunctionsToImportTy &GUIDToImportType,
                           GlobalValue::GUID GUID)
      -> std::optional<GlobalValueSummary::ImportKind> {
    auto Iter = GUIDToImportType.find(GUID);
    if (Iter == GUIDToImportType.end())
      return std::nullopt;
    return Iter->second;
  };

  for (const auto &Name : ModuleNameOrderedList) {
    // Get the module for the import
    const auto &FunctionsToImportPerModule = ImportList.find(Name);
    assert(FunctionsToImportPerModule != ImportList.end());
    Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(Name);
    if (!SrcModuleOrErr)
      return SrcModuleOrErr.takeError();
    std::unique_ptr<Module> SrcModule = std::move(*SrcModuleOrErr);
    assert(&DestModule.getContext() == &SrcModule->getContext() &&
           "Context mismatch");

    // If modules were created with lazy metadata loading, materialize it
    // now, before linking it (otherwise this will be a noop).
    if (Error Err = SrcModule->materializeMetadata())
      return std::move(Err);

    auto &ImportGUIDs = FunctionsToImportPerModule->second;

    // Find the globals to import
    SetVector<GlobalValue *> GlobalsToImport;
    // Pass 1: functions. Only entries whose import kind is Definition are
    // materialized and queued; everything else is just logged.
    for (Function &F : *SrcModule) {
      if (!F.hasName())
        continue;
      auto GUID = F.getGUID();
      auto MaybeImportType = getImportType(ImportGUIDs, GUID);

      bool ImportDefinition =
          (MaybeImportType &&
           (*MaybeImportType == GlobalValueSummary::Definition));

      LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
                        << " importing function"
                        << (ImportDefinition
                                ? " definition "
                                : (MaybeImportType ? " declaration " : " "))
                        << GUID << " " << F.getName() << " from "
                        << SrcModule->getSourceFileName() << "\n");
      if (ImportDefinition) {
        if (Error Err = F.materialize())
          return std::move(Err);
        // MemProf should match function's definition and summary,
        // 'thinlto_src_module' is needed.
        if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
          // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
          // statistics and debugging.
          F.setMetadata(
              "thinlto_src_module",
              MDNode::get(DestModule.getContext(),
                          {MDString::get(DestModule.getContext(),
                                         SrcModule->getModuleIdentifier())}));
          F.setMetadata(
              "thinlto_src_file",
              MDNode::get(DestModule.getContext(),
                          {MDString::get(DestModule.getContext(),
                                         SrcModule->getSourceFileName())}));
        }
        GlobalsToImport.insert(&F);
      }
    }
    // Pass 2: global variables, handled like functions but without the source
    // metadata. ImportedGVCount grows only when the insertion is new.
    for (GlobalVariable &GV : SrcModule->globals()) {
      if (!GV.hasName())
        continue;
      auto GUID = GV.getGUID();
      auto MaybeImportType = getImportType(ImportGUIDs, GUID);

      bool ImportDefinition =
          (MaybeImportType &&
           (*MaybeImportType == GlobalValueSummary::Definition));

      LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
                        << " importing global"
                        << (ImportDefinition
                                ? " definition "
                                : (MaybeImportType ? " declaration " : " "))
                        << GUID << " " << GV.getName() << " from "
                        << SrcModule->getSourceFileName() << "\n");
      if (ImportDefinition) {
        if (Error Err = GV.materialize())
          return std::move(Err);
        ImportedGVCount += GlobalsToImport.insert(&GV);
      }
    }
    // Pass 3: aliases. Aliases whose base object is an ifunc are skipped; the
    // others are imported as a clone of the aliased function.
    for (GlobalAlias &GA : SrcModule->aliases()) {
      if (!GA.hasName() || isa<GlobalIFunc>(GA.getAliaseeObject()))
        continue;
      auto GUID = GA.getGUID();
      auto MaybeImportType = getImportType(ImportGUIDs, GUID);

      bool ImportDefinition =
          (MaybeImportType &&
           (*MaybeImportType == GlobalValueSummary::Definition));

      LLVM_DEBUG(dbgs() << (MaybeImportType ? "Is" : "Not")
                        << " importing alias"
                        << (ImportDefinition
                                ? " definition "
                                : (MaybeImportType ? " declaration " : " "))
                        << GUID << " " << GA.getName() << " from "
                        << SrcModule->getSourceFileName() << "\n");
      if (ImportDefinition) {
        if (Error Err = GA.materialize())
          return std::move(Err);
        // Import alias as a copy of its aliasee.
        GlobalObject *GO = GA.getAliaseeObject();
        if (Error Err = GO->materialize())
          return std::move(Err);
        auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
        LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
                          << GO->getName() << " from "
                          << SrcModule->getSourceFileName() << "\n");
        if (EnableImportMetadata || EnableMemProfContextDisambiguation) {
          // Add 'thinlto_src_module' and 'thinlto_src_file' metadata for
          // statistics and debugging.
          Fn->setMetadata(
              "thinlto_src_module",
              MDNode::get(DestModule.getContext(),
                          {MDString::get(DestModule.getContext(),
                                         SrcModule->getModuleIdentifier())}));
          Fn->setMetadata(
              "thinlto_src_file",
              MDNode::get(DestModule.getContext(),
                          {MDString::get(DestModule.getContext(),
                                         SrcModule->getSourceFileName())}));
        }
        GlobalsToImport.insert(Fn);
      }
    }

    // Upgrade debug info after we're done materializing all the globals and we
    // have loaded all the required metadata!
    UpgradeDebugInfo(*SrcModule);

    // Set the partial sample profile ratio in the profile summary module flag
    // of the imported source module, if applicable, so that the profile summary
    // module flag will match with that of the destination module when it's
    // imported.
    SrcModule->setPartialSampleProfileRatio(Index);

    // Link in the specified functions.
    // NOTE(review): a true result from renameModuleForThinLTO is treated as an
    // error elsewhere in this file; here it ends the import early with a
    // `true` value rather than an Error.
    if (renameModuleForThinLTO(*SrcModule, Index, ClearDSOLocalOnDeclarations,
                               &GlobalsToImport))
      return true;

    if (PrintImports) {
      for (const auto *GV : GlobalsToImport)
        dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
               << " from " << SrcModule->getSourceFileName() << "\n";
    }

    // Ownership of SrcModule is handed to the mover; SrcModule must not be
    // used after this point.
    if (Error Err = Mover.move(std::move(SrcModule),
                               GlobalsToImport.getArrayRef(), nullptr,
                               /*IsPerformingImport=*/true))
      return createStringError(errc::invalid_argument,
                               Twine("Function Import: link error: ") +
                                   toString(std::move(Err)));

    ImportedCount += GlobalsToImport.size();
    NumImportedModules++;
  }

  internalizeGVsAfterImport(DestModule);

  // Everything imported that is not a global variable is counted as a
  // function; this includes the clones created for aliases.
  NumImportedFunctions += (ImportedCount - ImportedGVCount);
  NumImportedGlobalVars += ImportedGVCount;

  // TODO: Print counters for definitions and declarations in the debugging log.
  LLVM_DEBUG(dbgs() << "Imported " << ImportedCount - ImportedGVCount
                    << " functions for Module "
                    << DestModule.getModuleIdentifier() << "\n");
  LLVM_DEBUG(dbgs() << "Imported " << ImportedGVCount
                    << " global variables for Module "
                    << DestModule.getModuleIdentifier() << "\n");
  return ImportedCount;
}
1898
doImportingForModuleForTest(Module & M,function_ref<bool (GlobalValue::GUID,const GlobalValueSummary *)> isPrevailing)1899 static bool doImportingForModuleForTest(
1900 Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1901 isPrevailing) {
1902 if (SummaryFile.empty())
1903 report_fatal_error("error: -function-import requires -summary-file\n");
1904 Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr =
1905 getModuleSummaryIndexForFile(SummaryFile);
1906 if (!IndexPtrOrErr) {
1907 logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(),
1908 "Error loading file '" + SummaryFile + "': ");
1909 return false;
1910 }
1911 std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr);
1912
1913 // First step is collecting the import list.
1914 FunctionImporter::ImportMapTy ImportList;
1915 // If requested, simply import all functions in the index. This is used
1916 // when testing distributed backend handling via the opt tool, when
1917 // we have distributed indexes containing exactly the summaries to import.
1918 if (ImportAllIndex)
1919 ComputeCrossModuleImportForModuleFromIndexForTest(M.getModuleIdentifier(),
1920 *Index, ImportList);
1921 else
1922 ComputeCrossModuleImportForModuleForTest(M.getModuleIdentifier(),
1923 isPrevailing, *Index, ImportList);
1924
1925 // Conservatively mark all internal values as promoted. This interface is
1926 // only used when doing importing via the function importing pass. The pass
1927 // is only enabled when testing importing via the 'opt' tool, which does
1928 // not do the ThinLink that would normally determine what values to promote.
1929 for (auto &I : *Index) {
1930 for (auto &S : I.second.SummaryList) {
1931 if (GlobalValue::isLocalLinkage(S->linkage()))
1932 S->setLinkage(GlobalValue::ExternalLinkage);
1933 }
1934 }
1935
1936 // Next we need to promote to global scope and rename any local values that
1937 // are potentially exported to other modules.
1938 if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
1939 /*GlobalsToImport=*/nullptr)) {
1940 errs() << "Error renaming module\n";
1941 return true;
1942 }
1943
1944 // Perform the import now.
1945 auto ModuleLoader = [&M](StringRef Identifier) {
1946 return loadFile(std::string(Identifier), M.getContext());
1947 };
1948 FunctionImporter Importer(*Index, ModuleLoader,
1949 /*ClearDSOLocalOnDeclarations=*/false);
1950 Expected<bool> Result = Importer.importFunctions(M, ImportList);
1951
1952 // FIXME: Probably need to propagate Errors through the pass manager.
1953 if (!Result) {
1954 logAllUnhandledErrors(Result.takeError(), errs(),
1955 "Error importing module: ");
1956 return true;
1957 }
1958
1959 return true;
1960 }
1961
run(Module & M,ModuleAnalysisManager & AM)1962 PreservedAnalyses FunctionImportPass::run(Module &M,
1963 ModuleAnalysisManager &AM) {
1964 // This is only used for testing the function import pass via opt, where we
1965 // don't have prevailing information from the LTO context available, so just
1966 // conservatively assume everything is prevailing (which is fine for the very
1967 // limited use of prevailing checking in this pass).
1968 auto isPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
1969 return true;
1970 };
1971 if (!doImportingForModuleForTest(M, isPrevailing))
1972 return PreservedAnalyses::all();
1973
1974 return PreservedAnalyses::none();
1975 }
1976