1 //===-LTO.cpp - LLVM Link Time Optimizer ----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements functions and classes used to support LTO. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/LTO/LTO.h" 14 #include "llvm/ADT/ScopeExit.h" 15 #include "llvm/ADT/SmallSet.h" 16 #include "llvm/ADT/StableHashing.h" 17 #include "llvm/ADT/Statistic.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 20 #include "llvm/Analysis/StackSafetyAnalysis.h" 21 #include "llvm/Analysis/TargetLibraryInfo.h" 22 #include "llvm/Analysis/TargetTransformInfo.h" 23 #include "llvm/Bitcode/BitcodeReader.h" 24 #include "llvm/Bitcode/BitcodeWriter.h" 25 #include "llvm/CGData/CodeGenData.h" 26 #include "llvm/CodeGen/Analysis.h" 27 #include "llvm/Config/llvm-config.h" 28 #include "llvm/IR/AutoUpgrade.h" 29 #include "llvm/IR/DiagnosticPrinter.h" 30 #include "llvm/IR/Intrinsics.h" 31 #include "llvm/IR/LLVMRemarkStreamer.h" 32 #include "llvm/IR/LegacyPassManager.h" 33 #include "llvm/IR/Mangler.h" 34 #include "llvm/IR/Metadata.h" 35 #include "llvm/IR/RuntimeLibcalls.h" 36 #include "llvm/LTO/LTOBackend.h" 37 #include "llvm/Linker/IRMover.h" 38 #include "llvm/MC/TargetRegistry.h" 39 #include "llvm/Object/IRObjectFile.h" 40 #include "llvm/Support/Caching.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/Compiler.h" 43 #include "llvm/Support/Error.h" 44 #include "llvm/Support/FileSystem.h" 45 #include "llvm/Support/JSON.h" 46 #include "llvm/Support/MemoryBuffer.h" 47 #include "llvm/Support/Path.h" 48 #include "llvm/Support/Process.h" 49 #include "llvm/Support/SHA1.h" 50 
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"

#include <optional>
#include <set>

using namespace llvm;
using namespace lto;
using namespace object;

#define DEBUG_TYPE "lto"

// Hidden debugging flag, off by default.
static cl::opt<bool>
    DumpThinCGSCCs("dump-thin-cg-sccs", cl::init(false), cl::Hidden,
                   cl::desc("Dump the SCCs in the ThinLTO index's callgraph"));

// Declared here, defined in another translation unit.
extern cl::opt<bool> CodeGenDataThinLTOTwoRounds;

// Declared here, defined in another translation unit. When set, non-prevailing
// copies keep their linkage instead of becoming available_externally (see
// thinLTOResolvePrevailingGUID).
extern cl::opt<bool> ForceImportAll;

namespace llvm {
/// Enable global value internalization in LTO.
cl::opt<bool> EnableLTOInternalization(
    "enable-lto-internalization", cl::init(true), cl::Hidden,
    cl::desc("Enable global value internalization in LTO"));

// Hidden flag, off by default. When set, the LTO constructor creates a string
// saver so that symbol names used as GlobalResolutions keys are copied.
static cl::opt<bool>
    LTOKeepSymbolCopies("lto-keep-symbol-copies", cl::init(false), cl::Hidden,
                        cl::desc("Keep copies of symbols in LTO indexing"));

/// Indicate we are linking with an allocator that supports hot/cold operator
/// new interfaces.
extern cl::opt<bool> SupportsHotColdNew;

/// Enable MemProf context disambiguation for thin link.
extern cl::opt<bool> EnableMemProfContextDisambiguation;
} // namespace llvm

// Computes a unique hash for the Module considering the current list of
// export/import and other global analysis results.
// Returns the hash in its hexadecimal representation.
std::string llvm::computeLTOCacheKey(
    const Config &Conf, const ModuleSummaryIndex &Index, StringRef ModuleID,
    const FunctionImporter::ImportMapTy &ImportList,
    const FunctionImporter::ExportSetTy &ExportList,
    const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
    const GVSummaryMapTy &DefinedGlobals,
    const DenseSet<GlobalValue::GUID> &CfiFunctionDefs,
    const DenseSet<GlobalValue::GUID> &CfiFunctionDecls) {
  // Compute the unique hash for this entry.
  // This is based on the current compiler version, the module itself, the
  // export list, the hash for every single module in the import list, the
  // list of ResolvedODR for the module, and the list of preserved symbols.
  //
  // NOTE: the key is the SHA1 of an ordered byte stream, so the order of the
  // Hasher updates below is part of the key format. Reordering any of them
  // changes every key that is produced.
  SHA1 Hasher;

  // Start with the compiler revision
  Hasher.update(LLVM_VERSION_STRING);
#ifdef LLVM_REVISION
  Hasher.update(LLVM_REVISION);
#endif

  // Include the parts of the LTO configuration that affect code generation.

  // Hashes the string followed by a single 0 byte, which keeps consecutive
  // strings delimited in the stream.
  auto AddString = [&](StringRef Str) {
    Hasher.update(Str);
    Hasher.update(ArrayRef<uint8_t>{0});
  };
  // Hashes a 32-bit value in little-endian byte order.
  auto AddUnsigned = [&](unsigned I) {
    uint8_t Data[4];
    support::endian::write32le(Data, I);
    Hasher.update(Data);
  };
  // Hashes a 64-bit value in little-endian byte order.
  auto AddUint64 = [&](uint64_t I) {
    uint8_t Data[8];
    support::endian::write64le(Data, I);
    Hasher.update(Data);
  };
  // Hashes a single byte.
  auto AddUint8 = [&](const uint8_t I) {
    Hasher.update(ArrayRef<uint8_t>(&I, 1));
  };
  AddString(Conf.CPU);
  // FIXME: Hash more of Options. For now all clients initialize Options from
  // command-line flags (which is unsupported in production), but may set
  // X86RelaxRelocations. The clang driver can also pass FunctionSections,
  // DataSections and DebuggerTuning via command line flags.
  AddUnsigned(Conf.Options.MCOptions.X86RelaxRelocations);
  AddUnsigned(Conf.Options.FunctionSections);
  AddUnsigned(Conf.Options.DataSections);
  AddUnsigned((unsigned)Conf.Options.DebuggerTuning);
  for (auto &A : Conf.MAttrs)
    AddString(A);
  // An unset relocation/code model is hashed as -1 so that it contributes a
  // value to the stream just like a set one does.
  if (Conf.RelocModel)
    AddUnsigned(*Conf.RelocModel);
  else
    AddUnsigned(-1);
  if (Conf.CodeModel)
    AddUnsigned(*Conf.CodeModel);
  else
    AddUnsigned(-1);
  for (const auto &S : Conf.MllvmArgs)
    AddString(S);
  AddUnsigned(static_cast<int>(Conf.CGOptLevel));
  AddUnsigned(static_cast<int>(Conf.CGFileType));
  AddUnsigned(Conf.OptLevel);
  AddUnsigned(Conf.Freestanding);
  AddString(Conf.OptPipeline);
  AddString(Conf.AAPipeline);
  AddString(Conf.OverrideTriple);
  AddString(Conf.DefaultTriple);
  AddString(Conf.DwoDir);

  // Include the hash for the current module
  auto ModHash = Index.getModuleHash(ModuleID);
  Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));

  // TODO: `ExportList` is determined by `ImportList`. Since `ImportList` is
  // used to compute cache key, we could omit hashing `ExportList` here.
  std::vector<uint64_t> ExportsGUID;
  ExportsGUID.reserve(ExportList.size());
  for (const auto &VI : ExportList)
    ExportsGUID.push_back(VI.getGUID());

  // Sort the export list elements GUIDs.
  llvm::sort(ExportsGUID);
  for (auto GUID : ExportsGUID)
    Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));

  // Order using module hash, to be both independent of module name and
  // module order.
  auto Comp = [&](const std::pair<StringRef, GlobalValue::GUID> &L,
                  const std::pair<StringRef, GlobalValue::GUID> &R) {
    return std::make_pair(Index.getModule(L.first)->second, L.second) <
           std::make_pair(Index.getModule(R.first)->second, R.second);
  };
  FunctionImporter::SortedImportList SortedImportList(ImportList, Comp);

  // Count the number of imports for each source module.
  DenseMap<StringRef, unsigned> ModuleToNumImports;
  for (const auto &[FromModule, GUID, Type] : SortedImportList)
    ++ModuleToNumImports[FromModule];

  // Tracks the source module of the previous entry so that the per-module
  // header (module hash + import count) is hashed once per run of entries
  // coming from the same module.
  std::optional<StringRef> LastModule;
  for (const auto &[FromModule, GUID, Type] : SortedImportList) {
    if (LastModule != FromModule) {
      // Include the hash for every module we import functions from. The set of
      // imported symbols for each module may affect code generation and is
      // sensitive to link order, so include that as well.
      LastModule = FromModule;
      auto ModHash = Index.getModule(FromModule)->second;
      Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
      AddUint64(ModuleToNumImports[FromModule]);
    }
    AddUint64(GUID);
    AddUint8(Type);
  }

  // Include the hash for the resolved ODR.
  for (auto &Entry : ResolvedODR) {
    Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
                                    sizeof(GlobalValue::GUID)));
    Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
                                    sizeof(GlobalValue::LinkageTypes)));
  }

  // Members of CfiFunctionDefs and CfiFunctionDecls that are referenced or
  // defined in this module.
  std::set<GlobalValue::GUID> UsedCfiDefs;
  std::set<GlobalValue::GUID> UsedCfiDecls;

  // Typeids used in this module.
  std::set<GlobalValue::GUID> UsedTypeIds;

  // Records ValueGUID in UsedCfiDefs / UsedCfiDecls if it is a member of the
  // corresponding CFI set passed in by the caller.
  auto AddUsedCfiGlobal = [&](GlobalValue::GUID ValueGUID) {
    if (CfiFunctionDefs.contains(ValueGUID))
      UsedCfiDefs.insert(ValueGUID);
    if (CfiFunctionDecls.contains(ValueGUID))
      UsedCfiDecls.insert(ValueGUID);
  };

  // Hashes the per-summary attributes of GS and collects the CFI globals and
  // type ids it references. Does nothing for a null summary.
  auto AddUsedThings = [&](GlobalValueSummary *GS) {
    if (!GS) return;
    AddUnsigned(GS->getVisibility());
    AddUnsigned(GS->isLive());
    AddUnsigned(GS->canAutoHide());
    for (const ValueInfo &VI : GS->refs()) {
      AddUnsigned(VI.isDSOLocal(Index.withDSOLocalPropagation()));
      AddUsedCfiGlobal(VI.getGUID());
    }
    if (auto *GVS = dyn_cast<GlobalVarSummary>(GS)) {
      AddUnsigned(GVS->maybeReadOnly());
      AddUnsigned(GVS->maybeWriteOnly());
    }
    if (auto *FS = dyn_cast<FunctionSummary>(GS)) {
      for (auto &TT : FS->type_tests())
        UsedTypeIds.insert(TT);
      for (auto &TT : FS->type_test_assume_vcalls())
        UsedTypeIds.insert(TT.GUID);
      for (auto &TT : FS->type_checked_load_vcalls())
        UsedTypeIds.insert(TT.GUID);
      for (auto &TT : FS->type_test_assume_const_vcalls())
        UsedTypeIds.insert(TT.VFunc.GUID);
      for (auto &TT : FS->type_checked_load_const_vcalls())
        UsedTypeIds.insert(TT.VFunc.GUID);
      for (auto &ET : FS->calls()) {
        AddUnsigned(ET.first.isDSOLocal(Index.withDSOLocalPropagation()));
        AddUsedCfiGlobal(ET.first.getGUID());
      }
    }
  };

  // Include the hash for the linkage type to reflect internalization and weak
  // resolution, and collect any used type identifier resolutions.
  for (auto &GS : DefinedGlobals) {
    GlobalValue::LinkageTypes Linkage = GS.second->linkage();
    Hasher.update(
        ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage)));
    AddUsedCfiGlobal(GS.first);
    AddUsedThings(GS.second);
  }

  // Imported functions may introduce new uses of type identifier resolutions,
  // so we need to collect their used resolutions as well.
  for (const auto &[FromModule, GUID, Type] : SortedImportList) {
    GlobalValueSummary *S = Index.findSummaryInModule(GUID, FromModule);
    AddUsedThings(S);
    // If this is an alias, we also care about any types/etc. that the aliasee
    // may reference.
    if (auto *AS = dyn_cast_or_null<AliasSummary>(S))
      AddUsedThings(AS->getBaseObject());
  }

  // Hashes a type id name together with its type-test resolution and all of
  // its whole-program-devirtualization resolutions. Container sizes are hashed
  // ahead of their elements.
  auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
    AddString(TId);

    AddUnsigned(S.TTRes.TheKind);
    AddUnsigned(S.TTRes.SizeM1BitWidth);

    AddUint64(S.TTRes.AlignLog2);
    AddUint64(S.TTRes.SizeM1);
    AddUint64(S.TTRes.BitMask);
    AddUint64(S.TTRes.InlineBits);

    AddUint64(S.WPDRes.size());
    for (auto &WPD : S.WPDRes) {
      AddUnsigned(WPD.first);
      AddUnsigned(WPD.second.TheKind);
      AddString(WPD.second.SingleImplName);

      AddUint64(WPD.second.ResByArg.size());
      for (auto &ByArg : WPD.second.ResByArg) {
        AddUint64(ByArg.first.size());
        for (uint64_t Arg : ByArg.first)
          AddUint64(Arg);
        AddUnsigned(ByArg.second.TheKind);
        AddUint64(ByArg.second.Info);
        AddUnsigned(ByArg.second.Byte);
        AddUnsigned(ByArg.second.Bit);
      }
    }
  };

  // Include the hash for all type identifiers used by this module.
  for (GlobalValue::GUID TId : UsedTypeIds) {
    auto TidIter = Index.typeIds().equal_range(TId);
    for (const auto &I : make_range(TidIter))
      AddTypeIdSummary(I.second.first, I.second.second);
  }

  AddUnsigned(UsedCfiDefs.size());
  for (auto &V : UsedCfiDefs)
    AddUint64(V);

  AddUnsigned(UsedCfiDecls.size());
  for (auto &V : UsedCfiDecls)
    AddUint64(V);

  // Hash the contents of the sample profile and, if that could be read, of the
  // profile remapping file. A file that cannot be read is silently left out of
  // the key; the remapping file is only considered when the sample profile was
  // read successfully.
  if (!Conf.SampleProfile.empty()) {
    auto FileOrErr = MemoryBuffer::getFile(Conf.SampleProfile);
    if (FileOrErr) {
      Hasher.update(FileOrErr.get()->getBuffer());

      if (!Conf.ProfileRemapping.empty()) {
        FileOrErr = MemoryBuffer::getFile(Conf.ProfileRemapping);
        if (FileOrErr)
          Hasher.update(FileOrErr.get()->getBuffer());
      }
    }
  }

  return toHex(Hasher.result());
}

// Derives a new cache key from an existing \p Key and \p ExtraID: both strings
// are hashed with SHA1, each followed by a single 0 byte, and the digest is
// returned in its hexadecimal representation.
std::string llvm::recomputeLTOCacheKey(const std::string &Key,
                                       StringRef ExtraID) {
  SHA1 Hasher;

  auto AddString = [&](StringRef Str) {
    Hasher.update(Str);
    Hasher.update(ArrayRef<uint8_t>{0});
  };
  AddString(Key);
  AddString(ExtraID);

  return toHex(Hasher.result());
}

// Resolves the linkage (and, depending on C.VisibilityScheme, the visibility)
// of every summary of \p VI:
//  - summaries with local or appending linkage are left untouched;
//  - a prevailing linkonce copy is turned into the matching weak linkage;
//  - a non-prevailing copy becomes available_externally unless it is an alias,
//    is in \p GlobalInvolvedWithAlias, or ForceImportAll is set.
// \p recordNewLinkage is invoked for every summary whose linkage changed.
static void thinLTOResolvePrevailingGUID(
    const Config &C, ValueInfo VI,
    DenseSet<GlobalValueSummary *> &GlobalInvolvedWithAlias,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing,
    function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
        recordNewLinkage,
    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
  // Starting visibility: the ELF visibility computed for VI under the ELF
  // scheme, default visibility otherwise. Under the FromPrevailing scheme it
  // is overwritten below with the prevailing copy's visibility.
  GlobalValue::VisibilityTypes Visibility =
      C.VisibilityScheme == Config::ELF ? VI.getELFVisibility()
                                        : GlobalValue::DefaultVisibility;
  for (auto &S : VI.getSummaryList()) {
    GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
    // Ignore local and appending linkage values since the linker
    // doesn't resolve them.
    if (GlobalValue::isLocalLinkage(OriginalLinkage) ||
        GlobalValue::isAppendingLinkage(S->linkage()))
      continue;
    // We need to emit only one of these. The prevailing module will keep it,
    // but turned into a weak, while the others will drop it when possible.
    // This is both a compile-time optimization and a correctness
    // transformation. This is necessary for correctness when we have exported
    // a reference - we need to convert the linkonce to weak to
    // ensure a copy is kept to satisfy the exported reference.
    // FIXME: We may want to split the compile time and correctness
    // aspects into separate routines.
    if (isPrevailing(VI.getGUID(), S.get())) {
      if (GlobalValue::isLinkOnceLinkage(OriginalLinkage)) {
        S->setLinkage(GlobalValue::getWeakLinkage(
            GlobalValue::isLinkOnceODRLinkage(OriginalLinkage)));
        // The kept copy is eligible for auto-hiding (hidden visibility) if all
        // copies were (i.e. they were all linkonce_odr global unnamed addr).
        // If any copy is not (e.g. it was originally weak_odr), then the symbol
        // must remain externally available (e.g. a weak_odr from an explicitly
        // instantiated template). Additionally, if it is in the
        // GUIDPreservedSymbols set, that means that it is visible outside
        // the summary (e.g. in a native object or a bitcode file without
        // summary), and in that case we cannot hide it as it isn't possible to
        // check all copies.
        S->setCanAutoHide(VI.canAutoHide() &&
                          !GUIDPreservedSymbols.count(VI.getGUID()));
      }
      if (C.VisibilityScheme == Config::FromPrevailing)
        Visibility = S->getVisibility();
    }
    // Alias and aliasee can't be turned into available_externally.
    // When force-import-all is used, it indicates that object linking is not
    // supported by the target. In this case, we can't change the linkage as
    // well in case the global is converted to declaration.
    else if (!isa<AliasSummary>(S.get()) &&
             !GlobalInvolvedWithAlias.count(S.get()) && !ForceImportAll)
      S->setLinkage(GlobalValue::AvailableExternallyLinkage);

    // For ELF, set visibility to the computed visibility from summaries. We
    // don't track visibility from declarations so this may be more relaxed than
    // the most constraining one.
    if (C.VisibilityScheme == Config::ELF)
      S->setVisibility(Visibility);

    if (S->linkage() != OriginalLinkage)
      recordNewLinkage(S->modulePath(), VI.getGUID(), S->linkage());
  }

  // Under the FromPrevailing scheme the visibility is only known once the
  // first loop has finished, so it is applied to all non-local, non-appending
  // copies in a second pass.
  if (C.VisibilityScheme == Config::FromPrevailing) {
    for (auto &S : VI.getSummaryList()) {
      GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
      if (GlobalValue::isLocalLinkage(OriginalLinkage) ||
          GlobalValue::isAppendingLinkage(S->linkage()))
        continue;
      S->setVisibility(Visibility);
    }
  }
}

/// Resolve linkage for prevailing symbols in the \p Index.
//
// We'd like to drop these functions if they are no longer referenced in the
// current module. However there is a chance that another module is still
// referencing them because of the import. We make sure we always emit at least
// one copy.
void llvm::thinLTOResolvePrevailingInIndex(
    const Config &C, ModuleSummaryIndex &Index,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing,
    function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
        recordNewLinkage,
    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
  // We won't optimize the globals that are referenced by an alias for now
  // Ideally we should turn the alias into a global and duplicate the definition
  // when needed.
  DenseSet<GlobalValueSummary *> GlobalInvolvedWithAlias;
  for (auto &I : Index)
    for (auto &S : I.second.SummaryList)
      if (auto AS = dyn_cast<AliasSummary>(S.get()))
        GlobalInvolvedWithAlias.insert(&AS->getAliasee());

  // The aliasee set must be complete before any GUID is resolved, hence the
  // separate second walk over the index.
  for (auto &I : Index)
    thinLTOResolvePrevailingGUID(C, Index.getValueInfo(I),
                                 GlobalInvolvedWithAlias, isPrevailing,
                                 recordNewLinkage, GUIDPreservedSymbols);
}

// Updates the linkage of every summary of \p VI: a summary exported from its
// module is promoted from local to external linkage; otherwise, if
// EnableLTOInternalization is set, it is internalized when it has external
// linkage, or when it is the prevailing copy of a weak-for-linker definition
// and the only non-local copy.
static void thinLTOInternalizeAndPromoteGUID(
    ValueInfo VI, function_ref<bool(StringRef, ValueInfo)> isExported,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing) {
  // Number of copies with non-local linkage, counted before any linkage is
  // modified below.
  auto ExternallyVisibleCopies =
      llvm::count_if(VI.getSummaryList(),
                     [](const std::unique_ptr<GlobalValueSummary> &Summary) {
                       return !GlobalValue::isLocalLinkage(Summary->linkage());
                     });

  for (auto &S : VI.getSummaryList()) {
    // First see if we need to promote an internal value because it is not
    // exported.
    if (isExported(S->modulePath(), VI)) {
      if (GlobalValue::isLocalLinkage(S->linkage()))
        S->setLinkage(GlobalValue::ExternalLinkage);
      continue;
    }

    // Otherwise, see if we can internalize.
    if (!EnableLTOInternalization)
      continue;

    // Non-exported values with external linkage can be internalized.
    if (GlobalValue::isExternalLinkage(S->linkage())) {
      S->setLinkage(GlobalValue::InternalLinkage);
      continue;
    }

    // Non-exported function and variable definitions with a weak-for-linker
    // linkage can be internalized in certain cases. The minimum legality
    // requirements would be that they are not address taken to ensure that we
    // don't break pointer equality checks, and that variables are either read-
    // or write-only. For functions, this is the case if either all copies are
    // [local_]unnamed_addr, or we can propagate reference edge attributes
    // (which is how this is guaranteed for variables, when analyzing whether
    // they are read or write-only).
    //
    // However, we only get to this code for weak-for-linkage values in one of
    // two cases:
    // 1) The prevailing copy is not in IR (it is in native code).
    // 2) The prevailing copy in IR is not exported from its module.
    // Additionally, at least for the new LTO API, case 2 will only happen if
    // there is exactly one definition of the value (i.e. in exactly one
    // module), as duplicate defs result in the value being marked exported.
    // Likely, users of the legacy LTO API are similar, however, currently there
    // are llvm-lto based tests of the legacy LTO API that do not mark
    // duplicate linkonce_odr copies as exported via the tool, so we need
    // to handle that case below by checking the number of copies.
    //
    // Generally, we only want to internalize a weak-for-linker value in case
    // 2, because in case 1 we cannot see how the value is used to know if it
    // is read or write-only. We also don't want to bloat the binary with
    // multiple internalized copies of non-prevailing linkonce/weak functions.
    // Note if we don't internalize, we will convert non-prevailing copies to
    // available_externally anyway, so that we drop them after inlining. The
    // only reason to internalize such a function is if we indeed have a single
    // copy, because internalizing it won't increase binary size, and enables
    // use of inliner heuristics that are more aggressive in the face of a
    // single call to a static (local). For variables, internalizing a read or
    // write only variable can enable more aggressive optimization. However, we
    // already perform this elsewhere in the ThinLTO backend handling for
    // read or write-only variables (processGlobalForThinLTO).
    //
    // Therefore, only internalize linkonce/weak if there is a single copy, that
    // is prevailing in this IR module. We can do so aggressively, without
    // requiring the address to be insignificant, or that a variable be read or
    // write-only.
    if (!GlobalValue::isWeakForLinker(S->linkage()) ||
        GlobalValue::isExternalWeakLinkage(S->linkage()))
      continue;

    if (isPrevailing(VI.getGUID(), S.get()) && ExternallyVisibleCopies == 1)
      S->setLinkage(GlobalValue::InternalLinkage);
  }
}

// Update the linkages in the given \p Index to mark exported values
// as external and non-exported values as internal.
void llvm::thinLTOInternalizeAndPromoteInIndex(
    ModuleSummaryIndex &Index,
    function_ref<bool(StringRef, ValueInfo)> isExported,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        isPrevailing) {
  for (auto &I : Index)
    thinLTOInternalizeAndPromoteGUID(Index.getValueInfo(I), isExported,
                                     isPrevailing);
}

// Requires a destructor for std::vector<InputModule>.
558 InputFile::~InputFile() = default; 559 560 Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) { 561 std::unique_ptr<InputFile> File(new InputFile); 562 563 Expected<IRSymtabFile> FOrErr = readIRSymtab(Object); 564 if (!FOrErr) 565 return FOrErr.takeError(); 566 567 File->TargetTriple = FOrErr->TheReader.getTargetTriple(); 568 File->SourceFileName = FOrErr->TheReader.getSourceFileName(); 569 File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts(); 570 File->DependentLibraries = FOrErr->TheReader.getDependentLibraries(); 571 File->ComdatTable = FOrErr->TheReader.getComdatTable(); 572 573 for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) { 574 size_t Begin = File->Symbols.size(); 575 for (const irsymtab::Reader::SymbolRef &Sym : 576 FOrErr->TheReader.module_symbols(I)) 577 // Skip symbols that are irrelevant to LTO. Note that this condition needs 578 // to match the one in Skip() in LTO::addRegularLTO(). 579 if (Sym.isGlobal() && !Sym.isFormatSpecific()) 580 File->Symbols.push_back(Sym); 581 File->ModuleSymIndices.push_back({Begin, File->Symbols.size()}); 582 } 583 584 File->Mods = FOrErr->Mods; 585 File->Strtab = std::move(FOrErr->Strtab); 586 return std::move(File); 587 } 588 589 StringRef InputFile::getName() const { 590 return Mods[0].getModuleIdentifier(); 591 } 592 593 BitcodeModule &InputFile::getSingleBitcodeModule() { 594 assert(Mods.size() == 1 && "Expect only one bitcode module"); 595 return Mods[0]; 596 } 597 598 LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel, 599 const Config &Conf) 600 : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel), 601 Ctx(Conf), CombinedModule(std::make_unique<Module>("ld-temp.o", Ctx)), 602 Mover(std::make_unique<IRMover>(*CombinedModule)) {} 603 604 LTO::ThinLTOState::ThinLTOState(ThinBackend BackendParam) 605 : Backend(std::move(BackendParam)), CombinedIndex(/*HaveGVs*/ false) { 606 if (!Backend.isValid()) 607 Backend = 608 
createInProcessThinBackend(llvm::heavyweight_hardware_concurrency()); 609 } 610 611 LTO::LTO(Config Conf, ThinBackend Backend, 612 unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode) 613 : Conf(std::move(Conf)), 614 RegularLTO(ParallelCodeGenParallelismLevel, this->Conf), 615 ThinLTO(std::move(Backend)), 616 GlobalResolutions( 617 std::make_unique<DenseMap<StringRef, GlobalResolution>>()), 618 LTOMode(LTOMode) { 619 if (Conf.KeepSymbolNameCopies || LTOKeepSymbolCopies) { 620 Alloc = std::make_unique<BumpPtrAllocator>(); 621 GlobalResolutionSymbolSaver = std::make_unique<llvm::StringSaver>(*Alloc); 622 } 623 } 624 625 // Requires a destructor for MapVector<BitcodeModule>. 626 LTO::~LTO() = default; 627 628 // Add the symbols in the given module to the GlobalResolutions map, and resolve 629 // their partitions. 630 void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms, 631 ArrayRef<SymbolResolution> Res, 632 unsigned Partition, bool InSummary) { 633 auto *ResI = Res.begin(); 634 auto *ResE = Res.end(); 635 (void)ResE; 636 for (const InputFile::Symbol &Sym : Syms) { 637 assert(ResI != ResE); 638 SymbolResolution Res = *ResI++; 639 640 StringRef SymbolName = Sym.getName(); 641 // Keep copies of symbols if the client of LTO says so. 642 if (GlobalResolutionSymbolSaver && !GlobalResolutions->contains(SymbolName)) 643 SymbolName = GlobalResolutionSymbolSaver->save(SymbolName); 644 645 auto &GlobalRes = (*GlobalResolutions)[SymbolName]; 646 GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); 647 if (Res.Prevailing) { 648 assert(!GlobalRes.Prevailing && 649 "Multiple prevailing defs are not allowed"); 650 GlobalRes.Prevailing = true; 651 GlobalRes.IRName = std::string(Sym.getIRName()); 652 } else if (!GlobalRes.Prevailing && GlobalRes.IRName.empty()) { 653 // Sometimes it can be two copies of symbol in a module and prevailing 654 // symbol can have no IR name. That might happen if symbol is defined in 655 // module level inline asm block. 
In case we have multiple modules with 656 // the same symbol we want to use IR name of the prevailing symbol. 657 // Otherwise, if we haven't seen a prevailing symbol, set the name so that 658 // we can later use it to check if there is any prevailing copy in IR. 659 GlobalRes.IRName = std::string(Sym.getIRName()); 660 } 661 662 // In rare occasion, the symbol used to initialize GlobalRes has a different 663 // IRName from the inspected Symbol. This can happen on macOS + iOS, when a 664 // symbol is referenced through its mangled name, say @"\01_symbol" while 665 // the IRName is @symbol (the prefix underscore comes from MachO mangling). 666 // In that case, we have the same actual Symbol that can get two different 667 // GUID, leading to some invalid internalization. Workaround this by marking 668 // the GlobalRes external. 669 670 // FIXME: instead of this check, it would be desirable to compute GUIDs 671 // based on mangled name, but this requires an access to the Target Triple 672 // and would be relatively invasive on the codebase. 673 if (GlobalRes.IRName != Sym.getIRName()) { 674 GlobalRes.Partition = GlobalResolution::External; 675 GlobalRes.VisibleOutsideSummary = true; 676 } 677 678 // Set the partition to external if we know it is re-defined by the linker 679 // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a 680 // regular object, is referenced from llvm.compiler.used/llvm.used, or was 681 // already recorded as being referenced from a different partition. 682 if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() || 683 (GlobalRes.Partition != GlobalResolution::Unknown && 684 GlobalRes.Partition != Partition)) { 685 GlobalRes.Partition = GlobalResolution::External; 686 } else 687 // First recorded reference, save the current partition. 688 GlobalRes.Partition = Partition; 689 690 // Flag as visible outside of summary if visible from a regular object or 691 // from a module that does not have a summary. 
692 GlobalRes.VisibleOutsideSummary |= 693 (Res.VisibleToRegularObj || Sym.isUsed() || !InSummary); 694 695 GlobalRes.ExportDynamic |= Res.ExportDynamic; 696 } 697 } 698 699 void LTO::releaseGlobalResolutionsMemory() { 700 // Release GlobalResolutions dense-map itself. 701 GlobalResolutions.reset(); 702 // Release the string saver memory. 703 GlobalResolutionSymbolSaver.reset(); 704 Alloc.reset(); 705 } 706 707 static void writeToResolutionFile(raw_ostream &OS, InputFile *Input, 708 ArrayRef<SymbolResolution> Res) { 709 StringRef Path = Input->getName(); 710 OS << Path << '\n'; 711 auto ResI = Res.begin(); 712 for (const InputFile::Symbol &Sym : Input->symbols()) { 713 assert(ResI != Res.end()); 714 SymbolResolution Res = *ResI++; 715 716 OS << "-r=" << Path << ',' << Sym.getName() << ','; 717 if (Res.Prevailing) 718 OS << 'p'; 719 if (Res.FinalDefinitionInLinkageUnit) 720 OS << 'l'; 721 if (Res.VisibleToRegularObj) 722 OS << 'x'; 723 if (Res.LinkerRedefined) 724 OS << 'r'; 725 OS << '\n'; 726 } 727 OS.flush(); 728 assert(ResI == Res.end()); 729 } 730 731 Error LTO::add(std::unique_ptr<InputFile> Input, 732 ArrayRef<SymbolResolution> Res) { 733 assert(!CalledGetMaxTasks); 734 735 if (Conf.ResolutionFile) 736 writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res); 737 738 if (RegularLTO.CombinedModule->getTargetTriple().empty()) { 739 Triple InputTriple(Input->getTargetTriple()); 740 RegularLTO.CombinedModule->setTargetTriple(InputTriple); 741 if (InputTriple.isOSBinFormatELF()) 742 Conf.VisibilityScheme = Config::ELF; 743 } 744 745 const SymbolResolution *ResI = Res.begin(); 746 for (unsigned I = 0; I != Input->Mods.size(); ++I) 747 if (Error Err = addModule(*Input, I, ResI, Res.end())) 748 return Err; 749 750 assert(ResI == Res.end()); 751 return Error::success(); 752 } 753 754 Error LTO::addModule(InputFile &Input, unsigned ModI, 755 const SymbolResolution *&ResI, 756 const SymbolResolution *ResE) { 757 Expected<BitcodeLTOInfo> LTOInfo = 
Input.Mods[ModI].getLTOInfo(); 758 if (!LTOInfo) 759 return LTOInfo.takeError(); 760 761 if (EnableSplitLTOUnit) { 762 // If only some modules were split, flag this in the index so that 763 // we can skip or error on optimizations that need consistently split 764 // modules (whole program devirt and lower type tests). 765 if (*EnableSplitLTOUnit != LTOInfo->EnableSplitLTOUnit) 766 ThinLTO.CombinedIndex.setPartiallySplitLTOUnits(); 767 } else 768 EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit; 769 770 BitcodeModule BM = Input.Mods[ModI]; 771 772 if ((LTOMode == LTOK_UnifiedRegular || LTOMode == LTOK_UnifiedThin) && 773 !LTOInfo->UnifiedLTO) 774 return make_error<StringError>( 775 "unified LTO compilation must use " 776 "compatible bitcode modules (use -funified-lto)", 777 inconvertibleErrorCode()); 778 779 if (LTOInfo->UnifiedLTO && LTOMode == LTOK_Default) 780 LTOMode = LTOK_UnifiedThin; 781 782 bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular); 783 784 auto ModSyms = Input.module_symbols(ModI); 785 addModuleToGlobalRes(ModSyms, {ResI, ResE}, 786 IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0, 787 LTOInfo->HasSummary); 788 789 if (IsThinLTO) 790 return addThinLTO(BM, ModSyms, ResI, ResE); 791 792 RegularLTO.EmptyCombinedModule = false; 793 Expected<RegularLTOState::AddedModule> ModOrErr = 794 addRegularLTO(BM, ModSyms, ResI, ResE); 795 if (!ModOrErr) 796 return ModOrErr.takeError(); 797 798 if (!LTOInfo->HasSummary) 799 return linkRegularLTO(std::move(*ModOrErr), /*LivenessFromIndex=*/false); 800 801 // Regular LTO module summaries are added to a dummy module that represents 802 // the combined regular LTO module. 
803 if (Error Err = BM.readSummary(ThinLTO.CombinedIndex, "")) 804 return Err; 805 RegularLTO.ModsWithSummaries.push_back(std::move(*ModOrErr)); 806 return Error::success(); 807 } 808 809 // Checks whether the given global value is in a non-prevailing comdat 810 // (comdat containing values the linker indicated were not prevailing, 811 // which we then dropped to available_externally), and if so, removes 812 // it from the comdat. This is called for all global values to ensure the 813 // comdat is empty rather than leaving an incomplete comdat. It is needed for 814 // regular LTO modules, in case we are in a mixed-LTO mode (both regular 815 // and thin LTO modules) compilation. Since the regular LTO module will be 816 // linked first in the final native link, we want to make sure the linker 817 // doesn't select any of these incomplete comdats that would be left 818 // in the regular LTO module without this cleanup. 819 static void 820 handleNonPrevailingComdat(GlobalValue &GV, 821 std::set<const Comdat *> &NonPrevailingComdats) { 822 Comdat *C = GV.getComdat(); 823 if (!C) 824 return; 825 826 if (!NonPrevailingComdats.count(C)) 827 return; 828 829 // Additionally need to drop all global values from the comdat to 830 // available_externally, to satisfy the COMDAT requirement that all members 831 // are discarded as a unit. The non-local linkage global values avoid 832 // duplicate definition linker errors. 833 GV.setLinkage(GlobalValue::AvailableExternallyLinkage); 834 835 if (auto GO = dyn_cast<GlobalObject>(&GV)) 836 GO->setComdat(nullptr); 837 } 838 839 // Add a regular LTO object to the link. 840 // The resulting module needs to be linked into the combined LTO module with 841 // linkRegularLTO. 
// Lazily loads BM into RegularLTO.Ctx, applies the linker resolutions for Syms
// to the loaded globals and collects in the returned AddedModule the module
// together with the list of values to keep. ResI is advanced by one resolution
// per entry of Syms; ResE marks the end of the available resolutions.
// Fails if the module cannot be loaded or its metadata cannot be materialized.
Expected<LTO::RegularLTOState::AddedModule>
LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
                   const SymbolResolution *&ResI,
                   const SymbolResolution *ResE) {
  RegularLTOState::AddedModule Mod;
  Expected<std::unique_ptr<Module>> MOrErr =
      BM.getLazyModule(RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true,
                       /*IsImporting*/ false);
  if (!MOrErr)
    return MOrErr.takeError();
  // Keep a reference for local use; ownership moves into the result.
  Module &M = **MOrErr;
  Mod.M = std::move(*MOrErr);

  if (Error Err = M.materializeMetadata())
    return std::move(Err);

  // If cfi.functions is present and we are in regular LTO mode, LowerTypeTests
  // will rename local functions in the merged module as "<function name>.1".
  // This causes linking errors, since other parts of the module expect the
  // original function name.
  if (LTOMode == LTOK_UnifiedRegular)
    if (NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"))
      M.eraseNamedMetadata(CfiFunctionsMD);

  UpgradeDebugInfo(M);

  ModuleSymbolTable SymTab;
  SymTab.addModule(&M);

  // Globals with appending linkage are always kept.
  for (GlobalVariable &GV : M.globals())
    if (GV.hasAppendingLinkage())
      Mod.Keep.push_back(&GV);

  // Objects that are the target of an alias; these are excluded from the
  // available_externally conversion below.
  DenseSet<GlobalObject *> AliasedGlobals;
  for (auto &GA : M.aliases())
    if (GlobalObject *GO = GA.getAliaseeObject())
      AliasedGlobals.insert(GO);

  // In this function we need IR GlobalValues matching the symbols in Syms
  // (which is not backed by a module), so we need to enumerate them in the same
  // order. The symbol enumeration order of a ModuleSymbolTable intentionally
  // matches the order of an irsymtab, but when we read the irsymtab in
  // InputFile::create we omit some symbols that are irrelevant to LTO. The
  // Skip() function skips the same symbols from the module as InputFile does
  // from the symbol table.
  auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end();
  auto Skip = [&]() {
    while (MsymI != MsymE) {
      auto Flags = SymTab.getSymbolFlags(*MsymI);
      // Stop at the next global symbol that is not format specific.
      if ((Flags & object::BasicSymbolRef::SF_Global) &&
          !(Flags & object::BasicSymbolRef::SF_FormatSpecific))
        return;
      ++MsymI;
    }
  };
  Skip();

  std::set<const Comdat *> NonPrevailingComdats;
  SmallSet<StringRef, 2> NonPrevailingAsmSymbols;
  for (const InputFile::Symbol &Sym : Syms) {
    // Consume one resolution and one module symbol per input symbol, keeping
    // the two sequences in lock-step.
    assert(ResI != ResE);
    SymbolResolution Res = *ResI++;

    assert(MsymI != MsymE);
    ModuleSymbolTable::Symbol Msym = *MsymI++;
    Skip();

    if (GlobalValue *GV = dyn_cast_if_present<GlobalValue *>(Msym)) {
      if (Res.Prevailing) {
        // NOTE: this `continue` also bypasses the DSO-local and common-symbol
        // handling further down for prevailing undefined symbols.
        if (Sym.isUndefined())
          continue;
        Mod.Keep.push_back(GV);
        // For symbols re-defined with linker -wrap and -defsym options,
        // set the linkage to weak to inhibit IPO. The linkage will be
        // restored by the linker.
        if (Res.LinkerRedefined)
          GV->setLinkage(GlobalValue::WeakAnyLinkage);

        // Prevailing linkonce definitions are promoted to the matching weak
        // linkage (ODR-ness preserved).
        GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage();
        if (GlobalValue::isLinkOnceLinkage(OriginalLinkage))
          GV->setLinkage(GlobalValue::getWeakLinkage(
              GlobalValue::isLinkOnceODRLinkage(OriginalLinkage)));
      } else if (isa<GlobalObject>(GV) &&
                 (GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() ||
                  GV->hasAvailableExternallyLinkage()) &&
                 !AliasedGlobals.count(cast<GlobalObject>(GV))) {
        // Any of the above three types of linkage indicates that the
        // chosen prevailing symbol will have the same semantics as this copy of
        // the symbol, so we may be able to link it with available_externally
        // linkage. We will decide later whether to do that when we link this
        // module (in linkRegularLTO), based on whether it is undefined.
        Mod.Keep.push_back(GV);
        GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
        // Remember the comdat so its remaining members can be cleaned up by
        // handleNonPrevailingComdat after the loop.
        if (GV->hasComdat())
          NonPrevailingComdats.insert(GV->getComdat());
        cast<GlobalObject>(GV)->setComdat(nullptr);
      }

      // Set the 'local' flag based on the linker resolution for this symbol.
      if (Res.FinalDefinitionInLinkageUnit) {
        GV->setDSOLocal(true);
        if (GV->hasDLLImportStorageClass())
          GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
                                     DefaultStorageClass);
      }
    } else if (auto *AS =
                   dyn_cast_if_present<ModuleSymbolTable::AsmSymbol *>(Msym)) {
      // Collect non-prevailing symbols.
      if (!Res.Prevailing)
        NonPrevailingAsmSymbols.insert(AS->first);
    } else {
      llvm_unreachable("unknown symbol type");
    }

    // Common resolution: collect the maximum size/alignment over all commons.
    // We also record if we see an instance of a common as prevailing, so that
    // if none is prevailing we can ignore it later.
    if (Sym.isCommon()) {
      // FIXME: We should figure out what to do about commons defined by asm.
      // For now they aren't reported correctly by ModuleSymbolTable.
      auto &CommonRes = RegularLTO.Commons[std::string(Sym.getIRName())];
      CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize());
      // An alignment value of zero leaves the recorded alignment untouched.
      if (uint32_t SymAlignValue = Sym.getCommonAlignment()) {
        CommonRes.Alignment =
            std::max(Align(SymAlignValue), CommonRes.Alignment);
      }
      CommonRes.Prevailing |= Res.Prevailing;
    }
  }

  // Remove every remaining member from the comdats recorded above.
  if (!M.getComdatSymbolTable().empty())
    for (GlobalValue &GV : M.global_values())
      handleNonPrevailingComdat(GV, NonPrevailingComdats);

  // Prepend ".lto_discard <sym>, <sym>*" directive to each module inline asm
  // block.
  if (!M.getModuleInlineAsm().empty()) {
    std::string NewIA = ".lto_discard";
    if (!NonPrevailingAsmSymbols.empty()) {
      // Don't discard a symbol if there is a live .symver for it.
      ModuleSymbolTable::CollectAsmSymvers(
          M, [&](StringRef Name, StringRef Alias) {
            if (!NonPrevailingAsmSymbols.count(Alias))
              NonPrevailingAsmSymbols.erase(Name);
          });
      NewIA += " " + llvm::join(NonPrevailingAsmSymbols, ", ");
    }
    NewIA += "\n";
    M.setModuleInlineAsm(NewIA + M.getModuleInlineAsm());
  }

  // Every module symbol must have been matched against an input symbol.
  assert(MsymI == MsymE);
  return std::move(Mod);
}

// Moves the kept values of Mod into the combined regular LTO module.
// When LivenessFromIndex is set, values whose GUID the combined index does not
// report as live are left out; for such functions a "deadfunction" remark is
// emitted if a diagnostic output file is set up. available_externally values
// are only linked when the combined module has no definition of that name yet.
Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
                          bool LivenessFromIndex) {
  std::vector<GlobalValue *> Keep;
  for (GlobalValue *GV : Mod.Keep) {
    if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) {
      if (Function *F = dyn_cast<Function>(GV)) {
        if (DiagnosticOutputFile) {
          // The function is materialized before the remark is built for it.
          if (Error Err = F->materialize())
            return Err;
          OptimizationRemarkEmitter ORE(F, nullptr);
          ORE.emit(OptimizationRemark(DEBUG_TYPE, "deadfunction", F)
                   << ore::NV("Function", F)
                   << " not added to the combined module ");
        }
      }
      continue;
    }

    if (!GV->hasAvailableExternallyLinkage()) {
      Keep.push_back(GV);
      continue;
    }

    // Only link available_externally definitions if we don't already have a
    // definition.
    GlobalValue *CombinedGV =
        RegularLTO.CombinedModule->getNamedValue(GV->getName());
    if (CombinedGV && !CombinedGV->isDeclaration())
      continue;

    Keep.push_back(GV);
  }

  return RegularLTO.Mover->move(std::move(Mod.M), Keep, nullptr,
                                /* IsPerformingImport */ false);
}

// Add a ThinLTO module to the link.
1035 Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, 1036 const SymbolResolution *&ResI, 1037 const SymbolResolution *ResE) { 1038 const SymbolResolution *ResITmp = ResI; 1039 for (const InputFile::Symbol &Sym : Syms) { 1040 assert(ResITmp != ResE); 1041 SymbolResolution Res = *ResITmp++; 1042 1043 if (!Sym.getIRName().empty()) { 1044 auto GUID = GlobalValue::getGUIDAssumingExternalLinkage( 1045 GlobalValue::getGlobalIdentifier(Sym.getIRName(), 1046 GlobalValue::ExternalLinkage, "")); 1047 if (Res.Prevailing) 1048 ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier(); 1049 } 1050 } 1051 1052 if (Error Err = 1053 BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(), 1054 [&](GlobalValue::GUID GUID) { 1055 return ThinLTO.PrevailingModuleForGUID[GUID] == 1056 BM.getModuleIdentifier(); 1057 })) 1058 return Err; 1059 LLVM_DEBUG(dbgs() << "Module " << BM.getModuleIdentifier() << "\n"); 1060 1061 for (const InputFile::Symbol &Sym : Syms) { 1062 assert(ResI != ResE); 1063 SymbolResolution Res = *ResI++; 1064 1065 if (!Sym.getIRName().empty()) { 1066 auto GUID = GlobalValue::getGUIDAssumingExternalLinkage( 1067 GlobalValue::getGlobalIdentifier(Sym.getIRName(), 1068 GlobalValue::ExternalLinkage, "")); 1069 if (Res.Prevailing) { 1070 assert(ThinLTO.PrevailingModuleForGUID[GUID] == 1071 BM.getModuleIdentifier()); 1072 1073 // For linker redefined symbols (via --wrap or --defsym) we want to 1074 // switch the linkage to `weak` to prevent IPOs from happening. 1075 // Find the summary in the module for this very GV and record the new 1076 // linkage so that we can switch it when we import the GV. 1077 if (Res.LinkerRedefined) 1078 if (auto S = ThinLTO.CombinedIndex.findSummaryInModule( 1079 GUID, BM.getModuleIdentifier())) 1080 S->setLinkage(GlobalValue::WeakAnyLinkage); 1081 } 1082 1083 // If the linker resolved the symbol to a local definition then mark it 1084 // as local in the summary for the module we are adding. 
1085 if (Res.FinalDefinitionInLinkageUnit) { 1086 if (auto S = ThinLTO.CombinedIndex.findSummaryInModule( 1087 GUID, BM.getModuleIdentifier())) { 1088 S->setDSOLocal(true); 1089 } 1090 } 1091 } 1092 } 1093 1094 if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second) 1095 return make_error<StringError>( 1096 "Expected at most one ThinLTO module per bitcode file", 1097 inconvertibleErrorCode()); 1098 1099 if (!Conf.ThinLTOModulesToCompile.empty()) { 1100 if (!ThinLTO.ModulesToCompile) 1101 ThinLTO.ModulesToCompile = ModuleMapType(); 1102 // This is a fuzzy name matching where only modules with name containing the 1103 // specified switch values are going to be compiled. 1104 for (const std::string &Name : Conf.ThinLTOModulesToCompile) { 1105 if (BM.getModuleIdentifier().contains(Name)) { 1106 ThinLTO.ModulesToCompile->insert({BM.getModuleIdentifier(), BM}); 1107 LLVM_DEBUG(dbgs() << "[ThinLTO] Selecting " << BM.getModuleIdentifier() 1108 << " to compile\n"); 1109 } 1110 } 1111 } 1112 1113 return Error::success(); 1114 } 1115 1116 unsigned LTO::getMaxTasks() const { 1117 CalledGetMaxTasks = true; 1118 auto ModuleCount = ThinLTO.ModulesToCompile ? ThinLTO.ModulesToCompile->size() 1119 : ThinLTO.ModuleMap.size(); 1120 return RegularLTO.ParallelCodeGenParallelismLevel + ModuleCount; 1121 } 1122 1123 // If only some of the modules were split, we cannot correctly handle 1124 // code that contains type tests or type checked loads. 
// Returns success when the combined index is not flagged as partially split,
// or when neither the merged regular LTO module nor any function summary in
// the combined index uses type tests / type checked loads.
Error LTO::checkPartiallySplit() {
  if (!ThinLTO.CombinedIndex.partiallySplitLTOUnits())
    return Error::success();

  const Module *Combined = RegularLTO.CombinedModule.get();
  // Each of these is null when the combined module has no such declaration.
  Function *TypeTestFunc =
      Intrinsic::getDeclarationIfExists(Combined, Intrinsic::type_test);
  Function *TypeCheckedLoadFunc =
      Intrinsic::getDeclarationIfExists(Combined, Intrinsic::type_checked_load);
  Function *TypeCheckedLoadRelativeFunc = Intrinsic::getDeclarationIfExists(
      Combined, Intrinsic::type_checked_load_relative);

  // First check if there are type tests / type checked loads in the
  // merged regular LTO module IR.
  if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
      (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()) ||
      (TypeCheckedLoadRelativeFunc &&
       !TypeCheckedLoadRelativeFunc->use_empty()))
    return make_error<StringError>(
        "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
        inconvertibleErrorCode());

  // Otherwise check if there are any recorded in the combined summary from the
  // ThinLTO modules.
  for (auto &P : ThinLTO.CombinedIndex) {
    for (auto &S : P.second.SummaryList) {
      // Only function summaries carry type test information.
      auto *FS = dyn_cast<FunctionSummary>(S.get());
      if (!FS)
        continue;
      if (!FS->type_test_assume_vcalls().empty() ||
          !FS->type_checked_load_vcalls().empty() ||
          !FS->type_test_assume_const_vcalls().empty() ||
          !FS->type_checked_load_const_vcalls().empty() ||
          !FS->type_tests().empty())
        return make_error<StringError>(
            "inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
            inconvertibleErrorCode());
    }
  }
  return Error::success();
}

/// Runs the link: derives the preserved / prevailing symbol sets from the
/// global resolutions, computes dead symbols in the combined index, then runs
/// regular LTO followed (only if that succeeded) by ThinLTO. Statistics are
/// printed as JSON when a stats file was set up. Returns the first error hit.
Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
  // Compute "dead" symbols, we don't want to import/export these!
  DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
  DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
  for (auto &Res : *GlobalResolutions) {
    // Normally a resolution has the IR name of the symbol. We can do nothing
    // here otherwise. See comments in GlobalResolution struct for more details.
    if (Res.second.IRName.empty())
      continue;

    GlobalValue::GUID GUID = GlobalValue::getGUIDAssumingExternalLinkage(
        GlobalValue::dropLLVMManglingEscape(Res.second.IRName));

    // Prevailing symbols visible outside the summary must be preserved.
    if (Res.second.VisibleOutsideSummary && Res.second.Prevailing)
      GUIDPreservedSymbols.insert(GUID);

    if (Res.second.ExportDynamic)
      DynamicExportSymbols.insert(GUID);

    GUIDPrevailingResolutions[GUID] =
        Res.second.Prevailing ? PrevailingType::Yes : PrevailingType::No;
  }

  // GUIDs without a recorded resolution are reported as Unknown.
  auto isPrevailing = [&](GlobalValue::GUID G) {
    auto It = GUIDPrevailingResolutions.find(G);
    if (It == GUIDPrevailingResolutions.end())
      return PrevailingType::Unknown;
    return It->second;
  };
  computeDeadSymbolsWithConstProp(ThinLTO.CombinedIndex, GUIDPreservedSymbols,
                                  isPrevailing, Conf.OptLevel > 0);

  // Setup output file to emit statistics.
  auto StatsFileOrErr = setupStatsFile(Conf.StatsFile);
  if (!StatsFileOrErr)
    return StatsFileOrErr.takeError();
  std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get());

  // TODO: Ideally this would be controlled automatically by detecting that we
  // are linking with an allocator that supports these interfaces, rather than
  // an internal option (which would still be needed for tests, however). For
  // example, if the library exported a symbol like __malloc_hot_cold the linker
  // could recognize that and set a flag in the lto::Config.
  if (SupportsHotColdNew)
    ThinLTO.CombinedIndex.setWithSupportsHotColdNew();

  Error Result = runRegularLTO(AddStream);
  if (!Result)
    // This will reset the GlobalResolutions optional once done with it to
    // reduce peak memory before importing.
    Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);

  // Statistics are printed whether or not the link succeeded.
  if (StatsFile)
    PrintStatisticsJSON(StatsFile->os());

  return Result;
}

/// Strips the "memprof" function attribute and the memprof / callsite
/// metadata from every call in \p Mod, unless \p Index reports hot/cold new
/// support, in which case \p Mod is left untouched.
void lto::updateMemProfAttributes(Module &Mod,
                                  const ModuleSummaryIndex &Index) {
  if (Index.withSupportsHotColdNew())
    return;

  // The profile matcher applies hotness attributes directly for allocations,
  // and those will cause us to generate calls to the hot/cold interfaces
  // unconditionally. If supports-hot-cold-new was not enabled in the LTO
  // link then assume we don't want these calls (e.g. not linking with
  // the appropriate library, or otherwise trying to disable this behavior).
  for (auto &F : Mod) {
    for (auto &BB : F) {
      for (auto &I : BB) {
        auto *CI = dyn_cast<CallBase>(&I);
        if (!CI)
          continue;
        if (CI->hasFnAttr("memprof"))
          CI->removeFnAttr("memprof");
        // Strip off all memprof metadata as it is no longer needed.
        // Importantly, this avoids the addition of new memprof attributes
        // after inlining propagation.
        // TODO: If we support additional types of MemProf metadata beyond hot
        // and cold, we will need to update the metadata based on the allocator
        // APIs supported instead of completely stripping all.
        CI->setMetadata(LLVMContext::MD_memprof, nullptr);
        CI->setMetadata(LLVMContext::MD_callsite, nullptr);
      }
    }
  }
}

/// Runs the regular LTO half of the link on the combined module: sets up
/// optimization remarks, links the deferred modules with summaries, checks
/// for inconsistent splitting, fixes up commons, updates visibility and
/// unnamed_addr / linkage of prevailing symbols, and finally invokes the
/// backend. Every exit after remark setup that is not an error goes through
/// finalizeOptimizationRemarks.
Error LTO::runRegularLTO(AddStreamFn AddStream) {
  // Setup optimization remarks.
  auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
      RegularLTO.CombinedModule->getContext(), Conf.RemarksFilename,
      Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness,
      Conf.RemarksHotnessThreshold);
  LLVM_DEBUG(dbgs() << "Running regular LTO\n");
  if (!DiagFileOrErr)
    return DiagFileOrErr.takeError();
  DiagnosticOutputFile = std::move(*DiagFileOrErr);

  // Finalize linking of regular LTO modules containing summaries now that
  // we have computed liveness information.
  for (auto &M : RegularLTO.ModsWithSummaries)
    if (Error Err = linkRegularLTO(std::move(M),
                                   /*LivenessFromIndex=*/true))
      return Err;

  // Ensure we don't have inconsistently split LTO units with type tests.
  // FIXME: this checks both LTO and ThinLTO. It happens to work as we take
  // this path in both cases but eventually this should be split into two and
  // do the ThinLTO checks in `runThinLTO`.
  if (Error Err = checkPartiallySplit())
    return Err;

  // Make sure commons have the right size/alignment: we kept the largest from
  // all the prevailing when adding the inputs, and we apply it here.
  const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout();
  for (auto &I : RegularLTO.Commons) {
    if (!I.second.Prevailing)
      // Don't do anything if no instance of this common was prevailing.
      continue;
    GlobalVariable *OldGV = RegularLTO.CombinedModule->getNamedGlobal(I.first);
    if (OldGV && DL.getTypeAllocSize(OldGV->getValueType()) == I.second.Size) {
      // Don't create a new global if the type is already correct, just make
      // sure the alignment is correct.
      OldGV->setAlignment(I.second.Alignment);
      continue;
    }
    // Otherwise materialize the common as a zero-initialized i8 array of the
    // recorded size and substitute it for the old global, if there was one.
    ArrayType *Ty =
        ArrayType::get(Type::getInt8Ty(RegularLTO.Ctx), I.second.Size);
    auto *GV = new GlobalVariable(*RegularLTO.CombinedModule, Ty, false,
                                  GlobalValue::CommonLinkage,
                                  ConstantAggregateZero::get(Ty), "");
    GV->setAlignment(I.second.Alignment);
    if (OldGV) {
      OldGV->replaceAllUsesWith(GV);
      GV->takeName(OldGV);
      OldGV->eraseFromParent();
    } else {
      GV->setName(I.first);
    }
  }

  updateMemProfAttributes(*RegularLTO.CombinedModule, ThinLTO.CombinedIndex);

  bool WholeProgramVisibilityEnabledInLTO =
      Conf.HasWholeProgramVisibility &&
      // If validation is enabled, upgrade visibility only when all vtables
      // have typeinfos.
      (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos);

  // This returns true when the name is local or not defined. Locals are
  // expected to be handled separately.
  auto IsVisibleToRegularObj = [&](StringRef name) {
    auto It = GlobalResolutions->find(name);
    return (It == GlobalResolutions->end() ||
            It->second.VisibleOutsideSummary || !It->second.Prevailing);
  };

  // If allowed, upgrade public vcall visibility metadata to linkage unit
  // visibility before whole program devirtualization in the optimizer.
  updateVCallVisibilityInModule(
      *RegularLTO.CombinedModule, WholeProgramVisibilityEnabledInLTO,
      DynamicExportSymbols, Conf.ValidateAllVtablesHaveTypeInfos,
      IsVisibleToRegularObj);
  updatePublicTypeTestCalls(*RegularLTO.CombinedModule,
                            WholeProgramVisibilityEnabledInLTO);

  // A hook returning false stops the pipeline here without an error.
  if (Conf.PreOptModuleHook &&
      !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule))
    return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));

  if (!Conf.CodeGenOnly) {
    for (const auto &R : *GlobalResolutions) {
      // NOTE(review): the name lookup happens before the cheap resolution
      // filters below; GV is only used once those have passed.
      GlobalValue *GV =
          RegularLTO.CombinedModule->getNamedValue(R.second.IRName);
      if (!R.second.isPrevailingIRSymbol())
        continue;
      // Only partition 0 and the External partition are handled here.
      if (R.second.Partition != 0 &&
          R.second.Partition != GlobalResolution::External)
        continue;

      // Ignore symbols defined in other partitions.
      // Also skip declarations, which are not allowed to have internal linkage.
      if (!GV || GV->hasLocalLinkage() || GV->isDeclaration())
        continue;

      // Symbols that are marked DLLImport or DLLExport should not be
      // internalized, as they are either externally visible or referencing
      // external symbols. Symbols that have AvailableExternally or Appending
      // linkage might be used by future passes and should be kept as is.
      // These linkages are seen in Unified regular LTO, because the process
      // of creating split LTO units introduces symbols with that linkage into
      // one of the created modules. Normally, only the ThinLTO backend would
      // compile this module, but Unified Regular LTO processes both
      // modules created by the splitting process as regular LTO modules.
      if ((LTOMode == LTOKind::LTOK_UnifiedRegular) &&
          ((GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) ||
           GV->hasAvailableExternallyLinkage() || GV->hasAppendingLinkage()))
        continue;

      GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global
                                              : GlobalValue::UnnamedAddr::None);
      // Internalize only symbols resolved to partition 0, and only when
      // internalization is enabled.
      if (EnableLTOInternalization && R.second.Partition == 0)
        GV->setLinkage(GlobalValue::InternalLinkage);
    }

    if (Conf.PostInternalizeModuleHook &&
        !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule))
      return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
  }

  // The backend runs when at least one regular LTO module was added, or when
  // the configuration asks for a regular LTO object unconditionally.
  if (!RegularLTO.EmptyCombinedModule || Conf.AlwaysEmitRegularLTOObj) {
    if (Error Err =
            backend(Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel,
                    *RegularLTO.CombinedModule, ThinLTO.CombinedIndex))
      return Err;
  }

  return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
}

/// Returns the names of all libcall implementations reported for \p TT,
/// leaving out RTLIB::Unsupported.
SmallVector<const char *> LTO::getRuntimeLibcallSymbols(const Triple &TT) {
  RTLIB::RuntimeLibcallsInfo Libcalls(TT);
  SmallVector<const char *> LibcallSymbols;
  ArrayRef<RTLIB::LibcallImpl> LibcallImpls = Libcalls.getLibcallImpls();
  // Upper bound: every implementation yields at most one name.
  LibcallSymbols.reserve(LibcallImpls.size());

  for (RTLIB::LibcallImpl Impl : LibcallImpls) {
    if (Impl != RTLIB::Unsupported)
      LibcallSymbols.push_back(Libcalls.getLibcallImplName(Impl));
  }

  return LibcallSymbols;
}

/// Convenience overload: writes the summary to "<NewModulePath>.thinlto.bc"
/// and does not collect the imports files.
Error ThinBackendProc::emitFiles(
    const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
    const std::string &NewModulePath) const {
  return emitFiles(ImportList, ModulePath, NewModulePath,
                   NewModulePath + ".thinlto.bc",
                   /*ImportsFiles=*/std::nullopt);
}

/// Writes the index for \p ModulePath (restricted to the summaries gathered
/// for its imports) to \p SummaryPath. When ShouldEmitImportsFiles is set, an
/// imports file is also written to "<NewModulePath>.imports". When
/// \p ImportsFiles is provided, the names passed to the processImportsFiles
/// callback are appended to it. Fails if \p SummaryPath cannot be opened or
/// the imports file cannot be emitted.
Error ThinBackendProc::emitFiles(
    const FunctionImporter::ImportMapTy &ImportList, llvm::StringRef ModulePath,
    const std::string &NewModulePath, StringRef SummaryPath,
    std::optional<std::reference_wrapper<ImportsFilesContainer>> ImportsFiles)
    const {
  ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
  GVSummaryPtrSet DeclarationSummaries;

  std::error_code EC;
  gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
                                   ImportList, ModuleToSummariesForIndex,
                                   DeclarationSummaries);

  raw_fd_ostream OS(SummaryPath, EC, sys::fs::OpenFlags::OF_None);
  if (EC)
    return createFileError("cannot open " + Twine(SummaryPath), EC);

  writeIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex,
                   &DeclarationSummaries);

  if (ShouldEmitImportsFiles) {
    Error ImportsFilesError = EmitImportsFiles(
        ModulePath, NewModulePath + ".imports", ModuleToSummariesForIndex);
    if (ImportsFilesError)
      return ImportsFilesError;
  }

  // Optionally, store the imports files.
  if (ImportsFiles)
    processImportsFiles(
        ModulePath, ModuleToSummariesForIndex,
        [&](StringRef M) { ImportsFiles->get().push_back(M.str()); });

  return Error::success();
}

namespace {
/// Base class for ThinLTO backends that perform code generation and insert the
/// generated files back into the link.
1450 class CGThinBackend : public ThinBackendProc { 1451 protected: 1452 AddStreamFn AddStream; 1453 DenseSet<GlobalValue::GUID> CfiFunctionDefs; 1454 DenseSet<GlobalValue::GUID> CfiFunctionDecls; 1455 bool ShouldEmitIndexFiles; 1456 1457 public: 1458 CGThinBackend( 1459 const Config &Conf, ModuleSummaryIndex &CombinedIndex, 1460 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, 1461 AddStreamFn AddStream, lto::IndexWriteCallback OnWrite, 1462 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles, 1463 ThreadPoolStrategy ThinLTOParallelism) 1464 : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries, 1465 OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism), 1466 AddStream(std::move(AddStream)), 1467 ShouldEmitIndexFiles(ShouldEmitIndexFiles) { 1468 auto &Defs = CombinedIndex.cfiFunctionDefs(); 1469 CfiFunctionDefs.insert_range(Defs.guids()); 1470 auto &Decls = CombinedIndex.cfiFunctionDecls(); 1471 CfiFunctionDecls.insert_range(Decls.guids()); 1472 } 1473 }; 1474 1475 /// This backend performs code generation by scheduling a job to run on 1476 /// an in-process thread when invoked for each task. 
1477 class InProcessThinBackend : public CGThinBackend { 1478 protected: 1479 FileCache Cache; 1480 1481 public: 1482 InProcessThinBackend( 1483 const Config &Conf, ModuleSummaryIndex &CombinedIndex, 1484 ThreadPoolStrategy ThinLTOParallelism, 1485 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, 1486 AddStreamFn AddStream, FileCache Cache, lto::IndexWriteCallback OnWrite, 1487 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles) 1488 : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries, 1489 AddStream, OnWrite, ShouldEmitIndexFiles, 1490 ShouldEmitImportsFiles, ThinLTOParallelism), 1491 Cache(std::move(Cache)) {} 1492 1493 virtual Error runThinLTOBackendThread( 1494 AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM, 1495 ModuleSummaryIndex &CombinedIndex, 1496 const FunctionImporter::ImportMapTy &ImportList, 1497 const FunctionImporter::ExportSetTy &ExportList, 1498 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, 1499 const GVSummaryMapTy &DefinedGlobals, 1500 MapVector<StringRef, BitcodeModule> &ModuleMap) { 1501 auto RunThinBackend = [&](AddStreamFn AddStream) { 1502 LTOLLVMContext BackendContext(Conf); 1503 Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(BackendContext); 1504 if (!MOrErr) 1505 return MOrErr.takeError(); 1506 1507 return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex, 1508 ImportList, DefinedGlobals, &ModuleMap, 1509 Conf.CodeGenOnly); 1510 }; 1511 1512 auto ModuleID = BM.getModuleIdentifier(); 1513 1514 if (ShouldEmitIndexFiles) { 1515 if (auto E = emitFiles(ImportList, ModuleID, ModuleID.str())) 1516 return E; 1517 } 1518 1519 if (!Cache.isValid() || !CombinedIndex.modulePaths().count(ModuleID) || 1520 all_of(CombinedIndex.getModuleHash(ModuleID), 1521 [](uint32_t V) { return V == 0; })) 1522 // Cache disabled or no entry for this module in the combined index or 1523 // no module hash. 
1524 return RunThinBackend(AddStream); 1525 1526 // The module may be cached, this helps handling it. 1527 std::string Key = computeLTOCacheKey( 1528 Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR, 1529 DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls); 1530 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key, ModuleID); 1531 if (Error Err = CacheAddStreamOrErr.takeError()) 1532 return Err; 1533 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr; 1534 if (CacheAddStream) 1535 return RunThinBackend(CacheAddStream); 1536 1537 return Error::success(); 1538 } 1539 1540 Error start( 1541 unsigned Task, BitcodeModule BM, 1542 const FunctionImporter::ImportMapTy &ImportList, 1543 const FunctionImporter::ExportSetTy &ExportList, 1544 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, 1545 MapVector<StringRef, BitcodeModule> &ModuleMap) override { 1546 StringRef ModulePath = BM.getModuleIdentifier(); 1547 assert(ModuleToDefinedGVSummaries.count(ModulePath)); 1548 const GVSummaryMapTy &DefinedGlobals = 1549 ModuleToDefinedGVSummaries.find(ModulePath)->second; 1550 BackendThreadPool.async( 1551 [=](BitcodeModule BM, ModuleSummaryIndex &CombinedIndex, 1552 const FunctionImporter::ImportMapTy &ImportList, 1553 const FunctionImporter::ExportSetTy &ExportList, 1554 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> 1555 &ResolvedODR, 1556 const GVSummaryMapTy &DefinedGlobals, 1557 MapVector<StringRef, BitcodeModule> &ModuleMap) { 1558 if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled) 1559 timeTraceProfilerInitialize(Conf.TimeTraceGranularity, 1560 "thin backend"); 1561 Error E = runThinLTOBackendThread( 1562 AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList, 1563 ResolvedODR, DefinedGlobals, ModuleMap); 1564 if (E) { 1565 std::unique_lock<std::mutex> L(ErrMu); 1566 if (Err) 1567 Err = joinErrors(std::move(*Err), std::move(E)); 1568 else 1569 Err = std::move(E); 1570 } 1571 if (LLVM_ENABLE_THREADS && 
Conf.TimeTraceEnabled) 1572 timeTraceProfilerFinishThread(); 1573 }, 1574 BM, std::ref(CombinedIndex), std::ref(ImportList), std::ref(ExportList), 1575 std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap)); 1576 1577 if (OnWrite) 1578 OnWrite(std::string(ModulePath)); 1579 return Error::success(); 1580 } 1581 }; 1582 1583 /// This backend is utilized in the first round of a two-codegen round process. 1584 /// It first saves optimized bitcode files to disk before the codegen process 1585 /// begins. After codegen, it stores the resulting object files in a scratch 1586 /// buffer. Note the codegen data stored in the scratch buffer will be extracted 1587 /// and merged in the subsequent step. 1588 class FirstRoundThinBackend : public InProcessThinBackend { 1589 AddStreamFn IRAddStream; 1590 FileCache IRCache; 1591 1592 public: 1593 FirstRoundThinBackend( 1594 const Config &Conf, ModuleSummaryIndex &CombinedIndex, 1595 ThreadPoolStrategy ThinLTOParallelism, 1596 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, 1597 AddStreamFn CGAddStream, FileCache CGCache, AddStreamFn IRAddStream, 1598 FileCache IRCache) 1599 : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism, 1600 ModuleToDefinedGVSummaries, std::move(CGAddStream), 1601 std::move(CGCache), /*OnWrite=*/nullptr, 1602 /*ShouldEmitIndexFiles=*/false, 1603 /*ShouldEmitImportsFiles=*/false), 1604 IRAddStream(std::move(IRAddStream)), IRCache(std::move(IRCache)) {} 1605 1606 Error runThinLTOBackendThread( 1607 AddStreamFn CGAddStream, FileCache CGCache, unsigned Task, 1608 BitcodeModule BM, ModuleSummaryIndex &CombinedIndex, 1609 const FunctionImporter::ImportMapTy &ImportList, 1610 const FunctionImporter::ExportSetTy &ExportList, 1611 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, 1612 const GVSummaryMapTy &DefinedGlobals, 1613 MapVector<StringRef, BitcodeModule> &ModuleMap) override { 1614 auto RunThinBackend = [&](AddStreamFn CGAddStream, 1615 
AddStreamFn IRAddStream) { 1616 LTOLLVMContext BackendContext(Conf); 1617 Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(BackendContext); 1618 if (!MOrErr) 1619 return MOrErr.takeError(); 1620 1621 return thinBackend(Conf, Task, CGAddStream, **MOrErr, CombinedIndex, 1622 ImportList, DefinedGlobals, &ModuleMap, 1623 Conf.CodeGenOnly, IRAddStream); 1624 }; 1625 1626 auto ModuleID = BM.getModuleIdentifier(); 1627 // Like InProcessThinBackend, we produce index files as needed for 1628 // FirstRoundThinBackend. However, these files are not generated for 1629 // SecondRoundThinBackend. 1630 if (ShouldEmitIndexFiles) { 1631 if (auto E = emitFiles(ImportList, ModuleID, ModuleID.str())) 1632 return E; 1633 } 1634 1635 assert((CGCache.isValid() == IRCache.isValid()) && 1636 "Both caches for CG and IR should have matching availability"); 1637 if (!CGCache.isValid() || !CombinedIndex.modulePaths().count(ModuleID) || 1638 all_of(CombinedIndex.getModuleHash(ModuleID), 1639 [](uint32_t V) { return V == 0; })) 1640 // Cache disabled or no entry for this module in the combined index or 1641 // no module hash. 1642 return RunThinBackend(CGAddStream, IRAddStream); 1643 1644 // Get CGKey for caching object in CGCache. 1645 std::string CGKey = computeLTOCacheKey( 1646 Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR, 1647 DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls); 1648 Expected<AddStreamFn> CacheCGAddStreamOrErr = 1649 CGCache(Task, CGKey, ModuleID); 1650 if (Error Err = CacheCGAddStreamOrErr.takeError()) 1651 return Err; 1652 AddStreamFn &CacheCGAddStream = *CacheCGAddStreamOrErr; 1653 1654 // Get IRKey for caching (optimized) IR in IRCache with an extra ID. 
1655 std::string IRKey = recomputeLTOCacheKey(CGKey, /*ExtraID=*/"IR"); 1656 Expected<AddStreamFn> CacheIRAddStreamOrErr = 1657 IRCache(Task, IRKey, ModuleID); 1658 if (Error Err = CacheIRAddStreamOrErr.takeError()) 1659 return Err; 1660 AddStreamFn &CacheIRAddStream = *CacheIRAddStreamOrErr; 1661 1662 // Ideally, both CG and IR caching should be synchronized. However, in 1663 // practice, their availability may differ due to different expiration 1664 // times. Therefore, if either cache is missing, the backend process is 1665 // triggered. 1666 if (CacheCGAddStream || CacheIRAddStream) { 1667 LLVM_DEBUG(dbgs() << "[FirstRound] Cache Miss for " 1668 << BM.getModuleIdentifier() << "\n"); 1669 return RunThinBackend(CacheCGAddStream ? CacheCGAddStream : CGAddStream, 1670 CacheIRAddStream ? CacheIRAddStream : IRAddStream); 1671 } 1672 1673 return Error::success(); 1674 } 1675 }; 1676 1677 /// This backend operates in the second round of a two-codegen round process. 1678 /// It starts by reading the optimized bitcode files that were saved during the 1679 /// first round. The backend then executes the codegen only to further optimize 1680 /// the code, utilizing the codegen data merged from the first round. Finally, 1681 /// it writes the resulting object files as usual. 
1682 class SecondRoundThinBackend : public InProcessThinBackend { 1683 std::unique_ptr<SmallVector<StringRef>> IRFiles; 1684 stable_hash CombinedCGDataHash; 1685 1686 public: 1687 SecondRoundThinBackend( 1688 const Config &Conf, ModuleSummaryIndex &CombinedIndex, 1689 ThreadPoolStrategy ThinLTOParallelism, 1690 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, 1691 AddStreamFn AddStream, FileCache Cache, 1692 std::unique_ptr<SmallVector<StringRef>> IRFiles, 1693 stable_hash CombinedCGDataHash) 1694 : InProcessThinBackend(Conf, CombinedIndex, ThinLTOParallelism, 1695 ModuleToDefinedGVSummaries, std::move(AddStream), 1696 std::move(Cache), 1697 /*OnWrite=*/nullptr, 1698 /*ShouldEmitIndexFiles=*/false, 1699 /*ShouldEmitImportsFiles=*/false), 1700 IRFiles(std::move(IRFiles)), CombinedCGDataHash(CombinedCGDataHash) {} 1701 1702 virtual Error runThinLTOBackendThread( 1703 AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM, 1704 ModuleSummaryIndex &CombinedIndex, 1705 const FunctionImporter::ImportMapTy &ImportList, 1706 const FunctionImporter::ExportSetTy &ExportList, 1707 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, 1708 const GVSummaryMapTy &DefinedGlobals, 1709 MapVector<StringRef, BitcodeModule> &ModuleMap) override { 1710 auto RunThinBackend = [&](AddStreamFn AddStream) { 1711 LTOLLVMContext BackendContext(Conf); 1712 std::unique_ptr<Module> LoadedModule = 1713 cgdata::loadModuleForTwoRounds(BM, Task, BackendContext, *IRFiles); 1714 1715 return thinBackend(Conf, Task, AddStream, *LoadedModule, CombinedIndex, 1716 ImportList, DefinedGlobals, &ModuleMap, 1717 /*CodeGenOnly=*/true); 1718 }; 1719 1720 auto ModuleID = BM.getModuleIdentifier(); 1721 if (!Cache.isValid() || !CombinedIndex.modulePaths().count(ModuleID) || 1722 all_of(CombinedIndex.getModuleHash(ModuleID), 1723 [](uint32_t V) { return V == 0; })) 1724 // Cache disabled or no entry for this module in the combined index or 1725 // no 
module hash. 1726 return RunThinBackend(AddStream); 1727 1728 // Get Key for caching the final object file in Cache with the combined 1729 // CGData hash. 1730 std::string Key = computeLTOCacheKey( 1731 Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR, 1732 DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls); 1733 Key = recomputeLTOCacheKey(Key, 1734 /*ExtraID=*/std::to_string(CombinedCGDataHash)); 1735 Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key, ModuleID); 1736 if (Error Err = CacheAddStreamOrErr.takeError()) 1737 return Err; 1738 AddStreamFn &CacheAddStream = *CacheAddStreamOrErr; 1739 1740 if (CacheAddStream) { 1741 LLVM_DEBUG(dbgs() << "[SecondRound] Cache Miss for " 1742 << BM.getModuleIdentifier() << "\n"); 1743 return RunThinBackend(CacheAddStream); 1744 } 1745 1746 return Error::success(); 1747 } 1748 }; 1749 } // end anonymous namespace 1750 1751 ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism, 1752 lto::IndexWriteCallback OnWrite, 1753 bool ShouldEmitIndexFiles, 1754 bool ShouldEmitImportsFiles) { 1755 auto Func = 1756 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, 1757 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, 1758 AddStreamFn AddStream, FileCache Cache) { 1759 return std::make_unique<InProcessThinBackend>( 1760 Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, 1761 AddStream, Cache, OnWrite, ShouldEmitIndexFiles, 1762 ShouldEmitImportsFiles); 1763 }; 1764 return ThinBackend(Func, Parallelism); 1765 } 1766 1767 StringLiteral lto::getThinLTODefaultCPU(const Triple &TheTriple) { 1768 if (!TheTriple.isOSDarwin()) 1769 return ""; 1770 if (TheTriple.getArch() == Triple::x86_64) 1771 return "core2"; 1772 if (TheTriple.getArch() == Triple::x86) 1773 return "yonah"; 1774 if (TheTriple.isArm64e()) 1775 return "apple-a12"; 1776 if (TheTriple.getArch() == Triple::aarch64 || 1777 TheTriple.getArch() == Triple::aarch64_32) 1778 return "cyclone"; 1779 
return ""; 1780 } 1781 1782 // Given the original \p Path to an output file, replace any path 1783 // prefix matching \p OldPrefix with \p NewPrefix. Also, create the 1784 // resulting directory if it does not yet exist. 1785 std::string lto::getThinLTOOutputFile(StringRef Path, StringRef OldPrefix, 1786 StringRef NewPrefix) { 1787 if (OldPrefix.empty() && NewPrefix.empty()) 1788 return std::string(Path); 1789 SmallString<128> NewPath(Path); 1790 llvm::sys::path::replace_path_prefix(NewPath, OldPrefix, NewPrefix); 1791 StringRef ParentPath = llvm::sys::path::parent_path(NewPath.str()); 1792 if (!ParentPath.empty()) { 1793 // Make sure the new directory exists, creating it if necessary. 1794 if (std::error_code EC = llvm::sys::fs::create_directories(ParentPath)) 1795 llvm::errs() << "warning: could not create directory '" << ParentPath 1796 << "': " << EC.message() << '\n'; 1797 } 1798 return std::string(NewPath); 1799 } 1800 1801 namespace { 1802 class WriteIndexesThinBackend : public ThinBackendProc { 1803 std::string OldPrefix, NewPrefix, NativeObjectPrefix; 1804 raw_fd_ostream *LinkedObjectsFile; 1805 1806 public: 1807 WriteIndexesThinBackend( 1808 const Config &Conf, ModuleSummaryIndex &CombinedIndex, 1809 ThreadPoolStrategy ThinLTOParallelism, 1810 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, 1811 std::string OldPrefix, std::string NewPrefix, 1812 std::string NativeObjectPrefix, bool ShouldEmitImportsFiles, 1813 raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite) 1814 : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries, 1815 OnWrite, ShouldEmitImportsFiles, ThinLTOParallelism), 1816 OldPrefix(OldPrefix), NewPrefix(NewPrefix), 1817 NativeObjectPrefix(NativeObjectPrefix), 1818 LinkedObjectsFile(LinkedObjectsFile) {} 1819 1820 Error start( 1821 unsigned Task, BitcodeModule BM, 1822 const FunctionImporter::ImportMapTy &ImportList, 1823 const FunctionImporter::ExportSetTy &ExportList, 1824 const 
std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, 1825 MapVector<StringRef, BitcodeModule> &ModuleMap) override { 1826 StringRef ModulePath = BM.getModuleIdentifier(); 1827 1828 // The contents of this file may be used as input to a native link, and must 1829 // therefore contain the processed modules in a determinstic order that 1830 // match the order they are provided on the command line. For that reason, 1831 // we cannot include this in the asynchronously executed lambda below. 1832 if (LinkedObjectsFile) { 1833 std::string ObjectPrefix = 1834 NativeObjectPrefix.empty() ? NewPrefix : NativeObjectPrefix; 1835 std::string LinkedObjectsFilePath = 1836 getThinLTOOutputFile(ModulePath, OldPrefix, ObjectPrefix); 1837 *LinkedObjectsFile << LinkedObjectsFilePath << '\n'; 1838 } 1839 1840 BackendThreadPool.async( 1841 [this](const StringRef ModulePath, 1842 const FunctionImporter::ImportMapTy &ImportList, 1843 const std::string &OldPrefix, const std::string &NewPrefix) { 1844 std::string NewModulePath = 1845 getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix); 1846 auto E = emitFiles(ImportList, ModulePath, NewModulePath); 1847 if (E) { 1848 std::unique_lock<std::mutex> L(ErrMu); 1849 if (Err) 1850 Err = joinErrors(std::move(*Err), std::move(E)); 1851 else 1852 Err = std::move(E); 1853 return; 1854 } 1855 }, 1856 ModulePath, ImportList, OldPrefix, NewPrefix); 1857 1858 if (OnWrite) 1859 OnWrite(std::string(ModulePath)); 1860 return Error::success(); 1861 } 1862 1863 bool isSensitiveToInputOrder() override { 1864 // The order which modules are written to LinkedObjectsFile should be 1865 // deterministic and match the order they are passed on the command line. 
1866 return true; 1867 } 1868 }; 1869 } // end anonymous namespace 1870 1871 ThinBackend lto::createWriteIndexesThinBackend( 1872 ThreadPoolStrategy Parallelism, std::string OldPrefix, 1873 std::string NewPrefix, std::string NativeObjectPrefix, 1874 bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile, 1875 IndexWriteCallback OnWrite) { 1876 auto Func = 1877 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, 1878 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, 1879 AddStreamFn AddStream, FileCache Cache) { 1880 return std::make_unique<WriteIndexesThinBackend>( 1881 Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, 1882 OldPrefix, NewPrefix, NativeObjectPrefix, ShouldEmitImportsFiles, 1883 LinkedObjectsFile, OnWrite); 1884 }; 1885 return ThinBackend(Func, Parallelism); 1886 } 1887 1888 Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, 1889 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) { 1890 LLVM_DEBUG(dbgs() << "Running ThinLTO\n"); 1891 ThinLTO.CombinedIndex.releaseTemporaryMemory(); 1892 timeTraceProfilerBegin("ThinLink", StringRef("")); 1893 auto TimeTraceScopeExit = llvm::make_scope_exit([]() { 1894 if (llvm::timeTraceProfilerEnabled()) 1895 llvm::timeTraceProfilerEnd(); 1896 }); 1897 if (ThinLTO.ModuleMap.empty()) 1898 return Error::success(); 1899 1900 if (ThinLTO.ModulesToCompile && ThinLTO.ModulesToCompile->empty()) { 1901 llvm::errs() << "warning: [ThinLTO] No module compiled\n"; 1902 return Error::success(); 1903 } 1904 1905 if (Conf.CombinedIndexHook && 1906 !Conf.CombinedIndexHook(ThinLTO.CombinedIndex, GUIDPreservedSymbols)) 1907 return Error::success(); 1908 1909 // Collect for each module the list of function it defines (GUID -> 1910 // Summary). 
1911 DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries( 1912 ThinLTO.ModuleMap.size()); 1913 ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule( 1914 ModuleToDefinedGVSummaries); 1915 // Create entries for any modules that didn't have any GV summaries 1916 // (either they didn't have any GVs to start with, or we suppressed 1917 // generation of the summaries because they e.g. had inline assembly 1918 // uses that couldn't be promoted/renamed on export). This is so 1919 // InProcessThinBackend::start can still launch a backend thread, which 1920 // is passed the map of summaries for the module, without any special 1921 // handling for this case. 1922 for (auto &Mod : ThinLTO.ModuleMap) 1923 if (!ModuleToDefinedGVSummaries.count(Mod.first)) 1924 ModuleToDefinedGVSummaries.try_emplace(Mod.first); 1925 1926 FunctionImporter::ImportListsTy ImportLists(ThinLTO.ModuleMap.size()); 1927 DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists( 1928 ThinLTO.ModuleMap.size()); 1929 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR; 1930 1931 if (DumpThinCGSCCs) 1932 ThinLTO.CombinedIndex.dumpSCCs(outs()); 1933 1934 std::set<GlobalValue::GUID> ExportedGUIDs; 1935 1936 bool WholeProgramVisibilityEnabledInLTO = 1937 Conf.HasWholeProgramVisibility && 1938 // If validation is enabled, upgrade visibility only when all vtables 1939 // have typeinfos. 1940 (!Conf.ValidateAllVtablesHaveTypeInfos || Conf.AllVtablesHaveTypeInfos); 1941 if (hasWholeProgramVisibility(WholeProgramVisibilityEnabledInLTO)) 1942 ThinLTO.CombinedIndex.setWithWholeProgramVisibility(); 1943 1944 // If we're validating, get the vtable symbols that should not be 1945 // upgraded because they correspond to typeIDs outside of index-based 1946 // WPD info. 
1947 DenseSet<GlobalValue::GUID> VisibleToRegularObjSymbols; 1948 if (WholeProgramVisibilityEnabledInLTO && 1949 Conf.ValidateAllVtablesHaveTypeInfos) { 1950 // This returns true when the name is local or not defined. Locals are 1951 // expected to be handled separately. 1952 auto IsVisibleToRegularObj = [&](StringRef name) { 1953 auto It = GlobalResolutions->find(name); 1954 return (It == GlobalResolutions->end() || 1955 It->second.VisibleOutsideSummary || !It->second.Prevailing); 1956 }; 1957 1958 getVisibleToRegularObjVtableGUIDs(ThinLTO.CombinedIndex, 1959 VisibleToRegularObjSymbols, 1960 IsVisibleToRegularObj); 1961 } 1962 1963 // If allowed, upgrade public vcall visibility to linkage unit visibility in 1964 // the summaries before whole program devirtualization below. 1965 updateVCallVisibilityInIndex( 1966 ThinLTO.CombinedIndex, WholeProgramVisibilityEnabledInLTO, 1967 DynamicExportSymbols, VisibleToRegularObjSymbols); 1968 1969 // Perform index-based WPD. This will return immediately if there are 1970 // no index entries in the typeIdMetadata map (e.g. if we are instead 1971 // performing IR-based WPD in hybrid regular/thin LTO mode). 1972 std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap; 1973 runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs, 1974 LocalWPDTargetsMap); 1975 1976 auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { 1977 return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); 1978 }; 1979 if (EnableMemProfContextDisambiguation) { 1980 MemProfContextDisambiguation ContextDisambiguation; 1981 ContextDisambiguation.run(ThinLTO.CombinedIndex, isPrevailing); 1982 } 1983 1984 // Figure out which symbols need to be internalized. This also needs to happen 1985 // at -O0 because summary-based DCE is implemented using internalization, and 1986 // we must apply DCE consistently with the full LTO module in order to avoid 1987 // undefined references during the final link. 
1988 for (auto &Res : *GlobalResolutions) { 1989 // If the symbol does not have external references or it is not prevailing, 1990 // then not need to mark it as exported from a ThinLTO partition. 1991 if (Res.second.Partition != GlobalResolution::External || 1992 !Res.second.isPrevailingIRSymbol()) 1993 continue; 1994 auto GUID = GlobalValue::getGUIDAssumingExternalLinkage( 1995 GlobalValue::dropLLVMManglingEscape(Res.second.IRName)); 1996 // Mark exported unless index-based analysis determined it to be dead. 1997 if (ThinLTO.CombinedIndex.isGUIDLive(GUID)) 1998 ExportedGUIDs.insert(GUID); 1999 } 2000 2001 // Reset the GlobalResolutions to deallocate the associated memory, as there 2002 // are no further accesses. We specifically want to do this before computing 2003 // cross module importing, which adds to peak memory via the computed import 2004 // and export lists. 2005 releaseGlobalResolutionsMemory(); 2006 2007 if (Conf.OptLevel > 0) 2008 ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, 2009 isPrevailing, ImportLists, ExportLists); 2010 2011 // Any functions referenced by the jump table in the regular LTO object must 2012 // be exported. 2013 auto &Defs = ThinLTO.CombinedIndex.cfiFunctionDefs(); 2014 ExportedGUIDs.insert(Defs.guid_begin(), Defs.guid_end()); 2015 auto &Decls = ThinLTO.CombinedIndex.cfiFunctionDecls(); 2016 ExportedGUIDs.insert(Decls.guid_begin(), Decls.guid_end()); 2017 2018 auto isExported = [&](StringRef ModuleIdentifier, ValueInfo VI) { 2019 const auto &ExportList = ExportLists.find(ModuleIdentifier); 2020 return (ExportList != ExportLists.end() && ExportList->second.count(VI)) || 2021 ExportedGUIDs.count(VI.getGUID()); 2022 }; 2023 2024 // Update local devirtualized targets that were exported by cross-module 2025 // importing or by other devirtualizations marked in the ExportedGUIDs set. 
2026 updateIndexWPDForExports(ThinLTO.CombinedIndex, isExported, 2027 LocalWPDTargetsMap); 2028 2029 thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported, 2030 isPrevailing); 2031 2032 auto recordNewLinkage = [&](StringRef ModuleIdentifier, 2033 GlobalValue::GUID GUID, 2034 GlobalValue::LinkageTypes NewLinkage) { 2035 ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; 2036 }; 2037 thinLTOResolvePrevailingInIndex(Conf, ThinLTO.CombinedIndex, isPrevailing, 2038 recordNewLinkage, GUIDPreservedSymbols); 2039 2040 thinLTOPropagateFunctionAttrs(ThinLTO.CombinedIndex, isPrevailing); 2041 2042 generateParamAccessSummary(ThinLTO.CombinedIndex); 2043 2044 if (llvm::timeTraceProfilerEnabled()) 2045 llvm::timeTraceProfilerEnd(); 2046 2047 TimeTraceScopeExit.release(); 2048 2049 auto &ModuleMap = 2050 ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap; 2051 2052 auto RunBackends = [&](ThinBackendProc *BackendProcess) -> Error { 2053 auto ProcessOneModule = [&](int I) -> Error { 2054 auto &Mod = *(ModuleMap.begin() + I); 2055 // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for 2056 // combined module and parallel code generation partitions. 2057 return BackendProcess->start( 2058 RegularLTO.ParallelCodeGenParallelismLevel + I, Mod.second, 2059 ImportLists[Mod.first], ExportLists[Mod.first], 2060 ResolvedODR[Mod.first], ThinLTO.ModuleMap); 2061 }; 2062 2063 BackendProcess->setup(ModuleMap.size(), 2064 RegularLTO.ParallelCodeGenParallelismLevel, 2065 RegularLTO.CombinedModule->getTargetTriple()); 2066 2067 if (BackendProcess->getThreadCount() == 1 || 2068 BackendProcess->isSensitiveToInputOrder()) { 2069 // Process the modules in the order they were provided on the 2070 // command-line. It is important for this codepath to be used for 2071 // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists 2072 // ThinLTO objects in the same order as the inputs, which otherwise would 2073 // affect the final link order. 
2074 for (int I = 0, E = ModuleMap.size(); I != E; ++I) 2075 if (Error E = ProcessOneModule(I)) 2076 return E; 2077 } else { 2078 // When executing in parallel, process largest bitsize modules first to 2079 // improve parallelism, and avoid starving the thread pool near the end. 2080 // This saves about 15 sec on a 36-core machine while link `clang.exe` 2081 // (out of 100 sec). 2082 std::vector<BitcodeModule *> ModulesVec; 2083 ModulesVec.reserve(ModuleMap.size()); 2084 for (auto &Mod : ModuleMap) 2085 ModulesVec.push_back(&Mod.second); 2086 for (int I : generateModulesOrdering(ModulesVec)) 2087 if (Error E = ProcessOneModule(I)) 2088 return E; 2089 } 2090 return BackendProcess->wait(); 2091 }; 2092 2093 if (!CodeGenDataThinLTOTwoRounds) { 2094 std::unique_ptr<ThinBackendProc> BackendProc = 2095 ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, 2096 AddStream, Cache); 2097 return RunBackends(BackendProc.get()); 2098 } 2099 2100 // Perform two rounds of code generation for ThinLTO: 2101 // 1. First round: Perform optimization and code generation, outputting to 2102 // temporary scratch objects. 2103 // 2. Merge code generation data extracted from the temporary scratch objects. 2104 // 3. Second round: Execute code generation again using the merged data. 2105 LLVM_DEBUG(dbgs() << "[TwoRounds] Initializing ThinLTO two-codegen rounds\n"); 2106 2107 unsigned MaxTasks = getMaxTasks(); 2108 auto Parallelism = ThinLTO.Backend.getParallelism(); 2109 // Set up two additional streams and caches for storing temporary scratch 2110 // objects and optimized IRs, using the same cache directory as the original. 2111 cgdata::StreamCacheData CG(MaxTasks, Cache, "CG"), IR(MaxTasks, Cache, "IR"); 2112 2113 // First round: Execute optimization and code generation, outputting to 2114 // temporary scratch objects. Serialize the optimized IRs before initiating 2115 // code generation. 
2116 LLVM_DEBUG(dbgs() << "[TwoRounds] Running the first round of codegen\n"); 2117 auto FirstRoundLTO = std::make_unique<FirstRoundThinBackend>( 2118 Conf, ThinLTO.CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, 2119 CG.AddStream, CG.Cache, IR.AddStream, IR.Cache); 2120 if (Error E = RunBackends(FirstRoundLTO.get())) 2121 return E; 2122 2123 LLVM_DEBUG(dbgs() << "[TwoRounds] Merging codegen data\n"); 2124 auto CombinedHashOrErr = cgdata::mergeCodeGenData(*CG.getResult()); 2125 if (Error E = CombinedHashOrErr.takeError()) 2126 return E; 2127 auto CombinedHash = *CombinedHashOrErr; 2128 LLVM_DEBUG(dbgs() << "[TwoRounds] CGData hash: " << CombinedHash << "\n"); 2129 2130 // Second round: Read the optimized IRs and execute code generation using the 2131 // merged data. 2132 LLVM_DEBUG(dbgs() << "[TwoRounds] Running the second round of codegen\n"); 2133 auto SecondRoundLTO = std::make_unique<SecondRoundThinBackend>( 2134 Conf, ThinLTO.CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, 2135 AddStream, Cache, IR.getResult(), CombinedHash); 2136 return RunBackends(SecondRoundLTO.get()); 2137 } 2138 2139 Expected<std::unique_ptr<ToolOutputFile>> lto::setupLLVMOptimizationRemarks( 2140 LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses, 2141 StringRef RemarksFormat, bool RemarksWithHotness, 2142 std::optional<uint64_t> RemarksHotnessThreshold, int Count) { 2143 std::string Filename = std::string(RemarksFilename); 2144 // For ThinLTO, file.opt.<format> becomes 2145 // file.opt.<format>.thin.<num>.<format>. 2146 if (!Filename.empty() && Count != -1) 2147 Filename = 2148 (Twine(Filename) + ".thin." + llvm::utostr(Count) + "." 
+ RemarksFormat) 2149 .str(); 2150 2151 auto ResultOrErr = llvm::setupLLVMOptimizationRemarks( 2152 Context, Filename, RemarksPasses, RemarksFormat, RemarksWithHotness, 2153 RemarksHotnessThreshold); 2154 if (Error E = ResultOrErr.takeError()) 2155 return std::move(E); 2156 2157 if (*ResultOrErr) 2158 (*ResultOrErr)->keep(); 2159 2160 return ResultOrErr; 2161 } 2162 2163 Expected<std::unique_ptr<ToolOutputFile>> 2164 lto::setupStatsFile(StringRef StatsFilename) { 2165 // Setup output file to emit statistics. 2166 if (StatsFilename.empty()) 2167 return nullptr; 2168 2169 llvm::EnableStatistics(false); 2170 std::error_code EC; 2171 auto StatsFile = 2172 std::make_unique<ToolOutputFile>(StatsFilename, EC, sys::fs::OF_None); 2173 if (EC) 2174 return errorCodeToError(EC); 2175 2176 StatsFile->keep(); 2177 return std::move(StatsFile); 2178 } 2179 2180 // Compute the ordering we will process the inputs: the rough heuristic here 2181 // is to sort them per size so that the largest module get schedule as soon as 2182 // possible. This is purely a compile-time optimization. 2183 std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) { 2184 auto Seq = llvm::seq<int>(0, R.size()); 2185 std::vector<int> ModulesOrdering(Seq.begin(), Seq.end()); 2186 llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) { 2187 auto LSize = R[LeftIndex]->getBuffer().size(); 2188 auto RSize = R[RightIndex]->getBuffer().size(); 2189 return LSize > RSize; 2190 }); 2191 return ModulesOrdering; 2192 } 2193 2194 namespace { 2195 /// This out-of-process backend does not perform code generation when invoked 2196 /// for each task. Instead, it generates the necessary information (e.g., the 2197 /// summary index shard, import list, etc.) to enable code generation to be 2198 /// performed externally, similar to WriteIndexesThinBackend. The backend's 2199 /// `wait` function then invokes an external distributor process to carry out 2200 /// the backend compilations. 
2201 class OutOfProcessThinBackend : public CGThinBackend { 2202 using SString = SmallString<128>; 2203 2204 BumpPtrAllocator Alloc; 2205 StringSaver Saver{Alloc}; 2206 2207 SString LinkerOutputFile; 2208 2209 SString DistributorPath; 2210 ArrayRef<StringRef> DistributorArgs; 2211 2212 SString RemoteCompiler; 2213 ArrayRef<StringRef> RemoteCompilerArgs; 2214 2215 bool SaveTemps; 2216 2217 SmallVector<StringRef, 0> CodegenOptions; 2218 DenseSet<StringRef> CommonInputs; 2219 2220 // Information specific to individual backend compilation job. 2221 struct Job { 2222 unsigned Task; 2223 StringRef ModuleID; 2224 StringRef NativeObjectPath; 2225 StringRef SummaryIndexPath; 2226 ImportsFilesContainer ImportsFiles; 2227 }; 2228 // The set of backend compilations jobs. 2229 SmallVector<Job> Jobs; 2230 2231 // A unique string to identify the current link. 2232 SmallString<8> UID; 2233 2234 // The offset to the first ThinLTO task. 2235 unsigned ThinLTOTaskOffset; 2236 2237 // The target triple to supply for backend compilations. 
2238 llvm::Triple Triple; 2239 2240 public: 2241 OutOfProcessThinBackend( 2242 const Config &Conf, ModuleSummaryIndex &CombinedIndex, 2243 ThreadPoolStrategy ThinLTOParallelism, 2244 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, 2245 AddStreamFn AddStream, lto::IndexWriteCallback OnWrite, 2246 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles, 2247 StringRef LinkerOutputFile, StringRef Distributor, 2248 ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler, 2249 ArrayRef<StringRef> RemoteCompilerArgs, bool SaveTemps) 2250 : CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries, 2251 AddStream, OnWrite, ShouldEmitIndexFiles, 2252 ShouldEmitImportsFiles, ThinLTOParallelism), 2253 LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor), 2254 DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler), 2255 RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps) {} 2256 2257 virtual void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset, 2258 llvm::Triple Triple) override { 2259 UID = itostr(sys::Process::getProcessId()); 2260 Jobs.resize((size_t)ThinLTONumTasks); 2261 this->ThinLTOTaskOffset = ThinLTOTaskOffset; 2262 this->Triple = Triple; 2263 } 2264 2265 Error start( 2266 unsigned Task, BitcodeModule BM, 2267 const FunctionImporter::ImportMapTy &ImportList, 2268 const FunctionImporter::ExportSetTy &ExportList, 2269 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR, 2270 MapVector<StringRef, BitcodeModule> &ModuleMap) override { 2271 2272 StringRef ModulePath = BM.getModuleIdentifier(); 2273 2274 SString ObjFilePath = sys::path::parent_path(LinkerOutputFile); 2275 sys::path::append(ObjFilePath, sys::path::stem(ModulePath) + "." + 2276 itostr(Task) + "." 
+ UID + ".native.o"); 2277 2278 Job &J = Jobs[Task - ThinLTOTaskOffset]; 2279 J = { 2280 Task, 2281 ModulePath, 2282 Saver.save(ObjFilePath.str()), 2283 Saver.save(ObjFilePath.str() + ".thinlto.bc"), 2284 {} // Filled in by emitFiles below. 2285 }; 2286 2287 assert(ModuleToDefinedGVSummaries.count(ModulePath)); 2288 2289 // The BackendThreadPool is only used here to write the sharded index files 2290 // (similar to WriteIndexesThinBackend). 2291 BackendThreadPool.async( 2292 [=](Job &J, const FunctionImporter::ImportMapTy &ImportList) { 2293 if (auto E = emitFiles(ImportList, J.ModuleID, J.ModuleID.str(), 2294 J.SummaryIndexPath, J.ImportsFiles)) { 2295 std::unique_lock<std::mutex> L(ErrMu); 2296 if (Err) 2297 Err = joinErrors(std::move(*Err), std::move(E)); 2298 else 2299 Err = std::move(E); 2300 } 2301 }, 2302 std::ref(J), std::ref(ImportList)); 2303 2304 return Error::success(); 2305 } 2306 2307 // Derive a set of Clang options that will be shared/common for all DTLTO 2308 // backend compilations. We are intentionally minimal here as these options 2309 // must remain synchronized with the behavior of Clang. DTLTO does not support 2310 // all the features available with in-process LTO. More features are expected 2311 // to be added over time. Users can specify Clang options directly if a 2312 // feature is not supported. Note that explicitly specified options that imply 2313 // additional input or output file dependencies must be communicated to the 2314 // distribution system, potentially by setting extra options on the 2315 // distributor program. 
2316 void buildCommonRemoteCompilerOptions() { 2317 const lto::Config &C = Conf; 2318 auto &Ops = CodegenOptions; 2319 2320 Ops.push_back(Saver.save("-O" + Twine(C.OptLevel))); 2321 2322 if (C.Options.EmitAddrsig) 2323 Ops.push_back("-faddrsig"); 2324 if (C.Options.FunctionSections) 2325 Ops.push_back("-ffunction-sections"); 2326 if (C.Options.DataSections) 2327 Ops.push_back("-fdata-sections"); 2328 2329 if (C.RelocModel == Reloc::PIC_) 2330 // Clang doesn't have -fpic for all triples. 2331 if (!Triple.isOSBinFormatCOFF()) 2332 Ops.push_back("-fpic"); 2333 2334 // Turn on/off warnings about profile cfg mismatch (default on) 2335 // --lto-pgo-warn-mismatch. 2336 if (!C.PGOWarnMismatch) { 2337 Ops.push_back("-mllvm"); 2338 Ops.push_back("-no-pgo-warn-mismatch"); 2339 } 2340 2341 // Enable sample-based profile guided optimizations. 2342 // Sample profile file path --lto-sample-profile=<value>. 2343 if (!C.SampleProfile.empty()) { 2344 Ops.push_back( 2345 Saver.save("-fprofile-sample-use=" + Twine(C.SampleProfile))); 2346 CommonInputs.insert(C.SampleProfile); 2347 } 2348 2349 // We don't know which of options will be used by Clang. 2350 Ops.push_back("-Wno-unused-command-line-argument"); 2351 2352 // Forward any supplied options. 2353 if (!RemoteCompilerArgs.empty()) 2354 for (auto &a : RemoteCompilerArgs) 2355 Ops.push_back(a); 2356 } 2357 2358 // Generates a JSON file describing the backend compilations, for the 2359 // distributor. 2360 bool emitDistributorJson(StringRef DistributorJson) { 2361 using json::Array; 2362 std::error_code EC; 2363 raw_fd_ostream OS(DistributorJson, EC); 2364 if (EC) 2365 return false; 2366 2367 json::OStream JOS(OS); 2368 JOS.object([&]() { 2369 // Information common to all jobs. 
2370 JOS.attributeObject("common", [&]() { 2371 JOS.attribute("linker_output", LinkerOutputFile); 2372 2373 JOS.attributeArray("args", [&]() { 2374 JOS.value(RemoteCompiler); 2375 2376 JOS.value("-c"); 2377 2378 JOS.value(Saver.save("--target=" + Triple.str())); 2379 2380 for (const auto &A : CodegenOptions) 2381 JOS.value(A); 2382 }); 2383 2384 JOS.attribute("inputs", Array(CommonInputs)); 2385 }); 2386 2387 // Per-compilation-job information. 2388 JOS.attributeArray("jobs", [&]() { 2389 for (const auto &J : Jobs) { 2390 assert(J.Task != 0); 2391 2392 SmallVector<StringRef, 2> Inputs; 2393 SmallVector<StringRef, 1> Outputs; 2394 2395 JOS.object([&]() { 2396 JOS.attributeArray("args", [&]() { 2397 JOS.value(J.ModuleID); 2398 Inputs.push_back(J.ModuleID); 2399 2400 JOS.value( 2401 Saver.save("-fthinlto-index=" + Twine(J.SummaryIndexPath))); 2402 Inputs.push_back(J.SummaryIndexPath); 2403 2404 JOS.value("-o"); 2405 JOS.value(J.NativeObjectPath); 2406 Outputs.push_back(J.NativeObjectPath); 2407 }); 2408 2409 // Add the bitcode files from which imports will be made. These do 2410 // not explicitly appear on the backend compilation command lines 2411 // but are recorded in the summary index shards. 2412 llvm::append_range(Inputs, J.ImportsFiles); 2413 JOS.attribute("inputs", Array(Inputs)); 2414 2415 JOS.attribute("outputs", Array(Outputs)); 2416 }); 2417 } 2418 }); 2419 }); 2420 2421 return true; 2422 } 2423 2424 void removeFile(StringRef FileName) { 2425 std::error_code EC = sys::fs::remove(FileName, true); 2426 if (EC && EC != std::make_error_code(std::errc::no_such_file_or_directory)) 2427 errs() << "warning: could not remove the file '" << FileName 2428 << "': " << EC.message() << "\n"; 2429 } 2430 2431 Error wait() override { 2432 // Wait for the information on the required backend compilations to be 2433 // gathered. 
2434 BackendThreadPool.wait(); 2435 if (Err) 2436 return std::move(*Err); 2437 2438 auto CleanPerJobFiles = llvm::make_scope_exit([&] { 2439 if (!SaveTemps) 2440 for (auto &Job : Jobs) { 2441 removeFile(Job.NativeObjectPath); 2442 if (!ShouldEmitIndexFiles) 2443 removeFile(Job.SummaryIndexPath); 2444 } 2445 }); 2446 2447 const StringRef BCError = "DTLTO backend compilation: "; 2448 2449 buildCommonRemoteCompilerOptions(); 2450 2451 SString JsonFile = sys::path::parent_path(LinkerOutputFile); 2452 sys::path::append(JsonFile, sys::path::stem(LinkerOutputFile) + "." + UID + 2453 ".dist-file.json"); 2454 if (!emitDistributorJson(JsonFile)) 2455 return make_error<StringError>( 2456 BCError + "failed to generate distributor JSON script: " + JsonFile, 2457 inconvertibleErrorCode()); 2458 auto CleanJson = llvm::make_scope_exit([&] { 2459 if (!SaveTemps) 2460 removeFile(JsonFile); 2461 }); 2462 2463 SmallVector<StringRef, 3> Args = {DistributorPath}; 2464 llvm::append_range(Args, DistributorArgs); 2465 Args.push_back(JsonFile); 2466 std::string ErrMsg; 2467 if (sys::ExecuteAndWait(Args[0], Args, 2468 /*Env=*/std::nullopt, /*Redirects=*/{}, 2469 /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) { 2470 return make_error<StringError>( 2471 BCError + "distributor execution failed" + 2472 (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")), 2473 inconvertibleErrorCode()); 2474 } 2475 2476 for (auto &Job : Jobs) { 2477 // Load the native object from a file into a memory buffer 2478 // and store its contents in the output buffer. 
2479 auto ObjFileMbOrErr = 2480 MemoryBuffer::getFile(Job.NativeObjectPath, /*IsText=*/false, 2481 /*RequiresNullTerminator=*/false); 2482 if (std::error_code EC = ObjFileMbOrErr.getError()) 2483 return make_error<StringError>( 2484 BCError + "cannot open native object file: " + 2485 Job.NativeObjectPath + ": " + EC.message(), 2486 inconvertibleErrorCode()); 2487 auto StreamOrErr = AddStream(Job.Task, Job.ModuleID); 2488 if (Error Err = StreamOrErr.takeError()) 2489 report_fatal_error(std::move(Err)); 2490 auto &Stream = *StreamOrErr->get(); 2491 *Stream.OS << ObjFileMbOrErr->get()->getMemBufferRef().getBuffer(); 2492 if (Error Err = Stream.commit()) 2493 report_fatal_error(std::move(Err)); 2494 } 2495 2496 return Error::success(); 2497 } 2498 }; 2499 } // end anonymous namespace 2500 2501 ThinBackend lto::createOutOfProcessThinBackend( 2502 ThreadPoolStrategy Parallelism, lto::IndexWriteCallback OnWrite, 2503 bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles, 2504 StringRef LinkerOutputFile, StringRef Distributor, 2505 ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler, 2506 ArrayRef<StringRef> RemoteCompilerArgs, bool SaveTemps) { 2507 auto Func = 2508 [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, 2509 const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries, 2510 AddStreamFn AddStream, FileCache /*Cache*/) { 2511 return std::make_unique<OutOfProcessThinBackend>( 2512 Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, 2513 AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles, 2514 LinkerOutputFile, Distributor, DistributorArgs, RemoteCompiler, 2515 RemoteCompilerArgs, SaveTemps); 2516 }; 2517 return ThinBackend(Func, Parallelism); 2518 } 2519