1 //===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the LLVM module linker. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "LinkDiagnosticInfo.h" 14 #include "llvm-c/Linker.h" 15 #include "llvm/ADT/SetVector.h" 16 #include "llvm/IR/Comdat.h" 17 #include "llvm/IR/DiagnosticPrinter.h" 18 #include "llvm/IR/GlobalValue.h" 19 #include "llvm/IR/LLVMContext.h" 20 #include "llvm/IR/Module.h" 21 #include "llvm/Linker/Linker.h" 22 #include "llvm/Support/Error.h" 23 using namespace llvm; 24 25 namespace { 26 27 /// This is an implementation class for the LinkModules function, which is the 28 /// entrypoint for this file. 29 class ModuleLinker { 30 IRMover &Mover; 31 std::unique_ptr<Module> SrcM; 32 33 SetVector<GlobalValue *> ValuesToLink; 34 35 /// For symbol clashes, prefer those from Src. 36 unsigned Flags; 37 38 /// List of global value names that should be internalized. 39 StringSet<> Internalize; 40 41 /// Function that will perform the actual internalization. The reason for a 42 /// callback is that the linker cannot call internalizeModule without 43 /// creating a circular dependency between IPO and the linker. 44 std::function<void(Module &, const StringSet<> &)> InternalizeCallback; 45 46 /// Used as the callback for lazy linking. 47 /// The mover has just hit GV and we have to decide if it, and other members 48 /// of the same comdat, should be linked. Every member to be linked is passed 49 /// to Add. 50 void addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add); 51 52 bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; } 53 bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; } 54 55 bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest, 56 const GlobalValue &Src); 57 58 /// Should we have mover and linker error diag info? 59 bool emitError(const Twine &Message) { 60 SrcM->getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message)); 61 return true; 62 } 63 64 bool getComdatLeader(Module &M, StringRef ComdatName, 65 const GlobalVariable *&GVar); 66 bool computeResultingSelectionKind(StringRef ComdatName, 67 Comdat::SelectionKind Src, 68 Comdat::SelectionKind Dst, 69 Comdat::SelectionKind &Result, 70 bool &LinkFromSrc); 71 std::map<const Comdat *, std::pair<Comdat::SelectionKind, bool>> 72 ComdatsChosen; 73 bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK, 74 bool &LinkFromSrc); 75 // Keep track of the lazy linked global members of each comdat in source. 76 DenseMap<const Comdat *, std::vector<GlobalValue *>> LazyComdatMembers; 77 78 /// Given a global in the source module, return the global in the 79 /// destination module that is being linked to, if any. 80 GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) { 81 Module &DstM = Mover.getModule(); 82 // If the source has no name it can't link. If it has local linkage, 83 // there is no name match-up going on. 84 if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage())) 85 return nullptr; 86 87 // Otherwise see if we have a match in the destination module's symtab. 88 GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName()); 89 if (!DGV) 90 return nullptr; 91 92 // If we found a global with the same name in the dest module, but it has 93 // internal linkage, we are really not doing any linkage here. 94 if (DGV->hasLocalLinkage()) 95 return nullptr; 96 97 // Otherwise, we do in fact link to the destination global. 98 return DGV; 99 } 100 101 /// Drop GV if it is a member of a comdat that we are dropping. 102 /// This can happen with COFF's largest selection kind. 103 void dropReplacedComdat(GlobalValue &GV, 104 const DenseSet<const Comdat *> &ReplacedDstComdats); 105 106 bool linkIfNeeded(GlobalValue &GV); 107 108 public: 109 ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags, 110 std::function<void(Module &, const StringSet<> &)> 111 InternalizeCallback = {}) 112 : Mover(Mover), SrcM(std::move(SrcM)), Flags(Flags), 113 InternalizeCallback(std::move(InternalizeCallback)) {} 114 115 bool run(); 116 }; 117 } 118 119 static GlobalValue::VisibilityTypes 120 getMinVisibility(GlobalValue::VisibilityTypes A, 121 GlobalValue::VisibilityTypes B) { 122 if (A == GlobalValue::HiddenVisibility || B == GlobalValue::HiddenVisibility) 123 return GlobalValue::HiddenVisibility; 124 if (A == GlobalValue::ProtectedVisibility || 125 B == GlobalValue::ProtectedVisibility) 126 return GlobalValue::ProtectedVisibility; 127 return GlobalValue::DefaultVisibility; 128 } 129 130 bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName, 131 const GlobalVariable *&GVar) { 132 const GlobalValue *GVal = M.getNamedValue(ComdatName); 133 if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) { 134 GVal = GA->getBaseObject(); 135 if (!GVal) 136 // We cannot resolve the size of the aliasee yet. 137 return emitError("Linking COMDATs named '" + ComdatName + 138 "': COMDAT key involves incomputable alias size."); 139 } 140 141 GVar = dyn_cast_or_null<GlobalVariable>(GVal); 142 if (!GVar) 143 return emitError( 144 "Linking COMDATs named '" + ComdatName + 145 "': GlobalVariable required for data dependent selection!"); 146 147 return false; 148 } 149 150 bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, 151 Comdat::SelectionKind Src, 152 Comdat::SelectionKind Dst, 153 Comdat::SelectionKind &Result, 154 bool &LinkFromSrc) { 155 Module &DstM = Mover.getModule(); 156 // The ability to mix Comdat::SelectionKind::Any with 157 // Comdat::SelectionKind::Largest is a behavior that comes from COFF. 158 bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any || 159 Dst == Comdat::SelectionKind::Largest; 160 bool SrcAnyOrLargest = Src == Comdat::SelectionKind::Any || 161 Src == Comdat::SelectionKind::Largest; 162 if (DstAnyOrLargest && SrcAnyOrLargest) { 163 if (Dst == Comdat::SelectionKind::Largest || 164 Src == Comdat::SelectionKind::Largest) 165 Result = Comdat::SelectionKind::Largest; 166 else 167 Result = Comdat::SelectionKind::Any; 168 } else if (Src == Dst) { 169 Result = Dst; 170 } else { 171 return emitError("Linking COMDATs named '" + ComdatName + 172 "': invalid selection kinds!"); 173 } 174 175 switch (Result) { 176 case Comdat::SelectionKind::Any: 177 // Go with Dst. 178 LinkFromSrc = false; 179 break; 180 case Comdat::SelectionKind::NoDuplicates: 181 return emitError("Linking COMDATs named '" + ComdatName + 182 "': noduplicates has been violated!"); 183 case Comdat::SelectionKind::ExactMatch: 184 case Comdat::SelectionKind::Largest: 185 case Comdat::SelectionKind::SameSize: { 186 const GlobalVariable *DstGV; 187 const GlobalVariable *SrcGV; 188 if (getComdatLeader(DstM, ComdatName, DstGV) || 189 getComdatLeader(*SrcM, ComdatName, SrcGV)) 190 return true; 191 192 const DataLayout &DstDL = DstM.getDataLayout(); 193 const DataLayout &SrcDL = SrcM->getDataLayout(); 194 uint64_t DstSize = DstDL.getTypeAllocSize(DstGV->getValueType()); 195 uint64_t SrcSize = SrcDL.getTypeAllocSize(SrcGV->getValueType()); 196 if (Result == Comdat::SelectionKind::ExactMatch) { 197 if (SrcGV->getInitializer() != DstGV->getInitializer()) 198 return emitError("Linking COMDATs named '" + ComdatName + 199 "': ExactMatch violated!"); 200 LinkFromSrc = false; 201 } else if (Result == Comdat::SelectionKind::Largest) { 202 LinkFromSrc = SrcSize > DstSize; 203 } else if (Result == Comdat::SelectionKind::SameSize) { 204 if (SrcSize != DstSize) 205 return emitError("Linking COMDATs named '" + ComdatName + 206 "': SameSize violated!"); 207 LinkFromSrc = false; 208 } else { 209 llvm_unreachable("unknown selection kind"); 210 } 211 break; 212 } 213 } 214 215 return false; 216 } 217 218 bool ModuleLinker::getComdatResult(const Comdat *SrcC, 219 Comdat::SelectionKind &Result, 220 bool &LinkFromSrc) { 221 Module &DstM = Mover.getModule(); 222 Comdat::SelectionKind SSK = SrcC->getSelectionKind(); 223 StringRef ComdatName = SrcC->getName(); 224 Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable(); 225 Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName); 226 227 if (DstCI == ComdatSymTab.end()) { 228 // Use the comdat if it is only available in one of the modules. 229 LinkFromSrc = true; 230 Result = SSK; 231 return false; 232 } 233 234 const Comdat *DstC = &DstCI->second; 235 Comdat::SelectionKind DSK = DstC->getSelectionKind(); 236 return computeResultingSelectionKind(ComdatName, SSK, DSK, Result, 237 LinkFromSrc); 238 } 239 240 bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, 241 const GlobalValue &Dest, 242 const GlobalValue &Src) { 243 244 // Should we unconditionally use the Src? 245 if (shouldOverrideFromSrc()) { 246 LinkFromSrc = true; 247 return false; 248 } 249 250 // We always have to add Src if it has appending linkage. 251 if (Src.hasAppendingLinkage()) { 252 LinkFromSrc = true; 253 return false; 254 } 255 256 bool SrcIsDeclaration = Src.isDeclarationForLinker(); 257 bool DestIsDeclaration = Dest.isDeclarationForLinker(); 258 259 if (SrcIsDeclaration) { 260 // If Src is external or if both Src & Dest are external.. Just link the 261 // external globals, we aren't adding anything. 262 if (Src.hasDLLImportStorageClass()) { 263 // If one of GVs is marked as DLLImport, result should be dllimport'ed. 264 LinkFromSrc = DestIsDeclaration; 265 return false; 266 } 267 // If the Dest is weak, use the source linkage. 268 if (Dest.hasExternalWeakLinkage()) { 269 LinkFromSrc = true; 270 return false; 271 } 272 // Link an available_externally over a declaration. 273 LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration(); 274 return false; 275 } 276 277 if (DestIsDeclaration) { 278 // If Dest is external but Src is not: 279 LinkFromSrc = true; 280 return false; 281 } 282 283 if (Src.hasCommonLinkage()) { 284 if (Dest.hasLinkOnceLinkage() || Dest.hasWeakLinkage()) { 285 LinkFromSrc = true; 286 return false; 287 } 288 289 if (!Dest.hasCommonLinkage()) { 290 LinkFromSrc = false; 291 return false; 292 } 293 294 const DataLayout &DL = Dest.getParent()->getDataLayout(); 295 uint64_t DestSize = DL.getTypeAllocSize(Dest.getValueType()); 296 uint64_t SrcSize = DL.getTypeAllocSize(Src.getValueType()); 297 LinkFromSrc = SrcSize > DestSize; 298 return false; 299 } 300 301 if (Src.isWeakForLinker()) { 302 assert(!Dest.hasExternalWeakLinkage()); 303 assert(!Dest.hasAvailableExternallyLinkage()); 304 305 if (Dest.hasLinkOnceLinkage() && Src.hasWeakLinkage()) { 306 LinkFromSrc = true; 307 return false; 308 } 309 310 LinkFromSrc = false; 311 return false; 312 } 313 314 if (Dest.isWeakForLinker()) { 315 assert(Src.hasExternalLinkage()); 316 LinkFromSrc = true; 317 return false; 318 } 319 320 assert(!Src.hasExternalWeakLinkage()); 321 assert(!Dest.hasExternalWeakLinkage()); 322 assert(Dest.hasExternalLinkage() && Src.hasExternalLinkage() && 323 "Unexpected linkage type!"); 324 return emitError("Linking globals named '" + Src.getName() + 325 "': symbol multiply defined!"); 326 } 327 328 bool ModuleLinker::linkIfNeeded(GlobalValue &GV) { 329 GlobalValue *DGV = getLinkedToGlobal(&GV); 330 331 if (shouldLinkOnlyNeeded()) { 332 // Always import variables with appending linkage. 333 if (!GV.hasAppendingLinkage()) { 334 // Don't import globals unless they are referenced by the destination 335 // module. 336 if (!DGV) 337 return false; 338 // Don't import globals that are already defined in the destination module 339 if (!DGV->isDeclaration()) 340 return false; 341 } 342 } 343 344 if (DGV && !GV.hasLocalLinkage() && !GV.hasAppendingLinkage()) { 345 auto *DGVar = dyn_cast<GlobalVariable>(DGV); 346 auto *SGVar = dyn_cast<GlobalVariable>(&GV); 347 if (DGVar && SGVar) { 348 if (DGVar->isDeclaration() && SGVar->isDeclaration() && 349 (!DGVar->isConstant() || !SGVar->isConstant())) { 350 DGVar->setConstant(false); 351 SGVar->setConstant(false); 352 } 353 if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) { 354 MaybeAlign Align( 355 std::max(DGVar->getAlignment(), SGVar->getAlignment())); 356 SGVar->setAlignment(Align); 357 DGVar->setAlignment(Align); 358 } 359 } 360 361 GlobalValue::VisibilityTypes Visibility = 362 getMinVisibility(DGV->getVisibility(), GV.getVisibility()); 363 DGV->setVisibility(Visibility); 364 GV.setVisibility(Visibility); 365 366 GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::getMinUnnamedAddr( 367 DGV->getUnnamedAddr(), GV.getUnnamedAddr()); 368 DGV->setUnnamedAddr(UnnamedAddr); 369 GV.setUnnamedAddr(UnnamedAddr); 370 } 371 372 if (!DGV && !shouldOverrideFromSrc() && 373 (GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() || 374 GV.hasAvailableExternallyLinkage())) 375 return false; 376 377 if (GV.isDeclaration()) 378 return false; 379 380 if (const Comdat *SC = GV.getComdat()) { 381 bool LinkFromSrc; 382 Comdat::SelectionKind SK; 383 std::tie(SK, LinkFromSrc) = ComdatsChosen[SC]; 384 if (!LinkFromSrc) 385 return false; 386 } 387 388 bool LinkFromSrc = true; 389 if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV)) 390 return true; 391 if (LinkFromSrc) 392 ValuesToLink.insert(&GV); 393 return false; 394 } 395 396 void ModuleLinker::addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add) { 397 // Add these to the internalize list 398 if (!GV.hasLinkOnceLinkage() && !GV.hasAvailableExternallyLinkage() && 399 !shouldLinkOnlyNeeded()) 400 return; 401 402 if (InternalizeCallback) 403 Internalize.insert(GV.getName()); 404 Add(GV); 405 406 const Comdat *SC = GV.getComdat(); 407 if (!SC) 408 return; 409 for (GlobalValue *GV2 : LazyComdatMembers[SC]) { 410 GlobalValue *DGV = getLinkedToGlobal(GV2); 411 bool LinkFromSrc = true; 412 if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2)) 413 return; 414 if (!LinkFromSrc) 415 continue; 416 if (InternalizeCallback) 417 Internalize.insert(GV2->getName()); 418 Add(*GV2); 419 } 420 } 421 422 void ModuleLinker::dropReplacedComdat( 423 GlobalValue &GV, const DenseSet<const Comdat *> &ReplacedDstComdats) { 424 Comdat *C = GV.getComdat(); 425 if (!C) 426 return; 427 if (!ReplacedDstComdats.count(C)) 428 return; 429 if (GV.use_empty()) { 430 GV.eraseFromParent(); 431 return; 432 } 433 434 if (auto *F = dyn_cast<Function>(&GV)) { 435 F->deleteBody(); 436 } else if (auto *Var = dyn_cast<GlobalVariable>(&GV)) { 437 Var->setInitializer(nullptr); 438 } else { 439 auto &Alias = cast<GlobalAlias>(GV); 440 Module &M = *Alias.getParent(); 441 PointerType &Ty = *cast<PointerType>(Alias.getType()); 442 GlobalValue *Declaration; 443 if (auto *FTy = dyn_cast<FunctionType>(Alias.getValueType())) { 444 Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, "", &M); 445 } else { 446 Declaration = 447 new GlobalVariable(M, Ty.getElementType(), /*isConstant*/ false, 448 GlobalValue::ExternalLinkage, 449 /*Initializer*/ nullptr); 450 } 451 Declaration->takeName(&Alias); 452 Alias.replaceAllUsesWith(Declaration); 453 Alias.eraseFromParent(); 454 } 455 } 456 457 bool ModuleLinker::run() { 458 Module &DstM = Mover.getModule(); 459 DenseSet<const Comdat *> ReplacedDstComdats; 460 461 for (const auto &SMEC : SrcM->getComdatSymbolTable()) { 462 const Comdat &C = SMEC.getValue(); 463 if (ComdatsChosen.count(&C)) 464 continue; 465 Comdat::SelectionKind SK; 466 bool LinkFromSrc; 467 if (getComdatResult(&C, SK, LinkFromSrc)) 468 return true; 469 ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc); 470 471 if (!LinkFromSrc) 472 continue; 473 474 Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable(); 475 Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(C.getName()); 476 if (DstCI == ComdatSymTab.end()) 477 continue; 478 479 // The source comdat is replacing the dest one. 480 const Comdat *DstC = &DstCI->second; 481 ReplacedDstComdats.insert(DstC); 482 } 483 484 // Alias have to go first, since we are not able to find their comdats 485 // otherwise. 486 for (auto I = DstM.alias_begin(), E = DstM.alias_end(); I != E;) { 487 GlobalAlias &GV = *I++; 488 dropReplacedComdat(GV, ReplacedDstComdats); 489 } 490 491 for (auto I = DstM.global_begin(), E = DstM.global_end(); I != E;) { 492 GlobalVariable &GV = *I++; 493 dropReplacedComdat(GV, ReplacedDstComdats); 494 } 495 496 for (auto I = DstM.begin(), E = DstM.end(); I != E;) { 497 Function &GV = *I++; 498 dropReplacedComdat(GV, ReplacedDstComdats); 499 } 500 501 for (GlobalVariable &GV : SrcM->globals()) 502 if (GV.hasLinkOnceLinkage()) 503 if (const Comdat *SC = GV.getComdat()) 504 LazyComdatMembers[SC].push_back(&GV); 505 506 for (Function &SF : *SrcM) 507 if (SF.hasLinkOnceLinkage()) 508 if (const Comdat *SC = SF.getComdat()) 509 LazyComdatMembers[SC].push_back(&SF); 510 511 for (GlobalAlias &GA : SrcM->aliases()) 512 if (GA.hasLinkOnceLinkage()) 513 if (const Comdat *SC = GA.getComdat()) 514 LazyComdatMembers[SC].push_back(&GA); 515 516 // Insert all of the globals in src into the DstM module... without linking 517 // initializers (which could refer to functions not yet mapped over). 518 for (GlobalVariable &GV : SrcM->globals()) 519 if (linkIfNeeded(GV)) 520 return true; 521 522 for (Function &SF : *SrcM) 523 if (linkIfNeeded(SF)) 524 return true; 525 526 for (GlobalAlias &GA : SrcM->aliases()) 527 if (linkIfNeeded(GA)) 528 return true; 529 530 for (unsigned I = 0; I < ValuesToLink.size(); ++I) { 531 GlobalValue *GV = ValuesToLink[I]; 532 const Comdat *SC = GV->getComdat(); 533 if (!SC) 534 continue; 535 for (GlobalValue *GV2 : LazyComdatMembers[SC]) { 536 GlobalValue *DGV = getLinkedToGlobal(GV2); 537 bool LinkFromSrc = true; 538 if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, *GV2)) 539 return true; 540 if (LinkFromSrc) 541 ValuesToLink.insert(GV2); 542 } 543 } 544 545 if (InternalizeCallback) { 546 for (GlobalValue *GV : ValuesToLink) 547 Internalize.insert(GV->getName()); 548 } 549 550 // FIXME: Propagate Errors through to the caller instead of emitting 551 // diagnostics. 552 bool HasErrors = false; 553 if (Error E = Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(), 554 [this](GlobalValue &GV, IRMover::ValueAdder Add) { 555 addLazyFor(GV, Add); 556 }, 557 /* IsPerformingImport */ false)) { 558 handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { 559 DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message())); 560 HasErrors = true; 561 }); 562 } 563 if (HasErrors) 564 return true; 565 566 if (InternalizeCallback) 567 InternalizeCallback(DstM, Internalize); 568 569 return false; 570 } 571 572 Linker::Linker(Module &M) : Mover(M) {} 573 574 bool Linker::linkInModule( 575 std::unique_ptr<Module> Src, unsigned Flags, 576 std::function<void(Module &, const StringSet<> &)> InternalizeCallback) { 577 ModuleLinker ModLinker(Mover, std::move(Src), Flags, 578 std::move(InternalizeCallback)); 579 return ModLinker.run(); 580 } 581 582 //===----------------------------------------------------------------------===// 583 // LinkModules entrypoint. 584 //===----------------------------------------------------------------------===// 585 586 /// This function links two modules together, with the resulting Dest module 587 /// modified to be the composite of the two input modules. If an error occurs, 588 /// true is returned and ErrorMsg (if not null) is set to indicate the problem. 589 /// Upon failure, the Dest module could be in a modified state, and shouldn't be 590 /// relied on to be consistent. 591 bool Linker::linkModules( 592 Module &Dest, std::unique_ptr<Module> Src, unsigned Flags, 593 std::function<void(Module &, const StringSet<> &)> InternalizeCallback) { 594 Linker L(Dest); 595 return L.linkInModule(std::move(Src), Flags, std::move(InternalizeCallback)); 596 } 597 598 //===----------------------------------------------------------------------===// 599 // C API. 600 //===----------------------------------------------------------------------===// 601 602 LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src) { 603 Module *D = unwrap(Dest); 604 std::unique_ptr<Module> M(unwrap(Src)); 605 return Linker::linkModules(*D, std::move(M)); 606 } 607