1 //===- GlobalOpt.cpp - Optimize Global Variables --------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass transforms simple global variables that never have their address 10 // taken. If obviously true, it marks read/write globals as constant, deletes 11 // variables only stored to, etc. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/GlobalOpt.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/SmallPtrSet.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/ADT/Twine.h" 22 #include "llvm/ADT/iterator_range.h" 23 #include "llvm/Analysis/BlockFrequencyInfo.h" 24 #include "llvm/Analysis/ConstantFolding.h" 25 #include "llvm/Analysis/MemoryBuiltins.h" 26 #include "llvm/Analysis/TargetLibraryInfo.h" 27 #include "llvm/Analysis/TargetTransformInfo.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/BinaryFormat/Dwarf.h" 30 #include "llvm/IR/Attributes.h" 31 #include "llvm/IR/BasicBlock.h" 32 #include "llvm/IR/CallingConv.h" 33 #include "llvm/IR/Constant.h" 34 #include "llvm/IR/Constants.h" 35 #include "llvm/IR/DataLayout.h" 36 #include "llvm/IR/DebugInfoMetadata.h" 37 #include "llvm/IR/DerivedTypes.h" 38 #include "llvm/IR/Dominators.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/IR/GlobalAlias.h" 41 #include "llvm/IR/GlobalValue.h" 42 #include "llvm/IR/GlobalVariable.h" 43 #include "llvm/IR/IRBuilder.h" 44 #include "llvm/IR/InstrTypes.h" 45 #include "llvm/IR/Instruction.h" 46 #include "llvm/IR/Instructions.h" 47 #include "llvm/IR/IntrinsicInst.h" 48 #include "llvm/IR/Module.h" 49 #include "llvm/IR/Operator.h" 50 #include "llvm/IR/Type.h" 51 #include "llvm/IR/Use.h" 52 #include "llvm/IR/User.h" 53 #include "llvm/IR/Value.h" 54 #include "llvm/IR/ValueHandle.h" 55 #include "llvm/Support/AtomicOrdering.h" 56 #include "llvm/Support/Casting.h" 57 #include "llvm/Support/CommandLine.h" 58 #include "llvm/Support/Debug.h" 59 #include "llvm/Support/ErrorHandling.h" 60 #include "llvm/Support/raw_ostream.h" 61 #include "llvm/Transforms/IPO.h" 62 #include "llvm/Transforms/Utils/CtorUtils.h" 63 #include "llvm/Transforms/Utils/Evaluator.h" 64 #include "llvm/Transforms/Utils/GlobalStatus.h" 65 #include "llvm/Transforms/Utils/Local.h" 66 #include <cassert> 67 #include <cstdint> 68 #include <optional> 69 #include <utility> 70 #include <vector> 71 72 using namespace llvm; 73 74 #define DEBUG_TYPE "globalopt" 75 76 STATISTIC(NumMarked , "Number of globals marked constant"); 77 STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); 78 STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); 79 STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); 80 STATISTIC(NumDeleted , "Number of globals deleted"); 81 STATISTIC(NumGlobUses , "Number of global uses devirtualized"); 82 STATISTIC(NumLocalized , "Number of globals localized"); 83 STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); 84 STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); 85 STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); 86 STATISTIC(NumNestRemoved , "Number of nest attributes removed"); 87 STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); 88 STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); 89 STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); 90 STATISTIC(NumAtExitRemoved, "Number of atexit handlers removed"); 91 STATISTIC(NumInternalFunc, "Number of internal functions"); 92 STATISTIC(NumColdCC, "Number of functions marked coldcc"); 93 STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs"); 94 STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed"); 95 96 static cl::opt<bool> 97 OptimizeNonFMVCallers("optimize-non-fmv-callers", 98 cl::desc("Statically resolve calls to versioned " 99 "functions from non-versioned callers."), 100 cl::init(true), cl::Hidden); 101 102 static cl::opt<bool> 103 EnableColdCCStressTest("enable-coldcc-stress-test", 104 cl::desc("Enable stress test of coldcc by adding " 105 "calling conv to all internal functions."), 106 cl::init(false), cl::Hidden); 107 108 static cl::opt<int> ColdCCRelFreq( 109 "coldcc-rel-freq", cl::Hidden, cl::init(2), 110 cl::desc( 111 "Maximum block frequency, expressed as a percentage of caller's " 112 "entry frequency, for a call site to be considered cold for enabling " 113 "coldcc")); 114 115 /// Is this global variable possibly used by a leak checker as a root? If so, 116 /// we might not really want to eliminate the stores to it. 117 static bool isLeakCheckerRoot(GlobalVariable *GV) { 118 // A global variable is a root if it is a pointer, or could plausibly contain 119 // a pointer. There are two challenges; one is that we could have a struct 120 // the has an inner member which is a pointer. We recurse through the type to 121 // detect these (up to a point). The other is that we may actually be a union 122 // of a pointer and another type, and so our LLVM type is an integer which 123 // gets converted into a pointer, or our type is an [i8 x #] with a pointer 124 // potentially contained here. 125 126 if (GV->hasPrivateLinkage()) 127 return false; 128 129 SmallVector<Type *, 4> Types; 130 Types.push_back(GV->getValueType()); 131 132 unsigned Limit = 20; 133 do { 134 Type *Ty = Types.pop_back_val(); 135 switch (Ty->getTypeID()) { 136 default: break; 137 case Type::PointerTyID: 138 return true; 139 case Type::FixedVectorTyID: 140 case Type::ScalableVectorTyID: 141 if (cast<VectorType>(Ty)->getElementType()->isPointerTy()) 142 return true; 143 break; 144 case Type::ArrayTyID: 145 Types.push_back(cast<ArrayType>(Ty)->getElementType()); 146 break; 147 case Type::StructTyID: { 148 StructType *STy = cast<StructType>(Ty); 149 if (STy->isOpaque()) return true; 150 for (Type *InnerTy : STy->elements()) { 151 if (isa<PointerType>(InnerTy)) return true; 152 if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) || 153 isa<VectorType>(InnerTy)) 154 Types.push_back(InnerTy); 155 } 156 break; 157 } 158 } 159 if (--Limit == 0) return true; 160 } while (!Types.empty()); 161 return false; 162 } 163 164 /// Given a value that is stored to a global but never read, determine whether 165 /// it's safe to remove the store and the chain of computation that feeds the 166 /// store. 167 static bool IsSafeComputationToRemove( 168 Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { 169 do { 170 if (isa<Constant>(V)) 171 return true; 172 if (!V->hasOneUse()) 173 return false; 174 if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || 175 isa<GlobalValue>(V)) 176 return false; 177 if (isAllocationFn(V, GetTLI)) 178 return true; 179 180 Instruction *I = cast<Instruction>(V); 181 if (I->mayHaveSideEffects()) 182 return false; 183 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { 184 if (!GEP->hasAllConstantIndices()) 185 return false; 186 } else if (I->getNumOperands() != 1) { 187 return false; 188 } 189 190 V = I->getOperand(0); 191 } while (true); 192 } 193 194 /// This GV is a pointer root. Loop over all users of the global and clean up 195 /// any that obviously don't assign the global a value that isn't dynamically 196 /// allocated. 197 static bool 198 CleanupPointerRootUsers(GlobalVariable *GV, 199 function_ref<TargetLibraryInfo &(Function &)> GetTLI) { 200 // A brief explanation of leak checkers. The goal is to find bugs where 201 // pointers are forgotten, causing an accumulating growth in memory 202 // usage over time. The common strategy for leak checkers is to explicitly 203 // allow the memory pointed to by globals at exit. This is popular because it 204 // also solves another problem where the main thread of a C++ program may shut 205 // down before other threads that are still expecting to use those globals. To 206 // handle that case, we expect the program may create a singleton and never 207 // destroy it. 208 209 bool Changed = false; 210 211 // If Dead[n].first is the only use of a malloc result, we can delete its 212 // chain of computation and the store to the global in Dead[n].second. 213 SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead; 214 215 SmallVector<User *> Worklist(GV->users()); 216 // Constants can't be pointers to dynamically allocated memory. 217 while (!Worklist.empty()) { 218 User *U = Worklist.pop_back_val(); 219 if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 220 Value *V = SI->getValueOperand(); 221 if (isa<Constant>(V)) { 222 Changed = true; 223 SI->eraseFromParent(); 224 } else if (Instruction *I = dyn_cast<Instruction>(V)) { 225 if (I->hasOneUse()) 226 Dead.push_back(std::make_pair(I, SI)); 227 } 228 } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) { 229 if (isa<Constant>(MSI->getValue())) { 230 Changed = true; 231 MSI->eraseFromParent(); 232 } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) { 233 if (I->hasOneUse()) 234 Dead.push_back(std::make_pair(I, MSI)); 235 } 236 } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) { 237 GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource()); 238 if (MemSrc && MemSrc->isConstant()) { 239 Changed = true; 240 MTI->eraseFromParent(); 241 } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) { 242 if (I->hasOneUse()) 243 Dead.push_back(std::make_pair(I, MTI)); 244 } 245 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { 246 if (isa<GEPOperator>(CE)) 247 append_range(Worklist, CE->users()); 248 } 249 } 250 251 for (const auto &[Inst, Store] : Dead) { 252 if (IsSafeComputationToRemove(Inst, GetTLI)) { 253 Store->eraseFromParent(); 254 Instruction *I = Inst; 255 do { 256 if (isAllocationFn(I, GetTLI)) 257 break; 258 Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); 259 if (!J) 260 break; 261 I->eraseFromParent(); 262 I = J; 263 } while (true); 264 I->eraseFromParent(); 265 Changed = true; 266 } 267 } 268 269 GV->removeDeadConstantUsers(); 270 return Changed; 271 } 272 273 /// We just marked GV constant. Loop over all users of the global, cleaning up 274 /// the obvious ones. This is largely just a quick scan over the use list to 275 /// clean up the easy and obvious cruft. This returns true if it made a change. 276 static bool CleanupConstantGlobalUsers(GlobalVariable *GV, 277 const DataLayout &DL) { 278 Constant *Init = GV->getInitializer(); 279 SmallVector<User *, 8> WorkList(GV->users()); 280 SmallPtrSet<User *, 8> Visited; 281 bool Changed = false; 282 283 SmallVector<WeakTrackingVH> MaybeDeadInsts; 284 auto EraseFromParent = [&](Instruction *I) { 285 for (Value *Op : I->operands()) 286 if (auto *OpI = dyn_cast<Instruction>(Op)) 287 MaybeDeadInsts.push_back(OpI); 288 I->eraseFromParent(); 289 Changed = true; 290 }; 291 while (!WorkList.empty()) { 292 User *U = WorkList.pop_back_val(); 293 if (!Visited.insert(U).second) 294 continue; 295 296 if (auto *BO = dyn_cast<BitCastOperator>(U)) 297 append_range(WorkList, BO->users()); 298 if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U)) 299 append_range(WorkList, ASC->users()); 300 else if (auto *GEP = dyn_cast<GEPOperator>(U)) 301 append_range(WorkList, GEP->users()); 302 else if (auto *LI = dyn_cast<LoadInst>(U)) { 303 // A load from a uniform value is always the same, regardless of any 304 // applied offset. 305 Type *Ty = LI->getType(); 306 if (Constant *Res = ConstantFoldLoadFromUniformValue(Init, Ty, DL)) { 307 LI->replaceAllUsesWith(Res); 308 EraseFromParent(LI); 309 continue; 310 } 311 312 Value *PtrOp = LI->getPointerOperand(); 313 APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0); 314 PtrOp = PtrOp->stripAndAccumulateConstantOffsets( 315 DL, Offset, /* AllowNonInbounds */ true); 316 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(PtrOp)) { 317 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) 318 PtrOp = II->getArgOperand(0); 319 } 320 if (PtrOp == GV) { 321 if (auto *Value = ConstantFoldLoadFromConst(Init, Ty, Offset, DL)) { 322 LI->replaceAllUsesWith(Value); 323 EraseFromParent(LI); 324 } 325 } 326 } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 327 // Store must be unreachable or storing Init into the global. 328 EraseFromParent(SI); 329 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv 330 if (getUnderlyingObject(MI->getRawDest()) == GV) 331 EraseFromParent(MI); 332 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { 333 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) 334 append_range(WorkList, II->users()); 335 } 336 } 337 338 Changed |= 339 RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts); 340 GV->removeDeadConstantUsers(); 341 return Changed; 342 } 343 344 /// Part of the global at a specific offset, which is only accessed through 345 /// loads and stores with the given type. 346 struct GlobalPart { 347 Type *Ty; 348 Constant *Initializer = nullptr; 349 bool IsLoaded = false; 350 bool IsStored = false; 351 }; 352 353 /// Look at all uses of the global and determine which (offset, type) pairs it 354 /// can be split into. 355 static bool collectSRATypes(DenseMap<uint64_t, GlobalPart> &Parts, 356 GlobalVariable *GV, const DataLayout &DL) { 357 SmallVector<Use *, 16> Worklist; 358 SmallPtrSet<Use *, 16> Visited; 359 auto AppendUses = [&](Value *V) { 360 for (Use &U : V->uses()) 361 if (Visited.insert(&U).second) 362 Worklist.push_back(&U); 363 }; 364 AppendUses(GV); 365 while (!Worklist.empty()) { 366 Use *U = Worklist.pop_back_val(); 367 User *V = U->getUser(); 368 369 auto *GEP = dyn_cast<GEPOperator>(V); 370 if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) || 371 (GEP && GEP->hasAllConstantIndices())) { 372 AppendUses(V); 373 continue; 374 } 375 376 if (Value *Ptr = getLoadStorePointerOperand(V)) { 377 // This is storing the global address into somewhere, not storing into 378 // the global. 379 if (isa<StoreInst>(V) && U->getOperandNo() == 0) 380 return false; 381 382 APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); 383 Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, 384 /* AllowNonInbounds */ true); 385 if (Ptr != GV || Offset.getActiveBits() >= 64) 386 return false; 387 388 // TODO: We currently require that all accesses at a given offset must 389 // use the same type. This could be relaxed. 390 Type *Ty = getLoadStoreType(V); 391 const auto &[It, Inserted] = 392 Parts.try_emplace(Offset.getZExtValue(), GlobalPart{Ty}); 393 if (Ty != It->second.Ty) 394 return false; 395 396 if (Inserted) { 397 It->second.Initializer = 398 ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL); 399 if (!It->second.Initializer) { 400 LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of " 401 << *GV << " with type " << *Ty << " at offset " 402 << Offset.getZExtValue()); 403 return false; 404 } 405 } 406 407 // Scalable types not currently supported. 408 if (Ty->isScalableTy()) 409 return false; 410 411 auto IsStored = [](Value *V, Constant *Initializer) { 412 auto *SI = dyn_cast<StoreInst>(V); 413 if (!SI) 414 return false; 415 416 Constant *StoredConst = dyn_cast<Constant>(SI->getOperand(0)); 417 if (!StoredConst) 418 return true; 419 420 // Don't consider stores that only write the initializer value. 421 return Initializer != StoredConst; 422 }; 423 424 It->second.IsLoaded |= isa<LoadInst>(V); 425 It->second.IsStored |= IsStored(V, It->second.Initializer); 426 continue; 427 } 428 429 // Ignore dead constant users. 430 if (auto *C = dyn_cast<Constant>(V)) { 431 if (!isSafeToDestroyConstant(C)) 432 return false; 433 continue; 434 } 435 436 // Unknown user. 437 return false; 438 } 439 440 return true; 441 } 442 443 /// Copy over the debug info for a variable to its SRA replacements. 444 static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, 445 uint64_t FragmentOffsetInBits, 446 uint64_t FragmentSizeInBits, 447 uint64_t VarSize) { 448 SmallVector<DIGlobalVariableExpression *, 1> GVs; 449 GV->getDebugInfo(GVs); 450 for (auto *GVE : GVs) { 451 DIVariable *Var = GVE->getVariable(); 452 DIExpression *Expr = GVE->getExpression(); 453 int64_t CurVarOffsetInBytes = 0; 454 uint64_t CurVarOffsetInBits = 0; 455 uint64_t FragmentEndInBits = FragmentOffsetInBits + FragmentSizeInBits; 456 457 // Calculate the offset (Bytes), Continue if unknown. 458 if (!Expr->extractIfOffset(CurVarOffsetInBytes)) 459 continue; 460 461 // Ignore negative offset. 462 if (CurVarOffsetInBytes < 0) 463 continue; 464 465 // Convert offset to bits. 466 CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes; 467 468 // Current var starts after the fragment, ignore. 469 if (CurVarOffsetInBits >= FragmentEndInBits) 470 continue; 471 472 uint64_t CurVarSize = Var->getType()->getSizeInBits(); 473 uint64_t CurVarEndInBits = CurVarOffsetInBits + CurVarSize; 474 // Current variable ends before start of fragment, ignore. 475 if (CurVarSize != 0 && /* CurVarSize is known */ 476 CurVarEndInBits <= FragmentOffsetInBits) 477 continue; 478 479 // Current variable fits in (not greater than) the fragment, 480 // does not need fragment expression. 481 if (CurVarSize != 0 && /* CurVarSize is known */ 482 CurVarOffsetInBits >= FragmentOffsetInBits && 483 CurVarEndInBits <= FragmentEndInBits) { 484 uint64_t CurVarOffsetInFragment = 485 (CurVarOffsetInBits - FragmentOffsetInBits) / 8; 486 if (CurVarOffsetInFragment != 0) 487 Expr = DIExpression::get(Expr->getContext(), {dwarf::DW_OP_plus_uconst, 488 CurVarOffsetInFragment}); 489 else 490 Expr = DIExpression::get(Expr->getContext(), {}); 491 auto *NGVE = 492 DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr); 493 NGV->addDebugInfo(NGVE); 494 continue; 495 } 496 // Current variable does not fit in single fragment, 497 // emit a fragment expression. 498 if (FragmentSizeInBits < VarSize) { 499 if (CurVarOffsetInBits > FragmentOffsetInBits) 500 continue; 501 uint64_t CurVarFragmentOffsetInBits = 502 FragmentOffsetInBits - CurVarOffsetInBits; 503 uint64_t CurVarFragmentSizeInBits = FragmentSizeInBits; 504 if (CurVarSize != 0 && CurVarEndInBits < FragmentEndInBits) 505 CurVarFragmentSizeInBits -= (FragmentEndInBits - CurVarEndInBits); 506 if (CurVarOffsetInBits) 507 Expr = DIExpression::get(Expr->getContext(), {}); 508 if (auto E = DIExpression::createFragmentExpression( 509 Expr, CurVarFragmentOffsetInBits, CurVarFragmentSizeInBits)) 510 Expr = *E; 511 else 512 continue; 513 } 514 auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr); 515 NGV->addDebugInfo(NGVE); 516 } 517 } 518 519 /// Perform scalar replacement of aggregates on the specified global variable. 520 /// This opens the door for other optimizations by exposing the behavior of the 521 /// program in a more fine-grained way. We have determined that this 522 /// transformation is safe already. We return the first global variable we 523 /// insert so that the caller can reprocess it. 524 static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { 525 assert(GV->hasLocalLinkage()); 526 527 // Collect types to split into. 528 DenseMap<uint64_t, GlobalPart> Parts; 529 if (!collectSRATypes(Parts, GV, DL) || Parts.empty()) 530 return nullptr; 531 532 // Make sure we don't SRA back to the same type. 533 if (Parts.size() == 1 && Parts.begin()->second.Ty == GV->getValueType()) 534 return nullptr; 535 536 // Don't perform SRA if we would have to split into many globals. Ignore 537 // parts that are either only loaded or only stored, because we expect them 538 // to be optimized away. 539 unsigned NumParts = count_if(Parts, [](const auto &Pair) { 540 return Pair.second.IsLoaded && Pair.second.IsStored; 541 }); 542 if (NumParts > 16) 543 return nullptr; 544 545 // Sort by offset. 546 SmallVector<std::tuple<uint64_t, Type *, Constant *>, 16> TypesVector; 547 for (const auto &Pair : Parts) { 548 TypesVector.push_back( 549 {Pair.first, Pair.second.Ty, Pair.second.Initializer}); 550 } 551 sort(TypesVector, llvm::less_first()); 552 553 // Check that the types are non-overlapping. 554 uint64_t Offset = 0; 555 for (const auto &[OffsetForTy, Ty, _] : TypesVector) { 556 // Overlaps with previous type. 557 if (OffsetForTy < Offset) 558 return nullptr; 559 560 Offset = OffsetForTy + DL.getTypeAllocSize(Ty); 561 } 562 563 // Some accesses go beyond the end of the global, don't bother. 564 if (Offset > DL.getTypeAllocSize(GV->getValueType())) 565 return nullptr; 566 567 LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); 568 569 // Get the alignment of the global, either explicit or target-specific. 570 Align StartAlignment = 571 DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType()); 572 uint64_t VarSize = DL.getTypeSizeInBits(GV->getValueType()); 573 574 // Create replacement globals. 575 DenseMap<uint64_t, GlobalVariable *> NewGlobals; 576 unsigned NameSuffix = 0; 577 for (auto &[OffsetForTy, Ty, Initializer] : TypesVector) { 578 GlobalVariable *NGV = new GlobalVariable( 579 *GV->getParent(), Ty, false, GlobalVariable::InternalLinkage, 580 Initializer, GV->getName() + "." + Twine(NameSuffix++), GV, 581 GV->getThreadLocalMode(), GV->getAddressSpace()); 582 // Start out by copying attributes from the original, including alignment. 583 NGV->copyAttributesFrom(GV); 584 NewGlobals.insert({OffsetForTy, NGV}); 585 586 // Calculate the known alignment of the field. If the original aggregate 587 // had 256 byte alignment for example, then the element at a given offset 588 // may also have a known alignment, and something might depend on that: 589 // propagate info to each field. 590 Align NewAlign = commonAlignment(StartAlignment, OffsetForTy); 591 NGV->setAlignment(NewAlign); 592 593 // Copy over the debug info for the variable. 594 transferSRADebugInfo(GV, NGV, OffsetForTy * 8, 595 DL.getTypeAllocSizeInBits(Ty), VarSize); 596 } 597 598 // Replace uses of the original global with uses of the new global. 599 SmallVector<Value *, 16> Worklist; 600 SmallPtrSet<Value *, 16> Visited; 601 SmallVector<WeakTrackingVH, 16> DeadInsts; 602 auto AppendUsers = [&](Value *V) { 603 for (User *U : V->users()) 604 if (Visited.insert(U).second) 605 Worklist.push_back(U); 606 }; 607 AppendUsers(GV); 608 while (!Worklist.empty()) { 609 Value *V = Worklist.pop_back_val(); 610 if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) || 611 isa<GEPOperator>(V)) { 612 AppendUsers(V); 613 if (isa<Instruction>(V)) 614 DeadInsts.push_back(V); 615 continue; 616 } 617 618 if (Value *Ptr = getLoadStorePointerOperand(V)) { 619 APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); 620 Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, 621 /* AllowNonInbounds */ true); 622 assert(Ptr == GV && "Load/store must be from/to global"); 623 GlobalVariable *NGV = NewGlobals[Offset.getZExtValue()]; 624 assert(NGV && "Must have replacement global for this offset"); 625 626 // Update the pointer operand and recalculate alignment. 627 Align PrefAlign = DL.getPrefTypeAlign(getLoadStoreType(V)); 628 Align NewAlign = 629 getOrEnforceKnownAlignment(NGV, PrefAlign, DL, cast<Instruction>(V)); 630 631 if (auto *LI = dyn_cast<LoadInst>(V)) { 632 LI->setOperand(0, NGV); 633 LI->setAlignment(NewAlign); 634 } else { 635 auto *SI = cast<StoreInst>(V); 636 SI->setOperand(1, NGV); 637 SI->setAlignment(NewAlign); 638 } 639 continue; 640 } 641 642 assert(isa<Constant>(V) && isSafeToDestroyConstant(cast<Constant>(V)) && 643 "Other users can only be dead constants"); 644 } 645 646 // Delete old instructions and global. 647 RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); 648 GV->removeDeadConstantUsers(); 649 GV->eraseFromParent(); 650 ++NumSRA; 651 652 assert(NewGlobals.size() > 0); 653 return NewGlobals.begin()->second; 654 } 655 656 /// Return true if all users of the specified value will trap if the value is 657 /// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid 658 /// reprocessing them. 659 static bool AllUsesOfValueWillTrapIfNull(const Value *V, 660 SmallPtrSetImpl<const PHINode*> &PHIs) { 661 for (const User *U : V->users()) { 662 if (const Instruction *I = dyn_cast<Instruction>(U)) { 663 // If null pointer is considered valid, then all uses are non-trapping. 664 // Non address-space 0 globals have already been pruned by the caller. 665 if (NullPointerIsDefined(I->getFunction())) 666 return false; 667 } 668 if (isa<LoadInst>(U)) { 669 // Will trap. 670 } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { 671 if (SI->getOperand(0) == V) { 672 return false; // Storing the value. 673 } 674 } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { 675 if (CI->getCalledOperand() != V) { 676 return false; // Not calling the ptr 677 } 678 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { 679 if (II->getCalledOperand() != V) { 680 return false; // Not calling the ptr 681 } 682 } else if (const AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(U)) { 683 if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) 684 return false; 685 } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { 686 if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; 687 } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { 688 // If we've already seen this phi node, ignore it, it has already been 689 // checked. 690 if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) 691 return false; 692 } else if (isa<ICmpInst>(U) && 693 !ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) && 694 isa<LoadInst>(U->getOperand(0)) && 695 isa<ConstantPointerNull>(U->getOperand(1))) { 696 assert(isa<GlobalValue>(cast<LoadInst>(U->getOperand(0)) 697 ->getPointerOperand() 698 ->stripPointerCasts()) && 699 "Should be GlobalVariable"); 700 // This and only this kind of non-signed ICmpInst is to be replaced with 701 // the comparing of the value of the created global init bool later in 702 // optimizeGlobalAddressOfAllocation for the global variable. 703 } else { 704 return false; 705 } 706 } 707 return true; 708 } 709 710 /// Return true if all uses of any loads from GV will trap if the loaded value 711 /// is null. Note that this also permits comparisons of the loaded value 712 /// against null, as a special case. 713 static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { 714 SmallVector<const Value *, 4> Worklist; 715 Worklist.push_back(GV); 716 while (!Worklist.empty()) { 717 const Value *P = Worklist.pop_back_val(); 718 for (const auto *U : P->users()) { 719 if (auto *LI = dyn_cast<LoadInst>(U)) { 720 if (!LI->isSimple()) 721 return false; 722 SmallPtrSet<const PHINode *, 8> PHIs; 723 if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) 724 return false; 725 } else if (auto *SI = dyn_cast<StoreInst>(U)) { 726 if (!SI->isSimple()) 727 return false; 728 // Ignore stores to the global. 729 if (SI->getPointerOperand() != P) 730 return false; 731 } else if (auto *CE = dyn_cast<ConstantExpr>(U)) { 732 if (CE->stripPointerCasts() != GV) 733 return false; 734 // Check further the ConstantExpr. 735 Worklist.push_back(CE); 736 } else { 737 // We don't know or understand this user, bail out. 738 return false; 739 } 740 } 741 } 742 743 return true; 744 } 745 746 /// Get all the loads/store uses for global variable \p GV. 747 static void allUsesOfLoadAndStores(GlobalVariable *GV, 748 SmallVector<Value *, 4> &Uses) { 749 SmallVector<Value *, 4> Worklist; 750 Worklist.push_back(GV); 751 while (!Worklist.empty()) { 752 auto *P = Worklist.pop_back_val(); 753 for (auto *U : P->users()) { 754 if (auto *CE = dyn_cast<ConstantExpr>(U)) { 755 Worklist.push_back(CE); 756 continue; 757 } 758 759 assert((isa<LoadInst>(U) || isa<StoreInst>(U)) && 760 "Expect only load or store instructions"); 761 Uses.push_back(U); 762 } 763 } 764 } 765 766 static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { 767 bool Changed = false; 768 for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) { 769 Instruction *I = cast<Instruction>(*UI++); 770 // Uses are non-trapping if null pointer is considered valid. 771 // Non address-space 0 globals are already pruned by the caller. 772 if (NullPointerIsDefined(I->getFunction())) 773 return false; 774 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 775 LI->setOperand(0, NewV); 776 Changed = true; 777 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 778 if (SI->getOperand(1) == V) { 779 SI->setOperand(1, NewV); 780 Changed = true; 781 } 782 } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) { 783 CallBase *CB = cast<CallBase>(I); 784 if (CB->getCalledOperand() == V) { 785 // Calling through the pointer! Turn into a direct call, but be careful 786 // that the pointer is not also being passed as an argument. 787 CB->setCalledOperand(NewV); 788 Changed = true; 789 bool PassedAsArg = false; 790 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) 791 if (CB->getArgOperand(i) == V) { 792 PassedAsArg = true; 793 CB->setArgOperand(i, NewV); 794 } 795 796 if (PassedAsArg) { 797 // Being passed as an argument also. Be careful to not invalidate UI! 798 UI = V->user_begin(); 799 } 800 } 801 } else if (AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(I)) { 802 Changed |= OptimizeAwayTrappingUsesOfValue( 803 CI, ConstantExpr::getAddrSpaceCast(NewV, CI->getType())); 804 if (CI->use_empty()) { 805 Changed = true; 806 CI->eraseFromParent(); 807 } 808 } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { 809 // Should handle GEP here. 810 SmallVector<Constant*, 8> Idxs; 811 Idxs.reserve(GEPI->getNumOperands()-1); 812 for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end(); 813 i != e; ++i) 814 if (Constant *C = dyn_cast<Constant>(*i)) 815 Idxs.push_back(C); 816 else 817 break; 818 if (Idxs.size() == GEPI->getNumOperands()-1) 819 Changed |= OptimizeAwayTrappingUsesOfValue( 820 GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(), 821 NewV, Idxs)); 822 if (GEPI->use_empty()) { 823 Changed = true; 824 GEPI->eraseFromParent(); 825 } 826 } 827 } 828 829 return Changed; 830 } 831 832 /// The specified global has only one non-null value stored into it. If there 833 /// are uses of the loaded value that would trap if the loaded value is 834 /// dynamically null, then we know that they cannot be reachable with a null 835 /// optimize away the load. 836 static bool OptimizeAwayTrappingUsesOfLoads( 837 GlobalVariable *GV, Constant *LV, const DataLayout &DL, 838 function_ref<TargetLibraryInfo &(Function &)> GetTLI) { 839 bool Changed = false; 840 841 // Keep track of whether we are able to remove all the uses of the global 842 // other than the store that defines it. 843 bool AllNonStoreUsesGone = true; 844 845 // Replace all uses of loads with uses of uses of the stored value. 846 for (User *GlobalUser : llvm::make_early_inc_range(GV->users())) { 847 if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) { 848 Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); 849 // If we were able to delete all uses of the loads 850 if (LI->use_empty()) { 851 LI->eraseFromParent(); 852 Changed = true; 853 } else { 854 AllNonStoreUsesGone = false; 855 } 856 } else if (isa<StoreInst>(GlobalUser)) { 857 // Ignore the store that stores "LV" to the global. 858 assert(GlobalUser->getOperand(1) == GV && 859 "Must be storing *to* the global"); 860 } else { 861 AllNonStoreUsesGone = false; 862 863 // If we get here we could have other crazy uses that are transitively 864 // loaded. 865 assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || 866 isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || 867 isa<BitCastInst>(GlobalUser) || 868 isa<GetElementPtrInst>(GlobalUser) || 869 isa<AddrSpaceCastInst>(GlobalUser)) && 870 "Only expect load and stores!"); 871 } 872 } 873 874 if (Changed) { 875 LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV 876 << "\n"); 877 ++NumGlobUses; 878 } 879 880 // If we nuked all of the loads, then none of the stores are needed either, 881 // nor is the global. 882 if (AllNonStoreUsesGone) { 883 if (isLeakCheckerRoot(GV)) { 884 Changed |= CleanupPointerRootUsers(GV, GetTLI); 885 } else { 886 Changed = true; 887 CleanupConstantGlobalUsers(GV, DL); 888 } 889 if (GV->use_empty()) { 890 LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); 891 Changed = true; 892 GV->eraseFromParent(); 893 ++NumDeleted; 894 } 895 } 896 return Changed; 897 } 898 899 /// Walk the use list of V, constant folding all of the instructions that are 900 /// foldable. 901 static void ConstantPropUsersOf(Value *V, const DataLayout &DL, 902 TargetLibraryInfo *TLI) { 903 for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; ) 904 if (Instruction *I = dyn_cast<Instruction>(*UI++)) 905 if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) { 906 I->replaceAllUsesWith(NewC); 907 908 // Advance UI to the next non-I use to avoid invalidating it! 909 // Instructions could multiply use V. 910 while (UI != E && *UI == I) 911 ++UI; 912 if (isInstructionTriviallyDead(I, TLI)) 913 I->eraseFromParent(); 914 } 915 } 916 917 /// This function takes the specified global variable, and transforms the 918 /// program as if it always contained the result of the specified malloc. 919 /// Because it is always the result of the specified malloc, there is no reason 920 /// to actually DO the malloc. Instead, turn the malloc into a global, and any 921 /// loads of GV as uses of the new global. 922 static GlobalVariable * 923 OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI, 924 uint64_t AllocSize, Constant *InitVal, 925 const DataLayout &DL, 926 TargetLibraryInfo *TLI) { 927 LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI 928 << '\n'); 929 930 // Create global of type [AllocSize x i8]. 931 Type *GlobalType = ArrayType::get(Type::getInt8Ty(GV->getContext()), 932 AllocSize); 933 934 // Create the new global variable. The contents of the allocated memory is 935 // undefined initially, so initialize with an undef value. 936 GlobalVariable *NewGV = new GlobalVariable( 937 *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, 938 UndefValue::get(GlobalType), GV->getName() + ".body", nullptr, 939 GV->getThreadLocalMode()); 940 941 // Initialize the global at the point of the original call. Note that this 942 // is a different point from the initialization referred to below for the 943 // nullability handling. Sublety: We have not proven the original global was 944 // only initialized once. As such, we can not fold this into the initializer 945 // of the new global as may need to re-init the storage multiple times. 946 if (!isa<UndefValue>(InitVal)) { 947 IRBuilder<> Builder(CI->getNextNode()); 948 // TODO: Use alignment above if align!=1 949 Builder.CreateMemSet(NewGV, InitVal, AllocSize, std::nullopt); 950 } 951 952 // Update users of the allocation to use the new global instead. 953 CI->replaceAllUsesWith(NewGV); 954 955 // If there is a comparison against null, we will insert a global bool to 956 // keep track of whether the global was initialized yet or not. 957 GlobalVariable *InitBool = new GlobalVariable( 958 Type::getInt1Ty(GV->getContext()), false, GlobalValue::InternalLinkage, 959 ConstantInt::getFalse(GV->getContext()), GV->getName() + ".init", 960 GV->getThreadLocalMode(), GV->getAddressSpace()); 961 bool InitBoolUsed = false; 962 963 // Loop over all instruction uses of GV, processing them in turn. 964 SmallVector<Value *, 4> Guses; 965 allUsesOfLoadAndStores(GV, Guses); 966 for (auto *U : Guses) { 967 if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 968 // The global is initialized when the store to it occurs. If the stored 969 // value is null value, the global bool is set to false, otherwise true. 970 auto *NewSI = new StoreInst( 971 ConstantInt::getBool(GV->getContext(), !isa<ConstantPointerNull>( 972 SI->getValueOperand())), 973 InitBool, false, Align(1), SI->getOrdering(), SI->getSyncScopeID(), 974 SI->getIterator()); 975 NewSI->setDebugLoc(SI->getDebugLoc()); 976 SI->eraseFromParent(); 977 continue; 978 } 979 980 LoadInst *LI = cast<LoadInst>(U); 981 while (!LI->use_empty()) { 982 Use &LoadUse = *LI->use_begin(); 983 ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser()); 984 if (!ICI) { 985 LoadUse.set(NewGV); 986 continue; 987 } 988 989 // Replace the cmp X, 0 with a use of the bool value. 990 Value *LV = new LoadInst(InitBool->getValueType(), InitBool, 991 InitBool->getName() + ".val", false, Align(1), 992 LI->getOrdering(), LI->getSyncScopeID(), 993 LI->getIterator()); 994 // FIXME: Should we use the DebugLoc of the load used by the predicate, or 995 // the predicate? The load seems most appropriate, but there's an argument 996 // that the new load does not represent the old load, but is simply a 997 // component of recomputing the predicate. 998 cast<LoadInst>(LV)->setDebugLoc(LI->getDebugLoc()); 999 InitBoolUsed = true; 1000 switch (ICI->getPredicate()) { 1001 default: llvm_unreachable("Unknown ICmp Predicate!"); 1002 case ICmpInst::ICMP_ULT: // X < null -> always false 1003 LV = ConstantInt::getFalse(GV->getContext()); 1004 break; 1005 case ICmpInst::ICMP_UGE: // X >= null -> always true 1006 LV = ConstantInt::getTrue(GV->getContext()); 1007 break; 1008 case ICmpInst::ICMP_ULE: 1009 case ICmpInst::ICMP_EQ: 1010 LV = BinaryOperator::CreateNot(LV, "notinit", ICI->getIterator()); 1011 cast<BinaryOperator>(LV)->setDebugLoc(ICI->getDebugLoc()); 1012 break; 1013 case ICmpInst::ICMP_NE: 1014 case ICmpInst::ICMP_UGT: 1015 break; // no change. 1016 } 1017 ICI->replaceAllUsesWith(LV); 1018 ICI->eraseFromParent(); 1019 } 1020 LI->eraseFromParent(); 1021 } 1022 1023 // If the initialization boolean was used, insert it, otherwise delete it. 1024 if (!InitBoolUsed) { 1025 while (!InitBool->use_empty()) // Delete initializations 1026 cast<StoreInst>(InitBool->user_back())->eraseFromParent(); 1027 delete InitBool; 1028 } else 1029 GV->getParent()->insertGlobalVariable(GV->getIterator(), InitBool); 1030 1031 // Now the GV is dead, nuke it and the allocation.. 1032 GV->eraseFromParent(); 1033 CI->eraseFromParent(); 1034 1035 // To further other optimizations, loop over all users of NewGV and try to 1036 // constant prop them. This will promote GEP instructions with constant 1037 // indices into GEP constant-exprs, which will allow global-opt to hack on it. 1038 ConstantPropUsersOf(NewGV, DL, TLI); 1039 1040 return NewGV; 1041 } 1042 1043 /// Scan the use-list of GV checking to make sure that there are no complex uses 1044 /// of GV. We permit simple things like dereferencing the pointer, but not 1045 /// storing through the address, unless it is to the specified global. 1046 static bool 1047 valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI, 1048 const GlobalVariable *GV) { 1049 SmallPtrSet<const Value *, 4> Visited; 1050 SmallVector<const Value *, 4> Worklist; 1051 Worklist.push_back(CI); 1052 1053 while (!Worklist.empty()) { 1054 const Value *V = Worklist.pop_back_val(); 1055 if (!Visited.insert(V).second) 1056 continue; 1057 1058 for (const Use &VUse : V->uses()) { 1059 const User *U = VUse.getUser(); 1060 if (isa<LoadInst>(U) || isa<CmpInst>(U)) 1061 continue; // Fine, ignore. 1062 1063 if (auto *SI = dyn_cast<StoreInst>(U)) { 1064 if (SI->getValueOperand() == V && 1065 SI->getPointerOperand()->stripPointerCasts() != GV) 1066 return false; // Storing the pointer not into GV... bad. 1067 continue; // Otherwise, storing through it, or storing into GV... fine. 1068 } 1069 1070 if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) { 1071 Worklist.push_back(GEPI); 1072 continue; 1073 } 1074 1075 return false; 1076 } 1077 } 1078 1079 return true; 1080 } 1081 1082 /// If we have a global that is only initialized with a fixed size allocation 1083 /// try to transform the program to use global memory instead of heap 1084 /// allocated memory. This eliminates dynamic allocation, avoids an indirection 1085 /// accessing the data, and exposes the resultant global to further GlobalOpt. 1086 static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV, 1087 CallInst *CI, 1088 const DataLayout &DL, 1089 TargetLibraryInfo *TLI) { 1090 if (!isRemovableAlloc(CI, TLI)) 1091 // Must be able to remove the call when we get done.. 1092 return false; 1093 1094 Type *Int8Ty = Type::getInt8Ty(CI->getFunction()->getContext()); 1095 Constant *InitVal = getInitialValueOfAllocation(CI, TLI, Int8Ty); 1096 if (!InitVal) 1097 // Must be able to emit a memset for initialization 1098 return false; 1099 1100 uint64_t AllocSize; 1101 if (!getObjectSize(CI, AllocSize, DL, TLI, ObjectSizeOpts())) 1102 return false; 1103 1104 // Restrict this transformation to only working on small allocations 1105 // (2048 bytes currently), as we don't want to introduce a 16M global or 1106 // something. 1107 if (AllocSize >= 2048) 1108 return false; 1109 1110 // We can't optimize this global unless all uses of it are *known* to be 1111 // of the malloc value, not of the null initializer value (consider a use 1112 // that compares the global's value against zero to see if the malloc has 1113 // been reached). To do this, we check to see if all uses of the global 1114 // would trap if the global were null: this proves that they must all 1115 // happen after the malloc. 1116 if (!allUsesOfLoadedValueWillTrapIfNull(GV)) 1117 return false; 1118 1119 // We can't optimize this if the malloc itself is used in a complex way, 1120 // for example, being stored into multiple globals. This allows the 1121 // malloc to be stored into the specified global, loaded, gep, icmp'd. 1122 // These are all things we could transform to using the global for. 1123 if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV)) 1124 return false; 1125 1126 OptimizeGlobalAddressOfAllocation(GV, CI, AllocSize, InitVal, DL, TLI); 1127 return true; 1128 } 1129 1130 // Try to optimize globals based on the knowledge that only one value (besides 1131 // its initializer) is ever stored to the global. 1132 static bool 1133 optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, 1134 const DataLayout &DL, 1135 function_ref<TargetLibraryInfo &(Function &)> GetTLI) { 1136 // Ignore no-op GEPs and bitcasts. 1137 StoredOnceVal = StoredOnceVal->stripPointerCasts(); 1138 1139 // If we are dealing with a pointer global that is initialized to null and 1140 // only has one (non-null) value stored into it, then we can optimize any 1141 // users of the loaded value (often calls and loads) that would trap if the 1142 // value was null. 1143 if (GV->getInitializer()->getType()->isPointerTy() && 1144 GV->getInitializer()->isNullValue() && 1145 StoredOnceVal->getType()->isPointerTy() && 1146 !NullPointerIsDefined( 1147 nullptr /* F */, 1148 GV->getInitializer()->getType()->getPointerAddressSpace())) { 1149 if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { 1150 // Optimize away any trapping uses of the loaded value. 1151 if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI)) 1152 return true; 1153 } else if (isAllocationFn(StoredOnceVal, GetTLI)) { 1154 if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) { 1155 auto *TLI = &GetTLI(*CI->getFunction()); 1156 if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, DL, TLI)) 1157 return true; 1158 } 1159 } 1160 } 1161 1162 return false; 1163 } 1164 1165 /// At this point, we have learned that the only two values ever stored into GV 1166 /// are its initializer and OtherVal. See if we can shrink the global into a 1167 /// boolean and select between the two values whenever it is used. This exposes 1168 /// the values to other scalar optimizations. 1169 static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { 1170 Type *GVElType = GV->getValueType(); 1171 1172 // If GVElType is already i1, it is already shrunk. If the type of the GV is 1173 // an FP value, pointer or vector, don't do this optimization because a select 1174 // between them is very expensive and unlikely to lead to later 1175 // simplification. In these cases, we typically end up with "cond ? v1 : v2" 1176 // where v1 and v2 both require constant pool loads, a big loss. 1177 if (GVElType == Type::getInt1Ty(GV->getContext()) || 1178 GVElType->isFloatingPointTy() || 1179 GVElType->isPointerTy() || GVElType->isVectorTy()) 1180 return false; 1181 1182 // Walk the use list of the global seeing if all the uses are load or store. 1183 // If there is anything else, bail out. 1184 for (User *U : GV->users()) { 1185 if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) 1186 return false; 1187 if (getLoadStoreType(U) != GVElType) 1188 return false; 1189 } 1190 1191 LLVM_DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n"); 1192 1193 // Create the new global, initializing it to false. 1194 GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), 1195 false, 1196 GlobalValue::InternalLinkage, 1197 ConstantInt::getFalse(GV->getContext()), 1198 GV->getName()+".b", 1199 GV->getThreadLocalMode(), 1200 GV->getType()->getAddressSpace()); 1201 NewGV->copyAttributesFrom(GV); 1202 GV->getParent()->insertGlobalVariable(GV->getIterator(), NewGV); 1203 1204 Constant *InitVal = GV->getInitializer(); 1205 assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && 1206 "No reason to shrink to bool!"); 1207 1208 SmallVector<DIGlobalVariableExpression *, 1> GVs; 1209 GV->getDebugInfo(GVs); 1210 1211 // If initialized to zero and storing one into the global, we can use a cast 1212 // instead of a select to synthesize the desired value. 1213 bool IsOneZero = false; 1214 bool EmitOneOrZero = true; 1215 auto *CI = dyn_cast<ConstantInt>(OtherVal); 1216 if (CI && CI->getValue().getActiveBits() <= 64) { 1217 IsOneZero = InitVal->isNullValue() && CI->isOne(); 1218 1219 auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer()); 1220 if (CIInit && CIInit->getValue().getActiveBits() <= 64) { 1221 uint64_t ValInit = CIInit->getZExtValue(); 1222 uint64_t ValOther = CI->getZExtValue(); 1223 uint64_t ValMinus = ValOther - ValInit; 1224 1225 for(auto *GVe : GVs){ 1226 DIGlobalVariable *DGV = GVe->getVariable(); 1227 DIExpression *E = GVe->getExpression(); 1228 const DataLayout &DL = GV->getDataLayout(); 1229 unsigned SizeInOctets = 1230 DL.getTypeAllocSizeInBits(NewGV->getValueType()) / 8; 1231 1232 // It is expected that the address of global optimized variable is on 1233 // top of the stack. After optimization, value of that variable will 1234 // be ether 0 for initial value or 1 for other value. The following 1235 // expression should return constant integer value depending on the 1236 // value at global object address: 1237 // val * (ValOther - ValInit) + ValInit: 1238 // DW_OP_deref DW_OP_constu <ValMinus> 1239 // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value 1240 SmallVector<uint64_t, 12> Ops = { 1241 dwarf::DW_OP_deref_size, SizeInOctets, 1242 dwarf::DW_OP_constu, ValMinus, 1243 dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit, 1244 dwarf::DW_OP_plus}; 1245 bool WithStackValue = true; 1246 E = DIExpression::prependOpcodes(E, Ops, WithStackValue); 1247 DIGlobalVariableExpression *DGVE = 1248 DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E); 1249 NewGV->addDebugInfo(DGVE); 1250 } 1251 EmitOneOrZero = false; 1252 } 1253 } 1254 1255 if (EmitOneOrZero) { 1256 // FIXME: This will only emit address for debugger on which will 1257 // be written only 0 or 1. 1258 for(auto *GV : GVs) 1259 NewGV->addDebugInfo(GV); 1260 } 1261 1262 while (!GV->use_empty()) { 1263 Instruction *UI = cast<Instruction>(GV->user_back()); 1264 if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { 1265 // Change the store into a boolean store. 1266 bool StoringOther = SI->getOperand(0) == OtherVal; 1267 // Only do this if we weren't storing a loaded value. 1268 Value *StoreVal; 1269 if (StoringOther || SI->getOperand(0) == InitVal) { 1270 StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), 1271 StoringOther); 1272 } else { 1273 // Otherwise, we are storing a previously loaded copy. To do this, 1274 // change the copy from copying the original value to just copying the 1275 // bool. 1276 Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); 1277 1278 // If we've already replaced the input, StoredVal will be a cast or 1279 // select instruction. If not, it will be a load of the original 1280 // global. 1281 if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { 1282 assert(LI->getOperand(0) == GV && "Not a copy!"); 1283 // Insert a new load, to preserve the saved value. 1284 StoreVal = 1285 new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b", 1286 false, Align(1), LI->getOrdering(), 1287 LI->getSyncScopeID(), LI->getIterator()); 1288 cast<LoadInst>(StoreVal)->setDebugLoc(LI->getDebugLoc()); 1289 } else { 1290 assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && 1291 "This is not a form that we understand!"); 1292 StoreVal = StoredVal->getOperand(0); 1293 assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); 1294 } 1295 } 1296 StoreInst *NSI = 1297 new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(), 1298 SI->getSyncScopeID(), SI->getIterator()); 1299 NSI->setDebugLoc(SI->getDebugLoc()); 1300 } else { 1301 // Change the load into a load of bool then a select. 1302 LoadInst *LI = cast<LoadInst>(UI); 1303 LoadInst *NLI = new LoadInst( 1304 NewGV->getValueType(), NewGV, LI->getName() + ".b", false, Align(1), 1305 LI->getOrdering(), LI->getSyncScopeID(), LI->getIterator()); 1306 Instruction *NSI; 1307 if (IsOneZero) 1308 NSI = new ZExtInst(NLI, LI->getType(), "", LI->getIterator()); 1309 else 1310 NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI->getIterator()); 1311 NSI->takeName(LI); 1312 // Since LI is split into two instructions, NLI and NSI both inherit the 1313 // same DebugLoc 1314 NLI->setDebugLoc(LI->getDebugLoc()); 1315 NSI->setDebugLoc(LI->getDebugLoc()); 1316 LI->replaceAllUsesWith(NSI); 1317 } 1318 UI->eraseFromParent(); 1319 } 1320 1321 // Retain the name of the old global variable. People who are debugging their 1322 // programs may expect these variables to be named the same. 1323 NewGV->takeName(GV); 1324 GV->eraseFromParent(); 1325 return true; 1326 } 1327 1328 static bool 1329 deleteIfDead(GlobalValue &GV, 1330 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats, 1331 function_ref<void(Function &)> DeleteFnCallback = nullptr) { 1332 GV.removeDeadConstantUsers(); 1333 1334 if (!GV.isDiscardableIfUnused() && !GV.isDeclaration()) 1335 return false; 1336 1337 if (const Comdat *C = GV.getComdat()) 1338 if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C)) 1339 return false; 1340 1341 bool Dead; 1342 if (auto *F = dyn_cast<Function>(&GV)) 1343 Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead(); 1344 else 1345 Dead = GV.use_empty(); 1346 if (!Dead) 1347 return false; 1348 1349 LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n"); 1350 if (auto *F = dyn_cast<Function>(&GV)) { 1351 if (DeleteFnCallback) 1352 DeleteFnCallback(*F); 1353 } 1354 ReplaceableMetadataImpl::SalvageDebugInfo(GV); 1355 GV.eraseFromParent(); 1356 ++NumDeleted; 1357 return true; 1358 } 1359 1360 static bool isPointerValueDeadOnEntryToFunction( 1361 const Function *F, GlobalValue *GV, 1362 function_ref<DominatorTree &(Function &)> LookupDomTree) { 1363 // Find all uses of GV. We expect them all to be in F, and if we can't 1364 // identify any of the uses we bail out. 1365 // 1366 // On each of these uses, identify if the memory that GV points to is 1367 // used/required/live at the start of the function. If it is not, for example 1368 // if the first thing the function does is store to the GV, the GV can 1369 // possibly be demoted. 1370 // 1371 // We don't do an exhaustive search for memory operations - simply look 1372 // through bitcasts as they're quite common and benign. 1373 const DataLayout &DL = GV->getDataLayout(); 1374 SmallVector<LoadInst *, 4> Loads; 1375 SmallVector<StoreInst *, 4> Stores; 1376 for (auto *U : GV->users()) { 1377 Instruction *I = dyn_cast<Instruction>(U); 1378 if (!I) 1379 return false; 1380 assert(I->getParent()->getParent() == F); 1381 1382 if (auto *LI = dyn_cast<LoadInst>(I)) 1383 Loads.push_back(LI); 1384 else if (auto *SI = dyn_cast<StoreInst>(I)) 1385 Stores.push_back(SI); 1386 else 1387 return false; 1388 } 1389 1390 // We have identified all uses of GV into loads and stores. Now check if all 1391 // of them are known not to depend on the value of the global at the function 1392 // entry point. We do this by ensuring that every load is dominated by at 1393 // least one store. 1394 auto &DT = LookupDomTree(*const_cast<Function *>(F)); 1395 1396 // The below check is quadratic. Check we're not going to do too many tests. 1397 // FIXME: Even though this will always have worst-case quadratic time, we 1398 // could put effort into minimizing the average time by putting stores that 1399 // have been shown to dominate at least one load at the beginning of the 1400 // Stores array, making subsequent dominance checks more likely to succeed 1401 // early. 1402 // 1403 // The threshold here is fairly large because global->local demotion is a 1404 // very powerful optimization should it fire. 1405 const unsigned Threshold = 100; 1406 if (Loads.size() * Stores.size() > Threshold) 1407 return false; 1408 1409 for (auto *L : Loads) { 1410 auto *LTy = L->getType(); 1411 if (none_of(Stores, [&](const StoreInst *S) { 1412 auto *STy = S->getValueOperand()->getType(); 1413 // The load is only dominated by the store if DomTree says so 1414 // and the number of bits loaded in L is less than or equal to 1415 // the number of bits stored in S. 1416 return DT.dominates(S, L) && 1417 DL.getTypeStoreSize(LTy).getFixedValue() <= 1418 DL.getTypeStoreSize(STy).getFixedValue(); 1419 })) 1420 return false; 1421 } 1422 // All loads have known dependences inside F, so the global can be localized. 1423 return true; 1424 } 1425 1426 // For a global variable with one store, if the store dominates any loads, 1427 // those loads will always load the stored value (as opposed to the 1428 // initializer), even in the presence of recursion. 1429 static bool forwardStoredOnceStore( 1430 GlobalVariable *GV, const StoreInst *StoredOnceStore, 1431 function_ref<DominatorTree &(Function &)> LookupDomTree) { 1432 const Value *StoredOnceValue = StoredOnceStore->getValueOperand(); 1433 // We can do this optimization for non-constants in nosync + norecurse 1434 // functions, but globals used in exactly one norecurse functions are already 1435 // promoted to an alloca. 1436 if (!isa<Constant>(StoredOnceValue)) 1437 return false; 1438 const Function *F = StoredOnceStore->getFunction(); 1439 SmallVector<LoadInst *> Loads; 1440 for (User *U : GV->users()) { 1441 if (auto *LI = dyn_cast<LoadInst>(U)) { 1442 if (LI->getFunction() == F && 1443 LI->getType() == StoredOnceValue->getType() && LI->isSimple()) 1444 Loads.push_back(LI); 1445 } 1446 } 1447 // Only compute DT if we have any loads to examine. 1448 bool MadeChange = false; 1449 if (!Loads.empty()) { 1450 auto &DT = LookupDomTree(*const_cast<Function *>(F)); 1451 for (auto *LI : Loads) { 1452 if (DT.dominates(StoredOnceStore, LI)) { 1453 LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue)); 1454 LI->eraseFromParent(); 1455 MadeChange = true; 1456 } 1457 } 1458 } 1459 return MadeChange; 1460 } 1461 1462 /// Analyze the specified global variable and optimize 1463 /// it if possible. If we make a change, return true. 1464 static bool 1465 processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, 1466 function_ref<TargetTransformInfo &(Function &)> GetTTI, 1467 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 1468 function_ref<DominatorTree &(Function &)> LookupDomTree) { 1469 auto &DL = GV->getDataLayout(); 1470 // If this is a first class global and has only one accessing function and 1471 // this function is non-recursive, we replace the global with a local alloca 1472 // in this function. 1473 // 1474 // NOTE: It doesn't make sense to promote non-single-value types since we 1475 // are just replacing static memory to stack memory. 1476 // 1477 // If the global is in different address space, don't bring it to stack. 1478 if (!GS.HasMultipleAccessingFunctions && 1479 GS.AccessingFunction && 1480 GV->getValueType()->isSingleValueType() && 1481 GV->getType()->getAddressSpace() == DL.getAllocaAddrSpace() && 1482 !GV->isExternallyInitialized() && 1483 GS.AccessingFunction->doesNotRecurse() && 1484 isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV, 1485 LookupDomTree)) { 1486 const DataLayout &DL = GV->getDataLayout(); 1487 1488 LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); 1489 BasicBlock::iterator FirstI = 1490 GS.AccessingFunction->getEntryBlock().begin().getNonConst(); 1491 Type *ElemTy = GV->getValueType(); 1492 // FIXME: Pass Global's alignment when globals have alignment 1493 AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), 1494 nullptr, GV->getName(), FirstI); 1495 Alloca->setDebugLoc(DebugLoc::getCompilerGenerated()); 1496 if (!isa<UndefValue>(GV->getInitializer())) { 1497 auto *SI = new StoreInst(GV->getInitializer(), Alloca, FirstI); 1498 // FIXME: We're localizing a global and creating a store instruction for 1499 // the initial value of that global. Could we logically use the global 1500 // variable's (if one exists) line for this? 1501 SI->setDebugLoc(DebugLoc::getCompilerGenerated()); 1502 } 1503 1504 GV->replaceAllUsesWith(Alloca); 1505 GV->eraseFromParent(); 1506 ++NumLocalized; 1507 return true; 1508 } 1509 1510 bool Changed = false; 1511 1512 // If the global is never loaded (but may be stored to), it is dead. 1513 // Delete it now. 1514 if (!GS.IsLoaded) { 1515 LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); 1516 1517 if (isLeakCheckerRoot(GV)) { 1518 // Delete any constant stores to the global. 1519 Changed = CleanupPointerRootUsers(GV, GetTLI); 1520 } else { 1521 // Delete any stores we can find to the global. We may not be able to 1522 // make it completely dead though. 1523 Changed = CleanupConstantGlobalUsers(GV, DL); 1524 } 1525 1526 // If the global is dead now, delete it. 1527 if (GV->use_empty()) { 1528 GV->eraseFromParent(); 1529 ++NumDeleted; 1530 Changed = true; 1531 } 1532 return Changed; 1533 1534 } 1535 if (GS.StoredType <= GlobalStatus::InitializerStored) { 1536 LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n"); 1537 1538 // Don't actually mark a global constant if it's atomic because atomic loads 1539 // are implemented by a trivial cmpxchg in some edge-cases and that usually 1540 // requires write access to the variable even if it's not actually changed. 1541 if (GS.Ordering == AtomicOrdering::NotAtomic) { 1542 assert(!GV->isConstant() && "Expected a non-constant global"); 1543 GV->setConstant(true); 1544 Changed = true; 1545 } 1546 1547 // Clean up any obviously simplifiable users now. 1548 Changed |= CleanupConstantGlobalUsers(GV, DL); 1549 1550 // If the global is dead now, just nuke it. 1551 if (GV->use_empty()) { 1552 LLVM_DEBUG(dbgs() << " *** Marking constant allowed us to simplify " 1553 << "all users and delete global!\n"); 1554 GV->eraseFromParent(); 1555 ++NumDeleted; 1556 return true; 1557 } 1558 1559 // Fall through to the next check; see if we can optimize further. 1560 ++NumMarked; 1561 } 1562 if (!GV->getInitializer()->getType()->isSingleValueType()) { 1563 const DataLayout &DL = GV->getDataLayout(); 1564 if (SRAGlobal(GV, DL)) 1565 return true; 1566 } 1567 Value *StoredOnceValue = GS.getStoredOnceValue(); 1568 if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) { 1569 Function &StoreFn = 1570 const_cast<Function &>(*GS.StoredOnceStore->getFunction()); 1571 bool CanHaveNonUndefGlobalInitializer = 1572 GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace( 1573 GV->getType()->getAddressSpace()); 1574 // If the initial value for the global was an undef value, and if only 1575 // one other value was stored into it, we can just change the 1576 // initializer to be the stored value, then delete all stores to the 1577 // global. This allows us to mark it constant. 1578 // This is restricted to address spaces that allow globals to have 1579 // initializers. NVPTX, for example, does not support initializers for 1580 // shared memory (AS 3). 1581 auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue); 1582 if (SOVConstant && isa<UndefValue>(GV->getInitializer()) && 1583 DL.getTypeAllocSize(SOVConstant->getType()) == 1584 DL.getTypeAllocSize(GV->getValueType()) && 1585 CanHaveNonUndefGlobalInitializer) { 1586 if (SOVConstant->getType() == GV->getValueType()) { 1587 // Change the initializer in place. 1588 GV->setInitializer(SOVConstant); 1589 } else { 1590 // Create a new global with adjusted type. 1591 auto *NGV = new GlobalVariable( 1592 *GV->getParent(), SOVConstant->getType(), GV->isConstant(), 1593 GV->getLinkage(), SOVConstant, "", GV, GV->getThreadLocalMode(), 1594 GV->getAddressSpace()); 1595 NGV->takeName(GV); 1596 NGV->copyAttributesFrom(GV); 1597 GV->replaceAllUsesWith(NGV); 1598 GV->eraseFromParent(); 1599 GV = NGV; 1600 } 1601 1602 // Clean up any obviously simplifiable users now. 1603 CleanupConstantGlobalUsers(GV, DL); 1604 1605 if (GV->use_empty()) { 1606 LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to " 1607 << "simplify all users and delete global!\n"); 1608 GV->eraseFromParent(); 1609 ++NumDeleted; 1610 } 1611 ++NumSubstitute; 1612 return true; 1613 } 1614 1615 // Try to optimize globals based on the knowledge that only one value 1616 // (besides its initializer) is ever stored to the global. 1617 if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI)) 1618 return true; 1619 1620 // Try to forward the store to any loads. If we have more than one store, we 1621 // may have a store of the initializer between StoredOnceStore and a load. 1622 if (GS.NumStores == 1) 1623 if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree)) 1624 return true; 1625 1626 // Otherwise, if the global was not a boolean, we can shrink it to be a 1627 // boolean. Skip this optimization for AS that doesn't allow an initializer. 1628 if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic && 1629 (!isa<UndefValue>(GV->getInitializer()) || 1630 CanHaveNonUndefGlobalInitializer)) { 1631 if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { 1632 ++NumShrunkToBool; 1633 return true; 1634 } 1635 } 1636 } 1637 1638 return Changed; 1639 } 1640 1641 /// Analyze the specified global variable and optimize it if possible. If we 1642 /// make a change, return true. 1643 static bool 1644 processGlobal(GlobalValue &GV, 1645 function_ref<TargetTransformInfo &(Function &)> GetTTI, 1646 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 1647 function_ref<DominatorTree &(Function &)> LookupDomTree) { 1648 if (GV.getName().starts_with("llvm.")) 1649 return false; 1650 1651 GlobalStatus GS; 1652 1653 if (GlobalStatus::analyzeGlobal(&GV, GS)) 1654 return false; 1655 1656 bool Changed = false; 1657 if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) { 1658 auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global 1659 : GlobalValue::UnnamedAddr::Local; 1660 if (NewUnnamedAddr != GV.getUnnamedAddr()) { 1661 GV.setUnnamedAddr(NewUnnamedAddr); 1662 NumUnnamed++; 1663 Changed = true; 1664 } 1665 } 1666 1667 // Do more involved optimizations if the global is internal. 1668 if (!GV.hasLocalLinkage()) 1669 return Changed; 1670 1671 auto *GVar = dyn_cast<GlobalVariable>(&GV); 1672 if (!GVar) 1673 return Changed; 1674 1675 if (GVar->isConstant() || !GVar->hasInitializer()) 1676 return Changed; 1677 1678 return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) || 1679 Changed; 1680 } 1681 1682 /// Walk all of the direct calls of the specified function, changing them to 1683 /// FastCC. 1684 static void ChangeCalleesToFastCall(Function *F) { 1685 for (User *U : F->users()) 1686 cast<CallBase>(U)->setCallingConv(CallingConv::Fast); 1687 } 1688 1689 static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs, 1690 Attribute::AttrKind A) { 1691 unsigned AttrIndex; 1692 if (Attrs.hasAttrSomewhere(A, &AttrIndex)) 1693 return Attrs.removeAttributeAtIndex(C, AttrIndex, A); 1694 return Attrs; 1695 } 1696 1697 static void RemoveAttribute(Function *F, Attribute::AttrKind A) { 1698 F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A)); 1699 for (User *U : F->users()) { 1700 CallBase *CB = cast<CallBase>(U); 1701 CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A)); 1702 } 1703 } 1704 1705 /// Return true if this is a calling convention that we'd like to change. The 1706 /// idea here is that we don't want to mess with the convention if the user 1707 /// explicitly requested something with performance implications like coldcc, 1708 /// GHC, or anyregcc. 1709 static bool hasChangeableCCImpl(Function *F) { 1710 CallingConv::ID CC = F->getCallingConv(); 1711 1712 // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc? 1713 if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall) 1714 return false; 1715 1716 if (F->isVarArg()) 1717 return false; 1718 1719 // FIXME: Change CC for the whole chain of musttail calls when possible. 1720 // 1721 // Can't change CC of the function that either has musttail calls, or is a 1722 // musttail callee itself 1723 for (User *U : F->users()) { 1724 CallInst* CI = dyn_cast<CallInst>(U); 1725 if (!CI) 1726 continue; 1727 1728 if (CI->isMustTailCall()) 1729 return false; 1730 } 1731 1732 for (BasicBlock &BB : *F) 1733 if (BB.getTerminatingMustTailCall()) 1734 return false; 1735 1736 return !F->hasAddressTaken(); 1737 } 1738 1739 using ChangeableCCCacheTy = SmallDenseMap<Function *, bool, 8>; 1740 static bool hasChangeableCC(Function *F, 1741 ChangeableCCCacheTy &ChangeableCCCache) { 1742 auto Res = ChangeableCCCache.try_emplace(F, false); 1743 if (Res.second) 1744 Res.first->second = hasChangeableCCImpl(F); 1745 return Res.first->second; 1746 } 1747 1748 /// Return true if the block containing the call site has a BlockFrequency of 1749 /// less than ColdCCRelFreq% of the entry block. 1750 static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) { 1751 const BranchProbability ColdProb(ColdCCRelFreq, 100); 1752 auto *CallSiteBB = CB.getParent(); 1753 auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB); 1754 auto CallerEntryFreq = 1755 CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock())); 1756 return CallSiteFreq < CallerEntryFreq * ColdProb; 1757 } 1758 1759 // This function checks if the input function F is cold at all call sites. It 1760 // also looks each call site's containing function, returning false if the 1761 // caller function contains other non cold calls. The input vector AllCallsCold 1762 // contains a list of functions that only have call sites in cold blocks. 1763 static bool 1764 isValidCandidateForColdCC(Function &F, 1765 function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 1766 const std::vector<Function *> &AllCallsCold) { 1767 1768 if (F.user_empty()) 1769 return false; 1770 1771 for (User *U : F.users()) { 1772 CallBase &CB = cast<CallBase>(*U); 1773 Function *CallerFunc = CB.getParent()->getParent(); 1774 BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc); 1775 if (!isColdCallSite(CB, CallerBFI)) 1776 return false; 1777 if (!llvm::is_contained(AllCallsCold, CallerFunc)) 1778 return false; 1779 } 1780 return true; 1781 } 1782 1783 static void changeCallSitesToColdCC(Function *F) { 1784 for (User *U : F->users()) 1785 cast<CallBase>(U)->setCallingConv(CallingConv::Cold); 1786 } 1787 1788 // This function iterates over all the call instructions in the input Function 1789 // and checks that all call sites are in cold blocks and are allowed to use the 1790 // coldcc calling convention. 1791 static bool 1792 hasOnlyColdCalls(Function &F, 1793 function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 1794 ChangeableCCCacheTy &ChangeableCCCache) { 1795 for (BasicBlock &BB : F) { 1796 for (Instruction &I : BB) { 1797 if (CallInst *CI = dyn_cast<CallInst>(&I)) { 1798 // Skip over isline asm instructions since they aren't function calls. 1799 if (CI->isInlineAsm()) 1800 continue; 1801 Function *CalledFn = CI->getCalledFunction(); 1802 if (!CalledFn) 1803 return false; 1804 // Skip over intrinsics since they won't remain as function calls. 1805 // Important to do this check before the linkage check below so we 1806 // won't bail out on debug intrinsics, possibly making the generated 1807 // code dependent on the presence of debug info. 1808 if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic) 1809 continue; 1810 if (!CalledFn->hasLocalLinkage()) 1811 return false; 1812 // Check if it's valid to use coldcc calling convention. 1813 if (!hasChangeableCC(CalledFn, ChangeableCCCache)) 1814 return false; 1815 BlockFrequencyInfo &CallerBFI = GetBFI(F); 1816 if (!isColdCallSite(*CI, CallerBFI)) 1817 return false; 1818 } 1819 } 1820 } 1821 return true; 1822 } 1823 1824 static bool hasMustTailCallers(Function *F) { 1825 for (User *U : F->users()) { 1826 CallBase *CB = cast<CallBase>(U); 1827 if (CB->isMustTailCall()) 1828 return true; 1829 } 1830 return false; 1831 } 1832 1833 static bool hasInvokeCallers(Function *F) { 1834 for (User *U : F->users()) 1835 if (isa<InvokeInst>(U)) 1836 return true; 1837 return false; 1838 } 1839 1840 static void RemovePreallocated(Function *F) { 1841 RemoveAttribute(F, Attribute::Preallocated); 1842 1843 auto *M = F->getParent(); 1844 1845 IRBuilder<> Builder(M->getContext()); 1846 1847 // Cannot modify users() while iterating over it, so make a copy. 1848 SmallVector<User *, 4> PreallocatedCalls(F->users()); 1849 for (User *U : PreallocatedCalls) { 1850 CallBase *CB = dyn_cast<CallBase>(U); 1851 if (!CB) 1852 continue; 1853 1854 assert( 1855 !CB->isMustTailCall() && 1856 "Shouldn't call RemotePreallocated() on a musttail preallocated call"); 1857 // Create copy of call without "preallocated" operand bundle. 1858 SmallVector<OperandBundleDef, 1> OpBundles; 1859 CB->getOperandBundlesAsDefs(OpBundles); 1860 CallBase *PreallocatedSetup = nullptr; 1861 for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) { 1862 if (It->getTag() == "preallocated") { 1863 PreallocatedSetup = cast<CallBase>(*It->input_begin()); 1864 OpBundles.erase(It); 1865 break; 1866 } 1867 } 1868 assert(PreallocatedSetup && "Did not find preallocated bundle"); 1869 uint64_t ArgCount = 1870 cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue(); 1871 1872 assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) && 1873 "Unknown indirect call type"); 1874 CallBase *NewCB = CallBase::Create(CB, OpBundles, CB->getIterator()); 1875 CB->replaceAllUsesWith(NewCB); 1876 NewCB->takeName(CB); 1877 CB->eraseFromParent(); 1878 1879 Builder.SetInsertPoint(PreallocatedSetup); 1880 auto *StackSave = Builder.CreateStackSave(); 1881 Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction()); 1882 Builder.CreateStackRestore(StackSave); 1883 1884 // Replace @llvm.call.preallocated.arg() with alloca. 1885 // Cannot modify users() while iterating over it, so make a copy. 1886 // @llvm.call.preallocated.arg() can be called with the same index multiple 1887 // times. So for each @llvm.call.preallocated.arg(), we see if we have 1888 // already created a Value* for the index, and if not, create an alloca and 1889 // bitcast right after the @llvm.call.preallocated.setup() so that it 1890 // dominates all uses. 1891 SmallVector<Value *, 2> ArgAllocas(ArgCount); 1892 SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users()); 1893 for (auto *User : PreallocatedArgs) { 1894 auto *UseCall = cast<CallBase>(User); 1895 assert(UseCall->getCalledFunction()->getIntrinsicID() == 1896 Intrinsic::call_preallocated_arg && 1897 "preallocated token use was not a llvm.call.preallocated.arg"); 1898 uint64_t AllocArgIndex = 1899 cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue(); 1900 Value *AllocaReplacement = ArgAllocas[AllocArgIndex]; 1901 if (!AllocaReplacement) { 1902 auto AddressSpace = UseCall->getType()->getPointerAddressSpace(); 1903 auto *ArgType = 1904 UseCall->getFnAttr(Attribute::Preallocated).getValueAsType(); 1905 auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction(); 1906 Builder.SetInsertPoint(InsertBefore); 1907 auto *Alloca = 1908 Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg"); 1909 ArgAllocas[AllocArgIndex] = Alloca; 1910 AllocaReplacement = Alloca; 1911 } 1912 1913 UseCall->replaceAllUsesWith(AllocaReplacement); 1914 UseCall->eraseFromParent(); 1915 } 1916 // Remove @llvm.call.preallocated.setup(). 1917 cast<Instruction>(PreallocatedSetup)->eraseFromParent(); 1918 } 1919 } 1920 1921 static bool 1922 OptimizeFunctions(Module &M, 1923 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 1924 function_ref<TargetTransformInfo &(Function &)> GetTTI, 1925 function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 1926 function_ref<DominatorTree &(Function &)> LookupDomTree, 1927 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats, 1928 function_ref<void(Function &F)> ChangedCFGCallback, 1929 function_ref<void(Function &F)> DeleteFnCallback) { 1930 1931 bool Changed = false; 1932 1933 ChangeableCCCacheTy ChangeableCCCache; 1934 std::vector<Function *> AllCallsCold; 1935 for (Function &F : llvm::make_early_inc_range(M)) 1936 if (hasOnlyColdCalls(F, GetBFI, ChangeableCCCache)) 1937 AllCallsCold.push_back(&F); 1938 1939 // Optimize functions. 1940 for (Function &F : llvm::make_early_inc_range(M)) { 1941 // Don't perform global opt pass on naked functions; we don't want fast 1942 // calling conventions for naked functions. 1943 if (F.hasFnAttribute(Attribute::Naked)) 1944 continue; 1945 1946 // Functions without names cannot be referenced outside this module. 1947 if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage()) 1948 F.setLinkage(GlobalValue::InternalLinkage); 1949 1950 if (deleteIfDead(F, NotDiscardableComdats, DeleteFnCallback)) { 1951 Changed = true; 1952 continue; 1953 } 1954 1955 // LLVM's definition of dominance allows instructions that are cyclic 1956 // in unreachable blocks, e.g.: 1957 // %pat = select i1 %condition, @global, i16* %pat 1958 // because any instruction dominates an instruction in a block that's 1959 // not reachable from entry. 1960 // So, remove unreachable blocks from the function, because a) there's 1961 // no point in analyzing them and b) GlobalOpt should otherwise grow 1962 // some more complicated logic to break these cycles. 1963 // Notify the analysis manager that we've modified the function's CFG. 1964 if (!F.isDeclaration()) { 1965 if (removeUnreachableBlocks(F)) { 1966 Changed = true; 1967 ChangedCFGCallback(F); 1968 } 1969 } 1970 1971 Changed |= processGlobal(F, GetTTI, GetTLI, LookupDomTree); 1972 1973 if (!F.hasLocalLinkage()) 1974 continue; 1975 1976 // If we have an inalloca parameter that we can safely remove the 1977 // inalloca attribute from, do so. This unlocks optimizations that 1978 // wouldn't be safe in the presence of inalloca. 1979 // FIXME: We should also hoist alloca affected by this to the entry 1980 // block if possible. 1981 if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) && 1982 !F.hasAddressTaken() && !hasMustTailCallers(&F) && !F.isVarArg()) { 1983 RemoveAttribute(&F, Attribute::InAlloca); 1984 Changed = true; 1985 } 1986 1987 // FIXME: handle invokes 1988 // FIXME: handle musttail 1989 if (F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) { 1990 if (!F.hasAddressTaken() && !hasMustTailCallers(&F) && 1991 !hasInvokeCallers(&F)) { 1992 RemovePreallocated(&F); 1993 Changed = true; 1994 } 1995 continue; 1996 } 1997 1998 if (hasChangeableCC(&F, ChangeableCCCache)) { 1999 NumInternalFunc++; 2000 TargetTransformInfo &TTI = GetTTI(F); 2001 // Change the calling convention to coldcc if either stress testing is 2002 // enabled or the target would like to use coldcc on functions which are 2003 // cold at all call sites and the callers contain no other non coldcc 2004 // calls. 2005 if (EnableColdCCStressTest || 2006 (TTI.useColdCCForColdCall(F) && 2007 isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) { 2008 ChangeableCCCache.erase(&F); 2009 F.setCallingConv(CallingConv::Cold); 2010 changeCallSitesToColdCC(&F); 2011 Changed = true; 2012 NumColdCC++; 2013 } 2014 } 2015 2016 if (hasChangeableCC(&F, ChangeableCCCache)) { 2017 // If this function has a calling convention worth changing, is not a 2018 // varargs function, and is only called directly, promote it to use the 2019 // Fast calling convention. 2020 F.setCallingConv(CallingConv::Fast); 2021 ChangeCalleesToFastCall(&F); 2022 ++NumFastCallFns; 2023 Changed = true; 2024 } 2025 2026 if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) && 2027 !F.hasAddressTaken()) { 2028 // The function is not used by a trampoline intrinsic, so it is safe 2029 // to remove the 'nest' attribute. 2030 RemoveAttribute(&F, Attribute::Nest); 2031 ++NumNestRemoved; 2032 Changed = true; 2033 } 2034 } 2035 return Changed; 2036 } 2037 2038 static bool 2039 OptimizeGlobalVars(Module &M, 2040 function_ref<TargetTransformInfo &(Function &)> GetTTI, 2041 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 2042 function_ref<DominatorTree &(Function &)> LookupDomTree, 2043 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { 2044 bool Changed = false; 2045 2046 for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) { 2047 // Global variables without names cannot be referenced outside this module. 2048 if (!GV.hasName() && !GV.isDeclaration() && !GV.hasLocalLinkage()) 2049 GV.setLinkage(GlobalValue::InternalLinkage); 2050 // Simplify the initializer. 2051 if (GV.hasInitializer()) 2052 if (auto *C = dyn_cast<Constant>(GV.getInitializer())) { 2053 auto &DL = M.getDataLayout(); 2054 // TLI is not used in the case of a Constant, so use default nullptr 2055 // for that optional parameter, since we don't have a Function to 2056 // provide GetTLI anyway. 2057 Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr); 2058 if (New != C) 2059 GV.setInitializer(New); 2060 } 2061 2062 if (deleteIfDead(GV, NotDiscardableComdats)) { 2063 Changed = true; 2064 continue; 2065 } 2066 2067 Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree); 2068 } 2069 return Changed; 2070 } 2071 2072 /// Evaluate static constructors in the function, if we can. Return true if we 2073 /// can, false otherwise. 2074 static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, 2075 TargetLibraryInfo *TLI) { 2076 // Skip external functions. 2077 if (F->isDeclaration()) 2078 return false; 2079 // Call the function. 2080 Evaluator Eval(DL, TLI); 2081 Constant *RetValDummy; 2082 bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy, 2083 SmallVector<Constant*, 0>()); 2084 2085 if (EvalSuccess) { 2086 ++NumCtorsEvaluated; 2087 2088 // We succeeded at evaluation: commit the result. 2089 auto NewInitializers = Eval.getMutatedInitializers(); 2090 LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" 2091 << F->getName() << "' to " << NewInitializers.size() 2092 << " stores.\n"); 2093 for (const auto &Pair : NewInitializers) 2094 Pair.first->setInitializer(Pair.second); 2095 for (GlobalVariable *GV : Eval.getInvariants()) 2096 GV->setConstant(true); 2097 } 2098 2099 return EvalSuccess; 2100 } 2101 2102 static int compareNames(Constant *const *A, Constant *const *B) { 2103 Value *AStripped = (*A)->stripPointerCasts(); 2104 Value *BStripped = (*B)->stripPointerCasts(); 2105 return AStripped->getName().compare(BStripped->getName()); 2106 } 2107 2108 static void setUsedInitializer(GlobalVariable &V, 2109 const SmallPtrSetImpl<GlobalValue *> &Init) { 2110 if (Init.empty()) { 2111 V.eraseFromParent(); 2112 return; 2113 } 2114 2115 // Get address space of pointers in the array of pointers. 2116 const Type *UsedArrayType = V.getValueType(); 2117 const auto *VAT = cast<ArrayType>(UsedArrayType); 2118 const auto *VEPT = cast<PointerType>(VAT->getArrayElementType()); 2119 2120 // Type of pointer to the array of pointers. 2121 PointerType *PtrTy = 2122 PointerType::get(V.getContext(), VEPT->getAddressSpace()); 2123 2124 SmallVector<Constant *, 8> UsedArray; 2125 for (GlobalValue *GV : Init) { 2126 Constant *Cast = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, PtrTy); 2127 UsedArray.push_back(Cast); 2128 } 2129 2130 // Sort to get deterministic order. 2131 array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames); 2132 ArrayType *ATy = ArrayType::get(PtrTy, UsedArray.size()); 2133 2134 Module *M = V.getParent(); 2135 V.removeFromParent(); 2136 GlobalVariable *NV = 2137 new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage, 2138 ConstantArray::get(ATy, UsedArray), ""); 2139 NV->takeName(&V); 2140 NV->setSection("llvm.metadata"); 2141 delete &V; 2142 } 2143 2144 namespace { 2145 2146 /// An easy to access representation of llvm.used and llvm.compiler.used. 2147 class LLVMUsed { 2148 SmallPtrSet<GlobalValue *, 4> Used; 2149 SmallPtrSet<GlobalValue *, 4> CompilerUsed; 2150 GlobalVariable *UsedV; 2151 GlobalVariable *CompilerUsedV; 2152 2153 public: 2154 LLVMUsed(Module &M) { 2155 SmallVector<GlobalValue *, 4> Vec; 2156 UsedV = collectUsedGlobalVariables(M, Vec, false); 2157 Used = {llvm::from_range, Vec}; 2158 Vec.clear(); 2159 CompilerUsedV = collectUsedGlobalVariables(M, Vec, true); 2160 CompilerUsed = {llvm::from_range, Vec}; 2161 } 2162 2163 using iterator = SmallPtrSet<GlobalValue *, 4>::iterator; 2164 using used_iterator_range = iterator_range<iterator>; 2165 2166 iterator usedBegin() { return Used.begin(); } 2167 iterator usedEnd() { return Used.end(); } 2168 2169 used_iterator_range used() { 2170 return used_iterator_range(usedBegin(), usedEnd()); 2171 } 2172 2173 iterator compilerUsedBegin() { return CompilerUsed.begin(); } 2174 iterator compilerUsedEnd() { return CompilerUsed.end(); } 2175 2176 used_iterator_range compilerUsed() { 2177 return used_iterator_range(compilerUsedBegin(), compilerUsedEnd()); 2178 } 2179 2180 bool usedCount(GlobalValue *GV) const { return Used.count(GV); } 2181 2182 bool compilerUsedCount(GlobalValue *GV) const { 2183 return CompilerUsed.count(GV); 2184 } 2185 2186 bool usedErase(GlobalValue *GV) { return Used.erase(GV); } 2187 bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); } 2188 bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; } 2189 2190 bool compilerUsedInsert(GlobalValue *GV) { 2191 return CompilerUsed.insert(GV).second; 2192 } 2193 2194 void syncVariablesAndSets() { 2195 if (UsedV) 2196 setUsedInitializer(*UsedV, Used); 2197 if (CompilerUsedV) 2198 setUsedInitializer(*CompilerUsedV, CompilerUsed); 2199 } 2200 }; 2201 2202 } // end anonymous namespace 2203 2204 static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) { 2205 if (GA.use_empty()) // No use at all. 2206 return false; 2207 2208 assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) && 2209 "We should have removed the duplicated " 2210 "element from llvm.compiler.used"); 2211 if (!GA.hasOneUse()) 2212 // Strictly more than one use. So at least one is not in llvm.used and 2213 // llvm.compiler.used. 2214 return true; 2215 2216 // Exactly one use. Check if it is in llvm.used or llvm.compiler.used. 2217 return !U.usedCount(&GA) && !U.compilerUsedCount(&GA); 2218 } 2219 2220 static bool mayHaveOtherReferences(GlobalValue &GV, const LLVMUsed &U) { 2221 if (!GV.hasLocalLinkage()) 2222 return true; 2223 2224 return U.usedCount(&GV) || U.compilerUsedCount(&GV); 2225 } 2226 2227 static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U, 2228 bool &RenameTarget) { 2229 if (GA.isWeakForLinker()) 2230 return false; 2231 2232 RenameTarget = false; 2233 bool Ret = false; 2234 if (hasUseOtherThanLLVMUsed(GA, U)) 2235 Ret = true; 2236 2237 // If the alias is externally visible, we may still be able to simplify it. 2238 if (!mayHaveOtherReferences(GA, U)) 2239 return Ret; 2240 2241 // If the aliasee has internal linkage and no other references (e.g., 2242 // @llvm.used, @llvm.compiler.used), give it the name and linkage of the 2243 // alias, and delete the alias. This turns: 2244 // define internal ... @f(...) 2245 // @a = alias ... @f 2246 // into: 2247 // define ... @a(...) 2248 Constant *Aliasee = GA.getAliasee(); 2249 GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); 2250 if (mayHaveOtherReferences(*Target, U)) 2251 return Ret; 2252 2253 RenameTarget = true; 2254 return true; 2255 } 2256 2257 static bool 2258 OptimizeGlobalAliases(Module &M, 2259 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { 2260 bool Changed = false; 2261 LLVMUsed Used(M); 2262 2263 for (GlobalValue *GV : Used.used()) 2264 Used.compilerUsedErase(GV); 2265 2266 // Return whether GV is explicitly or implicitly dso_local and not replaceable 2267 // by another definition in the current linkage unit. 2268 auto IsModuleLocal = [](GlobalValue &GV) { 2269 return !GlobalValue::isInterposableLinkage(GV.getLinkage()) && 2270 (GV.isDSOLocal() || GV.isImplicitDSOLocal()); 2271 }; 2272 2273 for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) { 2274 // Aliases without names cannot be referenced outside this module. 2275 if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage()) 2276 J.setLinkage(GlobalValue::InternalLinkage); 2277 2278 if (deleteIfDead(J, NotDiscardableComdats)) { 2279 Changed = true; 2280 continue; 2281 } 2282 2283 // If the alias can change at link time, nothing can be done - bail out. 2284 if (!IsModuleLocal(J)) 2285 continue; 2286 2287 Constant *Aliasee = J.getAliasee(); 2288 GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts()); 2289 // We can't trivially replace the alias with the aliasee if the aliasee is 2290 // non-trivial in some way. We also can't replace the alias with the aliasee 2291 // if the aliasee may be preemptible at runtime. On ELF, a non-preemptible 2292 // alias can be used to access the definition as if preemption did not 2293 // happen. 2294 // TODO: Try to handle non-zero GEPs of local aliasees. 2295 if (!Target || !IsModuleLocal(*Target)) 2296 continue; 2297 2298 Target->removeDeadConstantUsers(); 2299 2300 // Make all users of the alias use the aliasee instead. 2301 bool RenameTarget; 2302 if (!hasUsesToReplace(J, Used, RenameTarget)) 2303 continue; 2304 2305 J.replaceAllUsesWith(Aliasee); 2306 ++NumAliasesResolved; 2307 Changed = true; 2308 2309 if (RenameTarget) { 2310 // Give the aliasee the name, linkage and other attributes of the alias. 2311 Target->takeName(&J); 2312 Target->setLinkage(J.getLinkage()); 2313 Target->setDSOLocal(J.isDSOLocal()); 2314 Target->setVisibility(J.getVisibility()); 2315 Target->setDLLStorageClass(J.getDLLStorageClass()); 2316 2317 if (Used.usedErase(&J)) 2318 Used.usedInsert(Target); 2319 2320 if (Used.compilerUsedErase(&J)) 2321 Used.compilerUsedInsert(Target); 2322 } else if (mayHaveOtherReferences(J, Used)) 2323 continue; 2324 2325 // Delete the alias. 2326 M.eraseAlias(&J); 2327 ++NumAliasesRemoved; 2328 Changed = true; 2329 } 2330 2331 Used.syncVariablesAndSets(); 2332 2333 return Changed; 2334 } 2335 2336 static Function * 2337 FindAtExitLibFunc(Module &M, 2338 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 2339 LibFunc Func) { 2340 // Hack to get a default TLI before we have actual Function. 2341 auto FuncIter = M.begin(); 2342 if (FuncIter == M.end()) 2343 return nullptr; 2344 auto *TLI = &GetTLI(*FuncIter); 2345 2346 if (!TLI->has(Func)) 2347 return nullptr; 2348 2349 Function *Fn = M.getFunction(TLI->getName(Func)); 2350 if (!Fn) 2351 return nullptr; 2352 2353 // Now get the actual TLI for Fn. 2354 TLI = &GetTLI(*Fn); 2355 2356 // Make sure that the function has the correct prototype. 2357 LibFunc F; 2358 if (!TLI->getLibFunc(*Fn, F) || F != Func) 2359 return nullptr; 2360 2361 return Fn; 2362 } 2363 2364 /// Returns whether the given function is an empty C++ destructor or atexit 2365 /// handler and can therefore be eliminated. Note that we assume that other 2366 /// optimization passes have already simplified the code so we simply check for 2367 /// 'ret'. 2368 static bool IsEmptyAtExitFunction(const Function &Fn) { 2369 // FIXME: We could eliminate C++ destructors if they're readonly/readnone and 2370 // nounwind, but that doesn't seem worth doing. 2371 if (Fn.isDeclaration()) 2372 return false; 2373 2374 for (const auto &I : Fn.getEntryBlock()) { 2375 if (I.isDebugOrPseudoInst()) 2376 continue; 2377 if (isa<ReturnInst>(I)) 2378 return true; 2379 break; 2380 } 2381 return false; 2382 } 2383 2384 static bool OptimizeEmptyGlobalAtExitDtors(Function *CXAAtExitFn, bool isCXX) { 2385 /// Itanium C++ ABI p3.3.5: 2386 /// 2387 /// After constructing a global (or local static) object, that will require 2388 /// destruction on exit, a termination function is registered as follows: 2389 /// 2390 /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); 2391 /// 2392 /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the 2393 /// call f(p) when DSO d is unloaded, before all such termination calls 2394 /// registered before this one. It returns zero if registration is 2395 /// successful, nonzero on failure. 2396 2397 // This pass will look for calls to __cxa_atexit or atexit where the function 2398 // is trivial and remove them. 2399 bool Changed = false; 2400 2401 for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) { 2402 // We're only interested in calls. Theoretically, we could handle invoke 2403 // instructions as well, but neither llvm-gcc nor clang generate invokes 2404 // to __cxa_atexit. 2405 CallInst *CI = dyn_cast<CallInst>(U); 2406 if (!CI) 2407 continue; 2408 2409 Function *DtorFn = 2410 dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts()); 2411 if (!DtorFn || !IsEmptyAtExitFunction(*DtorFn)) 2412 continue; 2413 2414 // Just remove the call. 2415 CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); 2416 CI->eraseFromParent(); 2417 2418 if (isCXX) 2419 ++NumCXXDtorsRemoved; 2420 else 2421 ++NumAtExitRemoved; 2422 2423 Changed |= true; 2424 } 2425 2426 return Changed; 2427 } 2428 2429 static Function *hasSideeffectFreeStaticResolution(GlobalIFunc &IF) { 2430 if (IF.isInterposable()) 2431 return nullptr; 2432 2433 Function *Resolver = IF.getResolverFunction(); 2434 if (!Resolver) 2435 return nullptr; 2436 2437 if (Resolver->isInterposable()) 2438 return nullptr; 2439 2440 // Only handle functions that have been optimized into a single basic block. 2441 auto It = Resolver->begin(); 2442 if (++It != Resolver->end()) 2443 return nullptr; 2444 2445 BasicBlock &BB = Resolver->getEntryBlock(); 2446 2447 if (any_of(BB, [](Instruction &I) { return I.mayHaveSideEffects(); })) 2448 return nullptr; 2449 2450 auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()); 2451 if (!Ret) 2452 return nullptr; 2453 2454 return dyn_cast<Function>(Ret->getReturnValue()); 2455 } 2456 2457 /// Find IFuncs that have resolvers that always point at the same statically 2458 /// known callee, and replace their callers with a direct call. 2459 static bool OptimizeStaticIFuncs(Module &M) { 2460 bool Changed = false; 2461 for (GlobalIFunc &IF : M.ifuncs()) 2462 if (Function *Callee = hasSideeffectFreeStaticResolution(IF)) 2463 if (!IF.use_empty() && 2464 (!Callee->isDeclaration() || 2465 none_of(IF.users(), [](User *U) { return isa<GlobalAlias>(U); }))) { 2466 IF.replaceAllUsesWith(Callee); 2467 NumIFuncsResolved++; 2468 Changed = true; 2469 } 2470 return Changed; 2471 } 2472 2473 static bool 2474 DeleteDeadIFuncs(Module &M, 2475 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { 2476 bool Changed = false; 2477 for (GlobalIFunc &IF : make_early_inc_range(M.ifuncs())) 2478 if (deleteIfDead(IF, NotDiscardableComdats)) { 2479 NumIFuncsDeleted++; 2480 Changed = true; 2481 } 2482 return Changed; 2483 } 2484 2485 // Follows the use-def chain of \p V backwards until it finds a Function, 2486 // in which case it collects in \p Versions. Return true on successful 2487 // use-def chain traversal, false otherwise. 2488 static bool collectVersions(TargetTransformInfo &TTI, Value *V, 2489 SmallVectorImpl<Function *> &Versions) { 2490 if (auto *F = dyn_cast<Function>(V)) { 2491 if (!TTI.isMultiversionedFunction(*F)) 2492 return false; 2493 Versions.push_back(F); 2494 } else if (auto *Sel = dyn_cast<SelectInst>(V)) { 2495 if (!collectVersions(TTI, Sel->getTrueValue(), Versions)) 2496 return false; 2497 if (!collectVersions(TTI, Sel->getFalseValue(), Versions)) 2498 return false; 2499 } else if (auto *Phi = dyn_cast<PHINode>(V)) { 2500 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) 2501 if (!collectVersions(TTI, Phi->getIncomingValue(I), Versions)) 2502 return false; 2503 } else { 2504 // Unknown instruction type. Bail. 2505 return false; 2506 } 2507 return true; 2508 } 2509 2510 // Bypass the IFunc Resolver of MultiVersioned functions when possible. To 2511 // deduce whether the optimization is legal we need to compare the target 2512 // features between caller and callee versions. The criteria for bypassing 2513 // the resolver are the following: 2514 // 2515 // * If the callee's feature set is a subset of the caller's feature set, 2516 // then the callee is a candidate for direct call. 2517 // 2518 // * Among such candidates the one of highest priority is the best match 2519 // and it shall be picked, unless there is a version of the callee with 2520 // higher priority than the best match which cannot be picked from a 2521 // higher priority caller (directly or through the resolver). 2522 // 2523 // * For every higher priority callee version than the best match, there 2524 // is a higher priority caller version whose feature set availability 2525 // is implied by the callee's feature set. 2526 // 2527 static bool OptimizeNonTrivialIFuncs( 2528 Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) { 2529 bool Changed = false; 2530 2531 // Cache containing the mask constructed from a function's target features. 2532 DenseMap<Function *, uint64_t> FeatureMask; 2533 2534 for (GlobalIFunc &IF : M.ifuncs()) { 2535 if (IF.isInterposable()) 2536 continue; 2537 2538 Function *Resolver = IF.getResolverFunction(); 2539 if (!Resolver) 2540 continue; 2541 2542 if (Resolver->isInterposable()) 2543 continue; 2544 2545 TargetTransformInfo &TTI = GetTTI(*Resolver); 2546 2547 // Discover the callee versions. 2548 SmallVector<Function *> Callees; 2549 if (any_of(*Resolver, [&TTI, &Callees](BasicBlock &BB) { 2550 if (auto *Ret = dyn_cast_or_null<ReturnInst>(BB.getTerminator())) 2551 if (!collectVersions(TTI, Ret->getReturnValue(), Callees)) 2552 return true; 2553 return false; 2554 })) 2555 continue; 2556 2557 assert(!Callees.empty() && "Expecting successful collection of versions"); 2558 2559 LLVM_DEBUG(dbgs() << "Statically resolving calls to function " 2560 << Resolver->getName() << "\n"); 2561 2562 // Cache the feature mask for each callee. 2563 for (Function *Callee : Callees) { 2564 auto [It, Inserted] = FeatureMask.try_emplace(Callee); 2565 if (Inserted) 2566 It->second = TTI.getFeatureMask(*Callee); 2567 } 2568 2569 // Sort the callee versions in decreasing priority order. 2570 sort(Callees, [&](auto *LHS, auto *RHS) { 2571 return FeatureMask[LHS] > FeatureMask[RHS]; 2572 }); 2573 2574 // Find the callsites and cache the feature mask for each caller. 2575 SmallVector<Function *> Callers; 2576 DenseMap<Function *, SmallVector<CallBase *>> CallSites; 2577 for (User *U : IF.users()) { 2578 if (auto *CB = dyn_cast<CallBase>(U)) { 2579 if (CB->getCalledOperand() == &IF) { 2580 Function *Caller = CB->getFunction(); 2581 auto [FeatIt, FeatInserted] = FeatureMask.try_emplace(Caller); 2582 if (FeatInserted) 2583 FeatIt->second = TTI.getFeatureMask(*Caller); 2584 auto [CallIt, CallInserted] = CallSites.try_emplace(Caller); 2585 if (CallInserted) 2586 Callers.push_back(Caller); 2587 CallIt->second.push_back(CB); 2588 } 2589 } 2590 } 2591 2592 // Sort the caller versions in decreasing priority order. 2593 sort(Callers, [&](auto *LHS, auto *RHS) { 2594 return FeatureMask[LHS] > FeatureMask[RHS]; 2595 }); 2596 2597 auto implies = [](uint64_t A, uint64_t B) { return (A & B) == B; }; 2598 2599 // Index to the highest priority candidate. 2600 unsigned I = 0; 2601 // Now try to redirect calls starting from higher priority callers. 2602 for (Function *Caller : Callers) { 2603 assert(I < Callees.size() && "Found callers of equal priority"); 2604 2605 Function *Callee = Callees[I]; 2606 uint64_t CallerBits = FeatureMask[Caller]; 2607 uint64_t CalleeBits = FeatureMask[Callee]; 2608 2609 // In the case of FMV callers, we know that all higher priority callers 2610 // than the current one did not get selected at runtime, which helps 2611 // reason about the callees (if they have versions that mandate presence 2612 // of the features which we already know are unavailable on this target). 2613 if (TTI.isMultiversionedFunction(*Caller)) { 2614 // If the feature set of the caller implies the feature set of the 2615 // highest priority candidate then it shall be picked. In case of 2616 // identical sets advance the candidate index one position. 2617 if (CallerBits == CalleeBits) 2618 ++I; 2619 else if (!implies(CallerBits, CalleeBits)) { 2620 // Keep advancing the candidate index as long as the caller's 2621 // features are a subset of the current candidate's. 2622 while (implies(CalleeBits, CallerBits)) { 2623 if (++I == Callees.size()) 2624 break; 2625 CalleeBits = FeatureMask[Callees[I]]; 2626 } 2627 continue; 2628 } 2629 } else { 2630 // We can't reason much about non-FMV callers. Just pick the highest 2631 // priority callee if it matches, otherwise bail. 2632 if (!OptimizeNonFMVCallers || I > 0 || !implies(CallerBits, CalleeBits)) 2633 continue; 2634 } 2635 auto &Calls = CallSites[Caller]; 2636 for (CallBase *CS : Calls) { 2637 LLVM_DEBUG(dbgs() << "Redirecting call " << Caller->getName() << " -> " 2638 << Callee->getName() << "\n"); 2639 CS->setCalledOperand(Callee); 2640 } 2641 Changed = true; 2642 } 2643 if (IF.use_empty() || 2644 all_of(IF.users(), [](User *U) { return isa<GlobalAlias>(U); })) 2645 NumIFuncsResolved++; 2646 } 2647 return Changed; 2648 } 2649 2650 static bool 2651 optimizeGlobalsInModule(Module &M, const DataLayout &DL, 2652 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 2653 function_ref<TargetTransformInfo &(Function &)> GetTTI, 2654 function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 2655 function_ref<DominatorTree &(Function &)> LookupDomTree, 2656 function_ref<void(Function &F)> ChangedCFGCallback, 2657 function_ref<void(Function &F)> DeleteFnCallback) { 2658 SmallPtrSet<const Comdat *, 8> NotDiscardableComdats; 2659 bool Changed = false; 2660 bool LocalChange = true; 2661 std::optional<uint32_t> FirstNotFullyEvaluatedPriority; 2662 2663 while (LocalChange) { 2664 LocalChange = false; 2665 2666 NotDiscardableComdats.clear(); 2667 for (const GlobalVariable &GV : M.globals()) 2668 if (const Comdat *C = GV.getComdat()) 2669 if (!GV.isDiscardableIfUnused() || !GV.use_empty()) 2670 NotDiscardableComdats.insert(C); 2671 for (Function &F : M) 2672 if (const Comdat *C = F.getComdat()) 2673 if (!F.isDefTriviallyDead()) 2674 NotDiscardableComdats.insert(C); 2675 for (GlobalAlias &GA : M.aliases()) 2676 if (const Comdat *C = GA.getComdat()) 2677 if (!GA.isDiscardableIfUnused() || !GA.use_empty()) 2678 NotDiscardableComdats.insert(C); 2679 2680 // Delete functions that are trivially dead, ccc -> fastcc 2681 LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree, 2682 NotDiscardableComdats, ChangedCFGCallback, 2683 DeleteFnCallback); 2684 2685 // Optimize global_ctors list. 2686 LocalChange |= 2687 optimizeGlobalCtorsList(M, [&](uint32_t Priority, Function *F) { 2688 if (FirstNotFullyEvaluatedPriority && 2689 *FirstNotFullyEvaluatedPriority != Priority) 2690 return false; 2691 bool Evaluated = EvaluateStaticConstructor(F, DL, &GetTLI(*F)); 2692 if (!Evaluated) 2693 FirstNotFullyEvaluatedPriority = Priority; 2694 return Evaluated; 2695 }); 2696 2697 // Optimize non-address-taken globals. 2698 LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree, 2699 NotDiscardableComdats); 2700 2701 // Resolve aliases, when possible. 2702 LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats); 2703 2704 // Try to remove trivial global destructors if they are not removed 2705 // already. 2706 if (Function *CXAAtExitFn = 2707 FindAtExitLibFunc(M, GetTLI, LibFunc_cxa_atexit)) 2708 LocalChange |= OptimizeEmptyGlobalAtExitDtors(CXAAtExitFn, true); 2709 2710 if (Function *AtExitFn = FindAtExitLibFunc(M, GetTLI, LibFunc_atexit)) 2711 LocalChange |= OptimizeEmptyGlobalAtExitDtors(AtExitFn, false); 2712 2713 // Optimize IFuncs whose callee's are statically known. 2714 LocalChange |= OptimizeStaticIFuncs(M); 2715 2716 // Optimize IFuncs based on the target features of the caller. 2717 LocalChange |= OptimizeNonTrivialIFuncs(M, GetTTI); 2718 2719 // Remove any IFuncs that are now dead. 2720 LocalChange |= DeleteDeadIFuncs(M, NotDiscardableComdats); 2721 2722 Changed |= LocalChange; 2723 } 2724 2725 // TODO: Move all global ctors functions to the end of the module for code 2726 // layout. 2727 2728 return Changed; 2729 } 2730 2731 PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { 2732 auto &DL = M.getDataLayout(); 2733 auto &FAM = 2734 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2735 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{ 2736 return FAM.getResult<DominatorTreeAnalysis>(F); 2737 }; 2738 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 2739 return FAM.getResult<TargetLibraryAnalysis>(F); 2740 }; 2741 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { 2742 return FAM.getResult<TargetIRAnalysis>(F); 2743 }; 2744 2745 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { 2746 return FAM.getResult<BlockFrequencyAnalysis>(F); 2747 }; 2748 auto ChangedCFGCallback = [&FAM](Function &F) { 2749 FAM.invalidate(F, PreservedAnalyses::none()); 2750 }; 2751 auto DeleteFnCallback = [&FAM](Function &F) { FAM.clear(F, F.getName()); }; 2752 2753 if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree, 2754 ChangedCFGCallback, DeleteFnCallback)) 2755 return PreservedAnalyses::all(); 2756 2757 PreservedAnalyses PA = PreservedAnalyses::none(); 2758 // We made sure to clear analyses for deleted functions. 2759 PA.preserve<FunctionAnalysisManagerModuleProxy>(); 2760 // The only place we modify the CFG is when calling 2761 // removeUnreachableBlocks(), but there we make sure to invalidate analyses 2762 // for modified functions. 2763 PA.preserveSet<CFGAnalyses>(); 2764 return PA; 2765 } 2766