1 //===- GlobalOpt.cpp - Optimize Global Variables --------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass transforms simple global variables that never have their address 10 // taken. If obviously true, it marks read/write globals as constant, deletes 11 // variables only stored to, etc. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/GlobalOpt.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/SmallPtrSet.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/ADT/Twine.h" 22 #include "llvm/ADT/iterator_range.h" 23 #include "llvm/Analysis/BlockFrequencyInfo.h" 24 #include "llvm/Analysis/ConstantFolding.h" 25 #include "llvm/Analysis/MemoryBuiltins.h" 26 #include "llvm/Analysis/TargetLibraryInfo.h" 27 #include "llvm/Analysis/TargetTransformInfo.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/BinaryFormat/Dwarf.h" 30 #include "llvm/IR/Attributes.h" 31 #include "llvm/IR/BasicBlock.h" 32 #include "llvm/IR/CallingConv.h" 33 #include "llvm/IR/Constant.h" 34 #include "llvm/IR/Constants.h" 35 #include "llvm/IR/DataLayout.h" 36 #include "llvm/IR/DebugInfoMetadata.h" 37 #include "llvm/IR/DerivedTypes.h" 38 #include "llvm/IR/Dominators.h" 39 #include "llvm/IR/Function.h" 40 #include "llvm/IR/GlobalAlias.h" 41 #include "llvm/IR/GlobalValue.h" 42 #include "llvm/IR/GlobalVariable.h" 43 #include "llvm/IR/IRBuilder.h" 44 #include "llvm/IR/InstrTypes.h" 45 #include "llvm/IR/Instruction.h" 46 #include "llvm/IR/Instructions.h" 47 #include "llvm/IR/IntrinsicInst.h" 48 #include "llvm/IR/Module.h" 49 #include "llvm/IR/Operator.h" 50 #include "llvm/IR/Type.h" 51 #include "llvm/IR/Use.h" 52 #include "llvm/IR/User.h" 53 #include "llvm/IR/Value.h" 54 #include "llvm/IR/ValueHandle.h" 55 #include "llvm/Support/AtomicOrdering.h" 56 #include "llvm/Support/Casting.h" 57 #include "llvm/Support/CommandLine.h" 58 #include "llvm/Support/Debug.h" 59 #include "llvm/Support/ErrorHandling.h" 60 #include "llvm/Support/raw_ostream.h" 61 #include "llvm/Transforms/IPO.h" 62 #include "llvm/Transforms/Utils/CtorUtils.h" 63 #include "llvm/Transforms/Utils/Evaluator.h" 64 #include "llvm/Transforms/Utils/GlobalStatus.h" 65 #include "llvm/Transforms/Utils/Local.h" 66 #include <cassert> 67 #include <cstdint> 68 #include <optional> 69 #include <utility> 70 #include <vector> 71 72 using namespace llvm; 73 74 #define DEBUG_TYPE "globalopt" 75 76 STATISTIC(NumMarked , "Number of globals marked constant"); 77 STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); 78 STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); 79 STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); 80 STATISTIC(NumDeleted , "Number of globals deleted"); 81 STATISTIC(NumGlobUses , "Number of global uses devirtualized"); 82 STATISTIC(NumLocalized , "Number of globals localized"); 83 STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); 84 STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); 85 STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); 86 STATISTIC(NumNestRemoved , "Number of nest attributes 
removed"); 87 STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); 88 STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); 89 STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); 90 STATISTIC(NumAtExitRemoved, "Number of atexit handlers removed"); 91 STATISTIC(NumInternalFunc, "Number of internal functions"); 92 STATISTIC(NumColdCC, "Number of functions marked coldcc"); 93 STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs"); 94 STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed"); 95 96 static cl::opt<bool> 97 EnableColdCCStressTest("enable-coldcc-stress-test", 98 cl::desc("Enable stress test of coldcc by adding " 99 "calling conv to all internal functions."), 100 cl::init(false), cl::Hidden); 101 102 static cl::opt<int> ColdCCRelFreq( 103 "coldcc-rel-freq", cl::Hidden, cl::init(2), 104 cl::desc( 105 "Maximum block frequency, expressed as a percentage of caller's " 106 "entry frequency, for a call site to be considered cold for enabling" 107 "coldcc")); 108 109 /// Is this global variable possibly used by a leak checker as a root? If so, 110 /// we might not really want to eliminate the stores to it. 111 static bool isLeakCheckerRoot(GlobalVariable *GV) { 112 // A global variable is a root if it is a pointer, or could plausibly contain 113 // a pointer. There are two challenges; one is that we could have a struct 114 // the has an inner member which is a pointer. We recurse through the type to 115 // detect these (up to a point). The other is that we may actually be a union 116 // of a pointer and another type, and so our LLVM type is an integer which 117 // gets converted into a pointer, or our type is an [i8 x #] with a pointer 118 // potentially contained here. 119 120 if (GV->hasPrivateLinkage()) 121 return false; 122 123 SmallVector<Type *, 4> Types; 124 Types.push_back(GV->getValueType()); 125 126 unsigned Limit = 20; 127 do { 128 Type *Ty = Types.pop_back_val(); 129 switch (Ty->getTypeID()) { 130 default: break; 131 case Type::PointerTyID: 132 return true; 133 case Type::FixedVectorTyID: 134 case Type::ScalableVectorTyID: 135 if (cast<VectorType>(Ty)->getElementType()->isPointerTy()) 136 return true; 137 break; 138 case Type::ArrayTyID: 139 Types.push_back(cast<ArrayType>(Ty)->getElementType()); 140 break; 141 case Type::StructTyID: { 142 StructType *STy = cast<StructType>(Ty); 143 if (STy->isOpaque()) return true; 144 for (Type *InnerTy : STy->elements()) { 145 if (isa<PointerType>(InnerTy)) return true; 146 if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) || 147 isa<VectorType>(InnerTy)) 148 Types.push_back(InnerTy); 149 } 150 break; 151 } 152 } 153 if (--Limit == 0) return true; 154 } while (!Types.empty()); 155 return false; 156 } 157 158 /// Given a value that is stored to a global but never read, determine whether 159 /// it's safe to remove the store and the chain of computation that feeds the 160 /// store. 
161 static bool IsSafeComputationToRemove( 162 Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) { 163 do { 164 if (isa<Constant>(V)) 165 return true; 166 if (!V->hasOneUse()) 167 return false; 168 if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || 169 isa<GlobalValue>(V)) 170 return false; 171 if (isAllocationFn(V, GetTLI)) 172 return true; 173 174 Instruction *I = cast<Instruction>(V); 175 if (I->mayHaveSideEffects()) 176 return false; 177 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { 178 if (!GEP->hasAllConstantIndices()) 179 return false; 180 } else if (I->getNumOperands() != 1) { 181 return false; 182 } 183 184 V = I->getOperand(0); 185 } while (true); 186 } 187 188 /// This GV is a pointer root. Loop over all users of the global and clean up 189 /// any that obviously don't assign the global a value that isn't dynamically 190 /// allocated. 191 static bool 192 CleanupPointerRootUsers(GlobalVariable *GV, 193 function_ref<TargetLibraryInfo &(Function &)> GetTLI) { 194 // A brief explanation of leak checkers. The goal is to find bugs where 195 // pointers are forgotten, causing an accumulating growth in memory 196 // usage over time. The common strategy for leak checkers is to explicitly 197 // allow the memory pointed to by globals at exit. This is popular because it 198 // also solves another problem where the main thread of a C++ program may shut 199 // down before other threads that are still expecting to use those globals. To 200 // handle that case, we expect the program may create a singleton and never 201 // destroy it. 202 203 bool Changed = false; 204 205 // If Dead[n].first is the only use of a malloc result, we can delete its 206 // chain of computation and the store to the global in Dead[n].second. 207 SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead; 208 209 SmallVector<User *> Worklist(GV->users()); 210 // Constants can't be pointers to dynamically allocated memory. 
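  // Stores of constants are therefore deleted outright below; stores of
  // single-use instructions are queued on Dead so that the computation
  // feeding them can be removed as well (see IsSafeComputationToRemove).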
211 while (!Worklist.empty()) { 212 User *U = Worklist.pop_back_val(); 213 if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 214 Value *V = SI->getValueOperand(); 215 if (isa<Constant>(V)) { 216 Changed = true; 217 SI->eraseFromParent(); 218 } else if (Instruction *I = dyn_cast<Instruction>(V)) { 219 if (I->hasOneUse()) 220 Dead.push_back(std::make_pair(I, SI)); 221 } 222 } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) { 223 if (isa<Constant>(MSI->getValue())) { 224 Changed = true; 225 MSI->eraseFromParent(); 226 } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) { 227 if (I->hasOneUse()) 228 Dead.push_back(std::make_pair(I, MSI)); 229 } 230 } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) { 231 GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource()); 232 if (MemSrc && MemSrc->isConstant()) { 233 Changed = true; 234 MTI->eraseFromParent(); 235 } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) { 236 if (I->hasOneUse()) 237 Dead.push_back(std::make_pair(I, MTI)); 238 } 239 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { 240 if (isa<GEPOperator>(CE)) 241 append_range(Worklist, CE->users()); 242 } 243 } 244 245 for (int i = 0, e = Dead.size(); i != e; ++i) { 246 if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) { 247 Dead[i].second->eraseFromParent(); 248 Instruction *I = Dead[i].first; 249 do { 250 if (isAllocationFn(I, GetTLI)) 251 break; 252 Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); 253 if (!J) 254 break; 255 I->eraseFromParent(); 256 I = J; 257 } while (true); 258 I->eraseFromParent(); 259 Changed = true; 260 } 261 } 262 263 GV->removeDeadConstantUsers(); 264 return Changed; 265 } 266 267 /// We just marked GV constant. Loop over all users of the global, cleaning up 268 /// the obvious ones. This is largely just a quick scan over the use list to 269 /// clean up the easy and obvious cruft. This returns true if it made a change. 270 static bool CleanupConstantGlobalUsers(GlobalVariable *GV, 271 const DataLayout &DL) { 272 Constant *Init = GV->getInitializer(); 273 SmallVector<User *, 8> WorkList(GV->users()); 274 SmallPtrSet<User *, 8> Visited; 275 bool Changed = false; 276 277 SmallVector<WeakTrackingVH> MaybeDeadInsts; 278 auto EraseFromParent = [&](Instruction *I) { 279 for (Value *Op : I->operands()) 280 if (auto *OpI = dyn_cast<Instruction>(Op)) 281 MaybeDeadInsts.push_back(OpI); 282 I->eraseFromParent(); 283 Changed = true; 284 }; 285 while (!WorkList.empty()) { 286 User *U = WorkList.pop_back_val(); 287 if (!Visited.insert(U).second) 288 continue; 289 290 if (auto *BO = dyn_cast<BitCastOperator>(U)) 291 append_range(WorkList, BO->users()); 292 if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U)) 293 append_range(WorkList, ASC->users()); 294 else if (auto *GEP = dyn_cast<GEPOperator>(U)) 295 append_range(WorkList, GEP->users()); 296 else if (auto *LI = dyn_cast<LoadInst>(U)) { 297 // A load from a uniform value is always the same, regardless of any 298 // applied offset. 
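      // For example, if the initializer is zeroinitializer or undef, any load
      // from the global folds to the corresponding zero/undef value of the
      // loaded type, whatever the offset.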
299 Type *Ty = LI->getType(); 300 if (Constant *Res = ConstantFoldLoadFromUniformValue(Init, Ty, DL)) { 301 LI->replaceAllUsesWith(Res); 302 EraseFromParent(LI); 303 continue; 304 } 305 306 Value *PtrOp = LI->getPointerOperand(); 307 APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0); 308 PtrOp = PtrOp->stripAndAccumulateConstantOffsets( 309 DL, Offset, /* AllowNonInbounds */ true); 310 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(PtrOp)) { 311 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) 312 PtrOp = II->getArgOperand(0); 313 } 314 if (PtrOp == GV) { 315 if (auto *Value = ConstantFoldLoadFromConst(Init, Ty, Offset, DL)) { 316 LI->replaceAllUsesWith(Value); 317 EraseFromParent(LI); 318 } 319 } 320 } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 321 // Store must be unreachable or storing Init into the global. 322 EraseFromParent(SI); 323 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv 324 if (getUnderlyingObject(MI->getRawDest()) == GV) 325 EraseFromParent(MI); 326 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { 327 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) 328 append_range(WorkList, II->users()); 329 } 330 } 331 332 Changed |= 333 RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts); 334 GV->removeDeadConstantUsers(); 335 return Changed; 336 } 337 338 /// Part of the global at a specific offset, which is only accessed through 339 /// loads and stores with the given type. 340 struct GlobalPart { 341 Type *Ty; 342 Constant *Initializer = nullptr; 343 bool IsLoaded = false; 344 bool IsStored = false; 345 }; 346 347 /// Look at all uses of the global and determine which (offset, type) pairs it 348 /// can be split into. 349 static bool collectSRATypes(DenseMap<uint64_t, GlobalPart> &Parts, 350 GlobalVariable *GV, const DataLayout &DL) { 351 SmallVector<Use *, 16> Worklist; 352 SmallPtrSet<Use *, 16> Visited; 353 auto AppendUses = [&](Value *V) { 354 for (Use &U : V->uses()) 355 if (Visited.insert(&U).second) 356 Worklist.push_back(&U); 357 }; 358 AppendUses(GV); 359 while (!Worklist.empty()) { 360 Use *U = Worklist.pop_back_val(); 361 User *V = U->getUser(); 362 363 auto *GEP = dyn_cast<GEPOperator>(V); 364 if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) || 365 (GEP && GEP->hasAllConstantIndices())) { 366 AppendUses(V); 367 continue; 368 } 369 370 if (Value *Ptr = getLoadStorePointerOperand(V)) { 371 // This is storing the global address into somewhere, not storing into 372 // the global. 373 if (isa<StoreInst>(V) && U->getOperandNo() == 0) 374 return false; 375 376 APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); 377 Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, 378 /* AllowNonInbounds */ true); 379 if (Ptr != GV || Offset.getActiveBits() >= 64) 380 return false; 381 382 // TODO: We currently require that all accesses at a given offset must 383 // use the same type. This could be relaxed. 384 Type *Ty = getLoadStoreType(V); 385 const auto &[It, Inserted] = 386 Parts.try_emplace(Offset.getZExtValue(), GlobalPart{Ty}); 387 if (Ty != It->second.Ty) 388 return false; 389 390 if (Inserted) { 391 It->second.Initializer = 392 ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL); 393 if (!It->second.Initializer) { 394 LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of " 395 << *GV << " with type " << *Ty << " at offset " 396 << Offset.getZExtValue()); 397 return false; 398 } 399 } 400 401 // Scalable types not currently supported. 
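      // (The allocation size of a scalable vector is a multiple of vscale and
      // is not known at compile time, so it cannot be assigned a fixed byte
      // range within the global.)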
402 if (Ty->isScalableTy()) 403 return false; 404 405 auto IsStored = [](Value *V, Constant *Initializer) { 406 auto *SI = dyn_cast<StoreInst>(V); 407 if (!SI) 408 return false; 409 410 Constant *StoredConst = dyn_cast<Constant>(SI->getOperand(0)); 411 if (!StoredConst) 412 return true; 413 414 // Don't consider stores that only write the initializer value. 415 return Initializer != StoredConst; 416 }; 417 418 It->second.IsLoaded |= isa<LoadInst>(V); 419 It->second.IsStored |= IsStored(V, It->second.Initializer); 420 continue; 421 } 422 423 // Ignore dead constant users. 424 if (auto *C = dyn_cast<Constant>(V)) { 425 if (!isSafeToDestroyConstant(C)) 426 return false; 427 continue; 428 } 429 430 // Unknown user. 431 return false; 432 } 433 434 return true; 435 } 436 437 /// Copy over the debug info for a variable to its SRA replacements. 438 static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, 439 uint64_t FragmentOffsetInBits, 440 uint64_t FragmentSizeInBits, 441 uint64_t VarSize) { 442 SmallVector<DIGlobalVariableExpression *, 1> GVs; 443 GV->getDebugInfo(GVs); 444 for (auto *GVE : GVs) { 445 DIVariable *Var = GVE->getVariable(); 446 DIExpression *Expr = GVE->getExpression(); 447 int64_t CurVarOffsetInBytes = 0; 448 uint64_t CurVarOffsetInBits = 0; 449 uint64_t FragmentEndInBits = FragmentOffsetInBits + FragmentSizeInBits; 450 451 // Calculate the offset (Bytes), Continue if unknown. 452 if (!Expr->extractIfOffset(CurVarOffsetInBytes)) 453 continue; 454 455 // Ignore negative offset. 456 if (CurVarOffsetInBytes < 0) 457 continue; 458 459 // Convert offset to bits. 460 CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes; 461 462 // Current var starts after the fragment, ignore. 463 if (CurVarOffsetInBits >= FragmentEndInBits) 464 continue; 465 466 uint64_t CurVarSize = Var->getType()->getSizeInBits(); 467 uint64_t CurVarEndInBits = CurVarOffsetInBits + CurVarSize; 468 // Current variable ends before start of fragment, ignore. 469 if (CurVarSize != 0 && /* CurVarSize is known */ 470 CurVarEndInBits <= FragmentOffsetInBits) 471 continue; 472 473 // Current variable fits in (not greater than) the fragment, 474 // does not need fragment expression. 475 if (CurVarSize != 0 && /* CurVarSize is known */ 476 CurVarOffsetInBits >= FragmentOffsetInBits && 477 CurVarEndInBits <= FragmentEndInBits) { 478 uint64_t CurVarOffsetInFragment = 479 (CurVarOffsetInBits - FragmentOffsetInBits) / 8; 480 if (CurVarOffsetInFragment != 0) 481 Expr = DIExpression::get(Expr->getContext(), {dwarf::DW_OP_plus_uconst, 482 CurVarOffsetInFragment}); 483 else 484 Expr = DIExpression::get(Expr->getContext(), {}); 485 auto *NGVE = 486 DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr); 487 NGV->addDebugInfo(NGVE); 488 continue; 489 } 490 // Current variable does not fit in single fragment, 491 // emit a fragment expression. 
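    // (i.e. append a DW_OP_LLVM_fragment that identifies which bits of the
    // source variable this replacement global covers).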
492 if (FragmentSizeInBits < VarSize) { 493 if (CurVarOffsetInBits > FragmentOffsetInBits) 494 continue; 495 uint64_t CurVarFragmentOffsetInBits = 496 FragmentOffsetInBits - CurVarOffsetInBits; 497 uint64_t CurVarFragmentSizeInBits = FragmentSizeInBits; 498 if (CurVarSize != 0 && CurVarEndInBits < FragmentEndInBits) 499 CurVarFragmentSizeInBits -= (FragmentEndInBits - CurVarEndInBits); 500 if (CurVarOffsetInBits) 501 Expr = DIExpression::get(Expr->getContext(), {}); 502 if (auto E = DIExpression::createFragmentExpression( 503 Expr, CurVarFragmentOffsetInBits, CurVarFragmentSizeInBits)) 504 Expr = *E; 505 else 506 continue; 507 } 508 auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr); 509 NGV->addDebugInfo(NGVE); 510 } 511 } 512 513 /// Perform scalar replacement of aggregates on the specified global variable. 514 /// This opens the door for other optimizations by exposing the behavior of the 515 /// program in a more fine-grained way. We have determined that this 516 /// transformation is safe already. We return the first global variable we 517 /// insert so that the caller can reprocess it. 518 static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { 519 assert(GV->hasLocalLinkage()); 520 521 // Collect types to split into. 522 DenseMap<uint64_t, GlobalPart> Parts; 523 if (!collectSRATypes(Parts, GV, DL) || Parts.empty()) 524 return nullptr; 525 526 // Make sure we don't SRA back to the same type. 527 if (Parts.size() == 1 && Parts.begin()->second.Ty == GV->getValueType()) 528 return nullptr; 529 530 // Don't perform SRA if we would have to split into many globals. Ignore 531 // parts that are either only loaded or only stored, because we expect them 532 // to be optimized away. 533 unsigned NumParts = count_if(Parts, [](const auto &Pair) { 534 return Pair.second.IsLoaded && Pair.second.IsStored; 535 }); 536 if (NumParts > 16) 537 return nullptr; 538 539 // Sort by offset. 540 SmallVector<std::tuple<uint64_t, Type *, Constant *>, 16> TypesVector; 541 for (const auto &Pair : Parts) { 542 TypesVector.push_back( 543 {Pair.first, Pair.second.Ty, Pair.second.Initializer}); 544 } 545 sort(TypesVector, llvm::less_first()); 546 547 // Check that the types are non-overlapping. 548 uint64_t Offset = 0; 549 for (const auto &[OffsetForTy, Ty, _] : TypesVector) { 550 // Overlaps with previous type. 551 if (OffsetForTy < Offset) 552 return nullptr; 553 554 Offset = OffsetForTy + DL.getTypeAllocSize(Ty); 555 } 556 557 // Some accesses go beyond the end of the global, don't bother. 558 if (Offset > DL.getTypeAllocSize(GV->getValueType())) 559 return nullptr; 560 561 LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); 562 563 // Get the alignment of the global, either explicit or target-specific. 564 Align StartAlignment = 565 DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType()); 566 uint64_t VarSize = DL.getTypeSizeInBits(GV->getValueType()); 567 568 // Create replacement globals. 569 DenseMap<uint64_t, GlobalVariable *> NewGlobals; 570 unsigned NameSuffix = 0; 571 for (auto &[OffsetForTy, Ty, Initializer] : TypesVector) { 572 GlobalVariable *NGV = new GlobalVariable( 573 *GV->getParent(), Ty, false, GlobalVariable::InternalLinkage, 574 Initializer, GV->getName() + "." + Twine(NameSuffix++), GV, 575 GV->getThreadLocalMode(), GV->getAddressSpace()); 576 NGV->copyAttributesFrom(GV); 577 NewGlobals.insert({OffsetForTy, NGV}); 578 579 // Calculate the known alignment of the field. 
If the original aggregate 580 // had 256 byte alignment for example, something might depend on that: 581 // propagate info to each field. 582 Align NewAlign = commonAlignment(StartAlignment, OffsetForTy); 583 if (NewAlign > DL.getABITypeAlign(Ty)) 584 NGV->setAlignment(NewAlign); 585 586 // Copy over the debug info for the variable. 587 transferSRADebugInfo(GV, NGV, OffsetForTy * 8, 588 DL.getTypeAllocSizeInBits(Ty), VarSize); 589 } 590 591 // Replace uses of the original global with uses of the new global. 592 SmallVector<Value *, 16> Worklist; 593 SmallPtrSet<Value *, 16> Visited; 594 SmallVector<WeakTrackingVH, 16> DeadInsts; 595 auto AppendUsers = [&](Value *V) { 596 for (User *U : V->users()) 597 if (Visited.insert(U).second) 598 Worklist.push_back(U); 599 }; 600 AppendUsers(GV); 601 while (!Worklist.empty()) { 602 Value *V = Worklist.pop_back_val(); 603 if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) || 604 isa<GEPOperator>(V)) { 605 AppendUsers(V); 606 if (isa<Instruction>(V)) 607 DeadInsts.push_back(V); 608 continue; 609 } 610 611 if (Value *Ptr = getLoadStorePointerOperand(V)) { 612 APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); 613 Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, 614 /* AllowNonInbounds */ true); 615 assert(Ptr == GV && "Load/store must be from/to global"); 616 GlobalVariable *NGV = NewGlobals[Offset.getZExtValue()]; 617 assert(NGV && "Must have replacement global for this offset"); 618 619 // Update the pointer operand and recalculate alignment. 620 Align PrefAlign = DL.getPrefTypeAlign(getLoadStoreType(V)); 621 Align NewAlign = 622 getOrEnforceKnownAlignment(NGV, PrefAlign, DL, cast<Instruction>(V)); 623 624 if (auto *LI = dyn_cast<LoadInst>(V)) { 625 LI->setOperand(0, NGV); 626 LI->setAlignment(NewAlign); 627 } else { 628 auto *SI = cast<StoreInst>(V); 629 SI->setOperand(1, NGV); 630 SI->setAlignment(NewAlign); 631 } 632 continue; 633 } 634 635 assert(isa<Constant>(V) && isSafeToDestroyConstant(cast<Constant>(V)) && 636 "Other users can only be dead constants"); 637 } 638 639 // Delete old instructions and global. 640 RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); 641 GV->removeDeadConstantUsers(); 642 GV->eraseFromParent(); 643 ++NumSRA; 644 645 assert(NewGlobals.size() > 0); 646 return NewGlobals.begin()->second; 647 } 648 649 /// Return true if all users of the specified value will trap if the value is 650 /// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid 651 /// reprocessing them. 652 static bool AllUsesOfValueWillTrapIfNull(const Value *V, 653 SmallPtrSetImpl<const PHINode*> &PHIs) { 654 for (const User *U : V->users()) { 655 if (const Instruction *I = dyn_cast<Instruction>(U)) { 656 // If null pointer is considered valid, then all uses are non-trapping. 657 // Non address-space 0 globals have already been pruned by the caller. 658 if (NullPointerIsDefined(I->getFunction())) 659 return false; 660 } 661 if (isa<LoadInst>(U)) { 662 // Will trap. 663 } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { 664 if (SI->getOperand(0) == V) { 665 return false; // Storing the value. 
666 } 667 } else if (const CallInst *CI = dyn_cast<CallInst>(U)) { 668 if (CI->getCalledOperand() != V) { 669 return false; // Not calling the ptr 670 } 671 } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) { 672 if (II->getCalledOperand() != V) { 673 return false; // Not calling the ptr 674 } 675 } else if (const AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(U)) { 676 if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) 677 return false; 678 } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) { 679 if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false; 680 } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { 681 // If we've already seen this phi node, ignore it, it has already been 682 // checked. 683 if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs)) 684 return false; 685 } else if (isa<ICmpInst>(U) && 686 !ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) && 687 isa<LoadInst>(U->getOperand(0)) && 688 isa<ConstantPointerNull>(U->getOperand(1))) { 689 assert(isa<GlobalValue>(cast<LoadInst>(U->getOperand(0)) 690 ->getPointerOperand() 691 ->stripPointerCasts()) && 692 "Should be GlobalVariable"); 693 // This and only this kind of non-signed ICmpInst is to be replaced with 694 // the comparing of the value of the created global init bool later in 695 // optimizeGlobalAddressOfAllocation for the global variable. 696 } else { 697 return false; 698 } 699 } 700 return true; 701 } 702 703 /// Return true if all uses of any loads from GV will trap if the loaded value 704 /// is null. Note that this also permits comparisons of the loaded value 705 /// against null, as a special case. 706 static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { 707 SmallVector<const Value *, 4> Worklist; 708 Worklist.push_back(GV); 709 while (!Worklist.empty()) { 710 const Value *P = Worklist.pop_back_val(); 711 for (const auto *U : P->users()) { 712 if (auto *LI = dyn_cast<LoadInst>(U)) { 713 SmallPtrSet<const PHINode *, 8> PHIs; 714 if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) 715 return false; 716 } else if (auto *SI = dyn_cast<StoreInst>(U)) { 717 // Ignore stores to the global. 718 if (SI->getPointerOperand() != P) 719 return false; 720 } else if (auto *CE = dyn_cast<ConstantExpr>(U)) { 721 if (CE->stripPointerCasts() != GV) 722 return false; 723 // Check further the ConstantExpr. 724 Worklist.push_back(CE); 725 } else { 726 // We don't know or understand this user, bail out. 727 return false; 728 } 729 } 730 } 731 732 return true; 733 } 734 735 /// Get all the loads/store uses for global variable \p GV. 736 static void allUsesOfLoadAndStores(GlobalVariable *GV, 737 SmallVector<Value *, 4> &Uses) { 738 SmallVector<Value *, 4> Worklist; 739 Worklist.push_back(GV); 740 while (!Worklist.empty()) { 741 auto *P = Worklist.pop_back_val(); 742 for (auto *U : P->users()) { 743 if (auto *CE = dyn_cast<ConstantExpr>(U)) { 744 Worklist.push_back(CE); 745 continue; 746 } 747 748 assert((isa<LoadInst>(U) || isa<StoreInst>(U)) && 749 "Expect only load or store instructions"); 750 Uses.push_back(U); 751 } 752 } 753 } 754 755 static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { 756 bool Changed = false; 757 for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) { 758 Instruction *I = cast<Instruction>(*UI++); 759 // Uses are non-trapping if null pointer is considered valid. 760 // Non address-space 0 globals are already pruned by the caller. 
    if (NullPointerIsDefined(I->getFunction()))
      return false;
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      CallBase *CB = cast<CallBase>(I);
      if (CB->getCalledOperand() == V) {
        // Calling through the pointer! Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
        CB->setCalledOperand(NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
          if (CB->getArgOperand(i) == V) {
            PassedAsArg = true;
            CB->setArgOperand(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also. Be careful to not invalidate UI!
          UI = V->user_begin();
        }
      }
    } else if (AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(
          CI, ConstantExpr::getAddrSpaceCast(NewV, CI->getType()));
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // Should handle GEP here.
      SmallVector<Constant*, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands()-1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands()-1)
        Changed |= OptimizeAwayTrappingUsesOfValue(
            GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
                                                 NewV, Idxs));
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}

/// The specified global has only one non-null value stored into it. If there
/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know that they cannot be reachable with a null
/// value, so we can optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(
    GlobalVariable *GV, Constant *LV, const DataLayout &DL,
    function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of loads with uses of the stored value.
  for (User *GlobalUser : llvm::make_early_inc_range(GV->users())) {
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
      assert(GlobalUser->getOperand(1) == GV &&
             "Must be storing *to* the global");
    } else {
      AllNonStoreUsesGone = false;

      // If we get here we could have other crazy uses that are transitively
      // loaded.
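      // (e.g. a select or phi of the loaded pointer that is itself
      // dereferenced or called later on).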
      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
              isa<BitCastInst>(GlobalUser) ||
              isa<GetElementPtrInst>(GlobalUser) ||
              isa<AddrSpaceCastInst>(GlobalUser)) &&
             "Only expect loads and stores!");
    }
  }

  if (Changed) {
    LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
                      << "\n");
    ++NumGlobUses;
  }

  // If we nuked all of the loads, then none of the stores are needed either,
  // nor is the global.
  if (AllNonStoreUsesGone) {
    if (isLeakCheckerRoot(GV)) {
      Changed |= CleanupPointerRootUsers(GV, GetTLI);
    } else {
      Changed = true;
      CleanupConstantGlobalUsers(GV, DL);
    }
    if (GV->use_empty()) {
      LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
      Changed = true;
      GV->eraseFromParent();
      ++NumDeleted;
    }
  }
  return Changed;
}

/// Walk the use list of V, constant folding all of the instructions that are
/// foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
                                TargetLibraryInfo *TLI) {
  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
    if (Instruction *I = dyn_cast<Instruction>(*UI++))
      if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
        I->replaceAllUsesWith(NewC);

        // Advance UI to the next non-I use to avoid invalidating it!
        // Instructions could multiply use V.
        while (UI != E && *UI == I)
          ++UI;
        if (isInstructionTriviallyDead(I, TLI))
          I->eraseFromParent();
      }
}

/// This function takes the specified global variable, and transforms the
/// program as if it always contained the result of the specified malloc.
/// Because it is always the result of the specified malloc, there is no reason
/// to actually DO the malloc. Instead, turn the malloc into a global, and
/// rewrite any loads of GV as uses of the new global.
static GlobalVariable *
OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
                                  uint64_t AllocSize, Constant *InitVal,
                                  const DataLayout &DL,
                                  TargetLibraryInfo *TLI) {
  LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI
                    << '\n');

  // Create global of type [AllocSize x i8].
  Type *GlobalType = ArrayType::get(Type::getInt8Ty(GV->getContext()),
                                    AllocSize);

  // Create the new global variable. The contents of the allocated memory are
  // undefined initially, so initialize with an undef value.
  GlobalVariable *NewGV = new GlobalVariable(
      *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
      UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
      GV->getThreadLocalMode());

  // Initialize the global at the point of the original call. Note that this
  // is a different point from the initialization referred to below for the
  // nullability handling. Subtlety: We have not proven the original global was
  // only initialized once. As such, we can not fold this into the initializer
  // of the new global as we may need to re-init the storage multiple times.
  if (!isa<UndefValue>(InitVal)) {
    IRBuilder<> Builder(CI->getNextNode());
    // TODO: Use alignment above if align!=1
    Builder.CreateMemSet(NewGV, InitVal, AllocSize, std::nullopt);
  }

  // Update users of the allocation to use the new global instead.
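  // (Loads of GV itself are handled below: uses of the loaded pointer are
  // pointed at the new global, while comparisons of it against null are
  // redirected to the ".init" boolean introduced next.)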
942 CI->replaceAllUsesWith(NewGV); 943 944 // If there is a comparison against null, we will insert a global bool to 945 // keep track of whether the global was initialized yet or not. 946 GlobalVariable *InitBool = 947 new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, 948 GlobalValue::InternalLinkage, 949 ConstantInt::getFalse(GV->getContext()), 950 GV->getName()+".init", GV->getThreadLocalMode()); 951 bool InitBoolUsed = false; 952 953 // Loop over all instruction uses of GV, processing them in turn. 954 SmallVector<Value *, 4> Guses; 955 allUsesOfLoadAndStores(GV, Guses); 956 for (auto *U : Guses) { 957 if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 958 // The global is initialized when the store to it occurs. If the stored 959 // value is null value, the global bool is set to false, otherwise true. 960 new StoreInst(ConstantInt::getBool( 961 GV->getContext(), 962 !isa<ConstantPointerNull>(SI->getValueOperand())), 963 InitBool, false, Align(1), SI->getOrdering(), 964 SI->getSyncScopeID(), SI->getIterator()); 965 SI->eraseFromParent(); 966 continue; 967 } 968 969 LoadInst *LI = cast<LoadInst>(U); 970 while (!LI->use_empty()) { 971 Use &LoadUse = *LI->use_begin(); 972 ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser()); 973 if (!ICI) { 974 LoadUse.set(NewGV); 975 continue; 976 } 977 978 // Replace the cmp X, 0 with a use of the bool value. 979 Value *LV = new LoadInst(InitBool->getValueType(), InitBool, 980 InitBool->getName() + ".val", false, Align(1), 981 LI->getOrdering(), LI->getSyncScopeID(), 982 LI->getIterator()); 983 InitBoolUsed = true; 984 switch (ICI->getPredicate()) { 985 default: llvm_unreachable("Unknown ICmp Predicate!"); 986 case ICmpInst::ICMP_ULT: // X < null -> always false 987 LV = ConstantInt::getFalse(GV->getContext()); 988 break; 989 case ICmpInst::ICMP_UGE: // X >= null -> always true 990 LV = ConstantInt::getTrue(GV->getContext()); 991 break; 992 case ICmpInst::ICMP_ULE: 993 case ICmpInst::ICMP_EQ: 994 LV = BinaryOperator::CreateNot(LV, "notinit", ICI->getIterator()); 995 break; 996 case ICmpInst::ICMP_NE: 997 case ICmpInst::ICMP_UGT: 998 break; // no change. 999 } 1000 ICI->replaceAllUsesWith(LV); 1001 ICI->eraseFromParent(); 1002 } 1003 LI->eraseFromParent(); 1004 } 1005 1006 // If the initialization boolean was used, insert it, otherwise delete it. 1007 if (!InitBoolUsed) { 1008 while (!InitBool->use_empty()) // Delete initializations 1009 cast<StoreInst>(InitBool->user_back())->eraseFromParent(); 1010 delete InitBool; 1011 } else 1012 GV->getParent()->insertGlobalVariable(GV->getIterator(), InitBool); 1013 1014 // Now the GV is dead, nuke it and the allocation.. 1015 GV->eraseFromParent(); 1016 CI->eraseFromParent(); 1017 1018 // To further other optimizations, loop over all users of NewGV and try to 1019 // constant prop them. This will promote GEP instructions with constant 1020 // indices into GEP constant-exprs, which will allow global-opt to hack on it. 1021 ConstantPropUsersOf(NewGV, DL, TLI); 1022 1023 return NewGV; 1024 } 1025 1026 /// Scan the use-list of GV checking to make sure that there are no complex uses 1027 /// of GV. We permit simple things like dereferencing the pointer, but not 1028 /// storing through the address, unless it is to the specified global. 
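///
/// For example (hypothetical IR):
///   %p = call ptr @malloc(i64 8)
///   store ptr %p, ptr @g        ; ok: storing the pointer into GV
///   store i64 0, ptr %p         ; ok: storing through the pointer
///   store ptr %p, ptr @other    ; rejected: the pointer escapes elsewhere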
1029 static bool 1030 valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI, 1031 const GlobalVariable *GV) { 1032 SmallPtrSet<const Value *, 4> Visited; 1033 SmallVector<const Value *, 4> Worklist; 1034 Worklist.push_back(CI); 1035 1036 while (!Worklist.empty()) { 1037 const Value *V = Worklist.pop_back_val(); 1038 if (!Visited.insert(V).second) 1039 continue; 1040 1041 for (const Use &VUse : V->uses()) { 1042 const User *U = VUse.getUser(); 1043 if (isa<LoadInst>(U) || isa<CmpInst>(U)) 1044 continue; // Fine, ignore. 1045 1046 if (auto *SI = dyn_cast<StoreInst>(U)) { 1047 if (SI->getValueOperand() == V && 1048 SI->getPointerOperand()->stripPointerCasts() != GV) 1049 return false; // Storing the pointer not into GV... bad. 1050 continue; // Otherwise, storing through it, or storing into GV... fine. 1051 } 1052 1053 if (auto *BCI = dyn_cast<BitCastInst>(U)) { 1054 Worklist.push_back(BCI); 1055 continue; 1056 } 1057 1058 if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) { 1059 Worklist.push_back(GEPI); 1060 continue; 1061 } 1062 1063 return false; 1064 } 1065 } 1066 1067 return true; 1068 } 1069 1070 /// If we have a global that is only initialized with a fixed size allocation 1071 /// try to transform the program to use global memory instead of heap 1072 /// allocated memory. This eliminates dynamic allocation, avoids an indirection 1073 /// accessing the data, and exposes the resultant global to further GlobalOpt. 1074 static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV, 1075 CallInst *CI, 1076 const DataLayout &DL, 1077 TargetLibraryInfo *TLI) { 1078 if (!isRemovableAlloc(CI, TLI)) 1079 // Must be able to remove the call when we get done.. 1080 return false; 1081 1082 Type *Int8Ty = Type::getInt8Ty(CI->getFunction()->getContext()); 1083 Constant *InitVal = getInitialValueOfAllocation(CI, TLI, Int8Ty); 1084 if (!InitVal) 1085 // Must be able to emit a memset for initialization 1086 return false; 1087 1088 uint64_t AllocSize; 1089 if (!getObjectSize(CI, AllocSize, DL, TLI, ObjectSizeOpts())) 1090 return false; 1091 1092 // Restrict this transformation to only working on small allocations 1093 // (2048 bytes currently), as we don't want to introduce a 16M global or 1094 // something. 1095 if (AllocSize >= 2048) 1096 return false; 1097 1098 // We can't optimize this global unless all uses of it are *known* to be 1099 // of the malloc value, not of the null initializer value (consider a use 1100 // that compares the global's value against zero to see if the malloc has 1101 // been reached). To do this, we check to see if all uses of the global 1102 // would trap if the global were null: this proves that they must all 1103 // happen after the malloc. 1104 if (!allUsesOfLoadedValueWillTrapIfNull(GV)) 1105 return false; 1106 1107 // We can't optimize this if the malloc itself is used in a complex way, 1108 // for example, being stored into multiple globals. This allows the 1109 // malloc to be stored into the specified global, loaded, gep, icmp'd. 1110 // These are all things we could transform to using the global for. 1111 if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV)) 1112 return false; 1113 1114 OptimizeGlobalAddressOfAllocation(GV, CI, AllocSize, InitVal, DL, TLI); 1115 return true; 1116 } 1117 1118 // Try to optimize globals based on the knowledge that only one value (besides 1119 // its initializer) is ever stored to the global. 
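// A typical candidate (hypothetical, for illustration) is a lazily
// initialized pointer such as:
//   @cache = internal global ptr null
//   ...
//   store ptr %buf, ptr @cache    ; the single non-initializer store
// where every load of @cache is subsequently dereferenced, so a null value
// could never be observed on any executed path.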
1120 static bool 1121 optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, 1122 const DataLayout &DL, 1123 function_ref<TargetLibraryInfo &(Function &)> GetTLI) { 1124 // Ignore no-op GEPs and bitcasts. 1125 StoredOnceVal = StoredOnceVal->stripPointerCasts(); 1126 1127 // If we are dealing with a pointer global that is initialized to null and 1128 // only has one (non-null) value stored into it, then we can optimize any 1129 // users of the loaded value (often calls and loads) that would trap if the 1130 // value was null. 1131 if (GV->getInitializer()->getType()->isPointerTy() && 1132 GV->getInitializer()->isNullValue() && 1133 StoredOnceVal->getType()->isPointerTy() && 1134 !NullPointerIsDefined( 1135 nullptr /* F */, 1136 GV->getInitializer()->getType()->getPointerAddressSpace())) { 1137 if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { 1138 // Optimize away any trapping uses of the loaded value. 1139 if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI)) 1140 return true; 1141 } else if (isAllocationFn(StoredOnceVal, GetTLI)) { 1142 if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) { 1143 auto *TLI = &GetTLI(*CI->getFunction()); 1144 if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, DL, TLI)) 1145 return true; 1146 } 1147 } 1148 } 1149 1150 return false; 1151 } 1152 1153 /// At this point, we have learned that the only two values ever stored into GV 1154 /// are its initializer and OtherVal. See if we can shrink the global into a 1155 /// boolean and select between the two values whenever it is used. This exposes 1156 /// the values to other scalar optimizations. 1157 static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { 1158 Type *GVElType = GV->getValueType(); 1159 1160 // If GVElType is already i1, it is already shrunk. If the type of the GV is 1161 // an FP value, pointer or vector, don't do this optimization because a select 1162 // between them is very expensive and unlikely to lead to later 1163 // simplification. In these cases, we typically end up with "cond ? v1 : v2" 1164 // where v1 and v2 both require constant pool loads, a big loss. 1165 if (GVElType == Type::getInt1Ty(GV->getContext()) || 1166 GVElType->isFloatingPointTy() || 1167 GVElType->isPointerTy() || GVElType->isVectorTy()) 1168 return false; 1169 1170 // Walk the use list of the global seeing if all the uses are load or store. 1171 // If there is anything else, bail out. 1172 for (User *U : GV->users()) { 1173 if (!isa<LoadInst>(U) && !isa<StoreInst>(U)) 1174 return false; 1175 if (getLoadStoreType(U) != GVElType) 1176 return false; 1177 } 1178 1179 LLVM_DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n"); 1180 1181 // Create the new global, initializing it to false. 1182 GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), 1183 false, 1184 GlobalValue::InternalLinkage, 1185 ConstantInt::getFalse(GV->getContext()), 1186 GV->getName()+".b", 1187 GV->getThreadLocalMode(), 1188 GV->getType()->getAddressSpace()); 1189 NewGV->copyAttributesFrom(GV); 1190 GV->getParent()->insertGlobalVariable(GV->getIterator(), NewGV); 1191 1192 Constant *InitVal = GV->getInitializer(); 1193 assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && 1194 "No reason to shrink to bool!"); 1195 1196 SmallVector<DIGlobalVariableExpression *, 1> GVs; 1197 GV->getDebugInfo(GVs); 1198 1199 // If initialized to zero and storing one into the global, we can use a cast 1200 // instead of a select to synthesize the desired value. 
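  // That is, when InitVal == 0 and OtherVal == 1, each load of the new i1
  // global is simply zero-extended back to the original type; otherwise loads
  // are rewritten as "select i1 %b, OtherVal, InitVal".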
1201 bool IsOneZero = false; 1202 bool EmitOneOrZero = true; 1203 auto *CI = dyn_cast<ConstantInt>(OtherVal); 1204 if (CI && CI->getValue().getActiveBits() <= 64) { 1205 IsOneZero = InitVal->isNullValue() && CI->isOne(); 1206 1207 auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer()); 1208 if (CIInit && CIInit->getValue().getActiveBits() <= 64) { 1209 uint64_t ValInit = CIInit->getZExtValue(); 1210 uint64_t ValOther = CI->getZExtValue(); 1211 uint64_t ValMinus = ValOther - ValInit; 1212 1213 for(auto *GVe : GVs){ 1214 DIGlobalVariable *DGV = GVe->getVariable(); 1215 DIExpression *E = GVe->getExpression(); 1216 const DataLayout &DL = GV->getDataLayout(); 1217 unsigned SizeInOctets = 1218 DL.getTypeAllocSizeInBits(NewGV->getValueType()) / 8; 1219 1220 // It is expected that the address of global optimized variable is on 1221 // top of the stack. After optimization, value of that variable will 1222 // be ether 0 for initial value or 1 for other value. The following 1223 // expression should return constant integer value depending on the 1224 // value at global object address: 1225 // val * (ValOther - ValInit) + ValInit: 1226 // DW_OP_deref DW_OP_constu <ValMinus> 1227 // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value 1228 SmallVector<uint64_t, 12> Ops = { 1229 dwarf::DW_OP_deref_size, SizeInOctets, 1230 dwarf::DW_OP_constu, ValMinus, 1231 dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit, 1232 dwarf::DW_OP_plus}; 1233 bool WithStackValue = true; 1234 E = DIExpression::prependOpcodes(E, Ops, WithStackValue); 1235 DIGlobalVariableExpression *DGVE = 1236 DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E); 1237 NewGV->addDebugInfo(DGVE); 1238 } 1239 EmitOneOrZero = false; 1240 } 1241 } 1242 1243 if (EmitOneOrZero) { 1244 // FIXME: This will only emit address for debugger on which will 1245 // be written only 0 or 1. 1246 for(auto *GV : GVs) 1247 NewGV->addDebugInfo(GV); 1248 } 1249 1250 while (!GV->use_empty()) { 1251 Instruction *UI = cast<Instruction>(GV->user_back()); 1252 if (StoreInst *SI = dyn_cast<StoreInst>(UI)) { 1253 // Change the store into a boolean store. 1254 bool StoringOther = SI->getOperand(0) == OtherVal; 1255 // Only do this if we weren't storing a loaded value. 1256 Value *StoreVal; 1257 if (StoringOther || SI->getOperand(0) == InitVal) { 1258 StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), 1259 StoringOther); 1260 } else { 1261 // Otherwise, we are storing a previously loaded copy. To do this, 1262 // change the copy from copying the original value to just copying the 1263 // bool. 1264 Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); 1265 1266 // If we've already replaced the input, StoredVal will be a cast or 1267 // select instruction. If not, it will be a load of the original 1268 // global. 1269 if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) { 1270 assert(LI->getOperand(0) == GV && "Not a copy!"); 1271 // Insert a new load, to preserve the saved value. 
1272 StoreVal = 1273 new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b", 1274 false, Align(1), LI->getOrdering(), 1275 LI->getSyncScopeID(), LI->getIterator()); 1276 } else { 1277 assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && 1278 "This is not a form that we understand!"); 1279 StoreVal = StoredVal->getOperand(0); 1280 assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); 1281 } 1282 } 1283 StoreInst *NSI = 1284 new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(), 1285 SI->getSyncScopeID(), SI->getIterator()); 1286 NSI->setDebugLoc(SI->getDebugLoc()); 1287 } else { 1288 // Change the load into a load of bool then a select. 1289 LoadInst *LI = cast<LoadInst>(UI); 1290 LoadInst *NLI = new LoadInst( 1291 NewGV->getValueType(), NewGV, LI->getName() + ".b", false, Align(1), 1292 LI->getOrdering(), LI->getSyncScopeID(), LI->getIterator()); 1293 Instruction *NSI; 1294 if (IsOneZero) 1295 NSI = new ZExtInst(NLI, LI->getType(), "", LI->getIterator()); 1296 else 1297 NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI->getIterator()); 1298 NSI->takeName(LI); 1299 // Since LI is split into two instructions, NLI and NSI both inherit the 1300 // same DebugLoc 1301 NLI->setDebugLoc(LI->getDebugLoc()); 1302 NSI->setDebugLoc(LI->getDebugLoc()); 1303 LI->replaceAllUsesWith(NSI); 1304 } 1305 UI->eraseFromParent(); 1306 } 1307 1308 // Retain the name of the old global variable. People who are debugging their 1309 // programs may expect these variables to be named the same. 1310 NewGV->takeName(GV); 1311 GV->eraseFromParent(); 1312 return true; 1313 } 1314 1315 static bool 1316 deleteIfDead(GlobalValue &GV, 1317 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats, 1318 function_ref<void(Function &)> DeleteFnCallback = nullptr) { 1319 GV.removeDeadConstantUsers(); 1320 1321 if (!GV.isDiscardableIfUnused() && !GV.isDeclaration()) 1322 return false; 1323 1324 if (const Comdat *C = GV.getComdat()) 1325 if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C)) 1326 return false; 1327 1328 bool Dead; 1329 if (auto *F = dyn_cast<Function>(&GV)) 1330 Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead(); 1331 else 1332 Dead = GV.use_empty(); 1333 if (!Dead) 1334 return false; 1335 1336 LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n"); 1337 if (auto *F = dyn_cast<Function>(&GV)) { 1338 if (DeleteFnCallback) 1339 DeleteFnCallback(*F); 1340 } 1341 GV.eraseFromParent(); 1342 ++NumDeleted; 1343 return true; 1344 } 1345 1346 static bool isPointerValueDeadOnEntryToFunction( 1347 const Function *F, GlobalValue *GV, 1348 function_ref<DominatorTree &(Function &)> LookupDomTree) { 1349 // Find all uses of GV. We expect them all to be in F, and if we can't 1350 // identify any of the uses we bail out. 1351 // 1352 // On each of these uses, identify if the memory that GV points to is 1353 // used/required/live at the start of the function. If it is not, for example 1354 // if the first thing the function does is store to the GV, the GV can 1355 // possibly be demoted. 1356 // 1357 // We don't do an exhaustive search for memory operations - simply look 1358 // through bitcasts as they're quite common and benign. 
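  // For example, if the function's only accesses are "store i32 1, ptr @g"
  // with every load of @g dominated by that store, the value @g holds on
  // entry is never observed and the global can later be demoted to an alloca.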
1359 const DataLayout &DL = GV->getDataLayout(); 1360 SmallVector<LoadInst *, 4> Loads; 1361 SmallVector<StoreInst *, 4> Stores; 1362 for (auto *U : GV->users()) { 1363 Instruction *I = dyn_cast<Instruction>(U); 1364 if (!I) 1365 return false; 1366 assert(I->getParent()->getParent() == F); 1367 1368 if (auto *LI = dyn_cast<LoadInst>(I)) 1369 Loads.push_back(LI); 1370 else if (auto *SI = dyn_cast<StoreInst>(I)) 1371 Stores.push_back(SI); 1372 else 1373 return false; 1374 } 1375 1376 // We have identified all uses of GV into loads and stores. Now check if all 1377 // of them are known not to depend on the value of the global at the function 1378 // entry point. We do this by ensuring that every load is dominated by at 1379 // least one store. 1380 auto &DT = LookupDomTree(*const_cast<Function *>(F)); 1381 1382 // The below check is quadratic. Check we're not going to do too many tests. 1383 // FIXME: Even though this will always have worst-case quadratic time, we 1384 // could put effort into minimizing the average time by putting stores that 1385 // have been shown to dominate at least one load at the beginning of the 1386 // Stores array, making subsequent dominance checks more likely to succeed 1387 // early. 1388 // 1389 // The threshold here is fairly large because global->local demotion is a 1390 // very powerful optimization should it fire. 1391 const unsigned Threshold = 100; 1392 if (Loads.size() * Stores.size() > Threshold) 1393 return false; 1394 1395 for (auto *L : Loads) { 1396 auto *LTy = L->getType(); 1397 if (none_of(Stores, [&](const StoreInst *S) { 1398 auto *STy = S->getValueOperand()->getType(); 1399 // The load is only dominated by the store if DomTree says so 1400 // and the number of bits loaded in L is less than or equal to 1401 // the number of bits stored in S. 1402 return DT.dominates(S, L) && 1403 DL.getTypeStoreSize(LTy).getFixedValue() <= 1404 DL.getTypeStoreSize(STy).getFixedValue(); 1405 })) 1406 return false; 1407 } 1408 // All loads have known dependences inside F, so the global can be localized. 1409 return true; 1410 } 1411 1412 // For a global variable with one store, if the store dominates any loads, 1413 // those loads will always load the stored value (as opposed to the 1414 // initializer), even in the presence of recursion. 1415 static bool forwardStoredOnceStore( 1416 GlobalVariable *GV, const StoreInst *StoredOnceStore, 1417 function_ref<DominatorTree &(Function &)> LookupDomTree) { 1418 const Value *StoredOnceValue = StoredOnceStore->getValueOperand(); 1419 // We can do this optimization for non-constants in nosync + norecurse 1420 // functions, but globals used in exactly one norecurse functions are already 1421 // promoted to an alloca. 1422 if (!isa<Constant>(StoredOnceValue)) 1423 return false; 1424 const Function *F = StoredOnceStore->getFunction(); 1425 SmallVector<LoadInst *> Loads; 1426 for (User *U : GV->users()) { 1427 if (auto *LI = dyn_cast<LoadInst>(U)) { 1428 if (LI->getFunction() == F && 1429 LI->getType() == StoredOnceValue->getType() && LI->isSimple()) 1430 Loads.push_back(LI); 1431 } 1432 } 1433 // Only compute DT if we have any loads to examine. 
1434 bool MadeChange = false; 1435 if (!Loads.empty()) { 1436 auto &DT = LookupDomTree(*const_cast<Function *>(F)); 1437 for (auto *LI : Loads) { 1438 if (DT.dominates(StoredOnceStore, LI)) { 1439 LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue)); 1440 LI->eraseFromParent(); 1441 MadeChange = true; 1442 } 1443 } 1444 } 1445 return MadeChange; 1446 } 1447 1448 /// Analyze the specified global variable and optimize 1449 /// it if possible. If we make a change, return true. 1450 static bool 1451 processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, 1452 function_ref<TargetTransformInfo &(Function &)> GetTTI, 1453 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 1454 function_ref<DominatorTree &(Function &)> LookupDomTree) { 1455 auto &DL = GV->getDataLayout(); 1456 // If this is a first class global and has only one accessing function and 1457 // this function is non-recursive, we replace the global with a local alloca 1458 // in this function. 1459 // 1460 // NOTE: It doesn't make sense to promote non-single-value types since we 1461 // are just replacing static memory to stack memory. 1462 // 1463 // If the global is in different address space, don't bring it to stack. 1464 if (!GS.HasMultipleAccessingFunctions && 1465 GS.AccessingFunction && 1466 GV->getValueType()->isSingleValueType() && 1467 GV->getType()->getAddressSpace() == DL.getAllocaAddrSpace() && 1468 !GV->isExternallyInitialized() && 1469 GS.AccessingFunction->doesNotRecurse() && 1470 isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV, 1471 LookupDomTree)) { 1472 const DataLayout &DL = GV->getDataLayout(); 1473 1474 LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); 1475 BasicBlock::iterator FirstI = 1476 GS.AccessingFunction->getEntryBlock().begin().getNonConst(); 1477 Type *ElemTy = GV->getValueType(); 1478 // FIXME: Pass Global's alignment when globals have alignment 1479 AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), 1480 nullptr, GV->getName(), FirstI); 1481 if (!isa<UndefValue>(GV->getInitializer())) 1482 new StoreInst(GV->getInitializer(), Alloca, FirstI); 1483 1484 GV->replaceAllUsesWith(Alloca); 1485 GV->eraseFromParent(); 1486 ++NumLocalized; 1487 return true; 1488 } 1489 1490 bool Changed = false; 1491 1492 // If the global is never loaded (but may be stored to), it is dead. 1493 // Delete it now. 1494 if (!GS.IsLoaded) { 1495 LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); 1496 1497 if (isLeakCheckerRoot(GV)) { 1498 // Delete any constant stores to the global. 1499 Changed = CleanupPointerRootUsers(GV, GetTLI); 1500 } else { 1501 // Delete any stores we can find to the global. We may not be able to 1502 // make it completely dead though. 1503 Changed = CleanupConstantGlobalUsers(GV, DL); 1504 } 1505 1506 // If the global is dead now, delete it. 1507 if (GV->use_empty()) { 1508 GV->eraseFromParent(); 1509 ++NumDeleted; 1510 Changed = true; 1511 } 1512 return Changed; 1513 1514 } 1515 if (GS.StoredType <= GlobalStatus::InitializerStored) { 1516 LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n"); 1517 1518 // Don't actually mark a global constant if it's atomic because atomic loads 1519 // are implemented by a trivial cmpxchg in some edge-cases and that usually 1520 // requires write access to the variable even if it's not actually changed. 
1521 if (GS.Ordering == AtomicOrdering::NotAtomic) { 1522 assert(!GV->isConstant() && "Expected a non-constant global"); 1523 GV->setConstant(true); 1524 Changed = true; 1525 } 1526 1527 // Clean up any obviously simplifiable users now. 1528 Changed |= CleanupConstantGlobalUsers(GV, DL); 1529 1530 // If the global is dead now, just nuke it. 1531 if (GV->use_empty()) { 1532 LLVM_DEBUG(dbgs() << " *** Marking constant allowed us to simplify " 1533 << "all users and delete global!\n"); 1534 GV->eraseFromParent(); 1535 ++NumDeleted; 1536 return true; 1537 } 1538 1539 // Fall through to the next check; see if we can optimize further. 1540 ++NumMarked; 1541 } 1542 if (!GV->getInitializer()->getType()->isSingleValueType()) { 1543 const DataLayout &DL = GV->getDataLayout(); 1544 if (SRAGlobal(GV, DL)) 1545 return true; 1546 } 1547 Value *StoredOnceValue = GS.getStoredOnceValue(); 1548 if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) { 1549 Function &StoreFn = 1550 const_cast<Function &>(*GS.StoredOnceStore->getFunction()); 1551 bool CanHaveNonUndefGlobalInitializer = 1552 GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace( 1553 GV->getType()->getAddressSpace()); 1554 // If the initial value for the global was an undef value, and if only 1555 // one other value was stored into it, we can just change the 1556 // initializer to be the stored value, then delete all stores to the 1557 // global. This allows us to mark it constant. 1558 // This is restricted to address spaces that allow globals to have 1559 // initializers. NVPTX, for example, does not support initializers for 1560 // shared memory (AS 3). 1561 auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue); 1562 if (SOVConstant && isa<UndefValue>(GV->getInitializer()) && 1563 DL.getTypeAllocSize(SOVConstant->getType()) == 1564 DL.getTypeAllocSize(GV->getValueType()) && 1565 CanHaveNonUndefGlobalInitializer) { 1566 if (SOVConstant->getType() == GV->getValueType()) { 1567 // Change the initializer in place. 1568 GV->setInitializer(SOVConstant); 1569 } else { 1570 // Create a new global with adjusted type. 1571 auto *NGV = new GlobalVariable( 1572 *GV->getParent(), SOVConstant->getType(), GV->isConstant(), 1573 GV->getLinkage(), SOVConstant, "", GV, GV->getThreadLocalMode(), 1574 GV->getAddressSpace()); 1575 NGV->takeName(GV); 1576 NGV->copyAttributesFrom(GV); 1577 GV->replaceAllUsesWith(NGV); 1578 GV->eraseFromParent(); 1579 GV = NGV; 1580 } 1581 1582 // Clean up any obviously simplifiable users now. 1583 CleanupConstantGlobalUsers(GV, DL); 1584 1585 if (GV->use_empty()) { 1586 LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to " 1587 << "simplify all users and delete global!\n"); 1588 GV->eraseFromParent(); 1589 ++NumDeleted; 1590 } 1591 ++NumSubstitute; 1592 return true; 1593 } 1594 1595 // Try to optimize globals based on the knowledge that only one value 1596 // (besides its initializer) is ever stored to the global. 1597 if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI)) 1598 return true; 1599 1600 // Try to forward the store to any loads. If we have more than one store, we 1601 // may have a store of the initializer between StoredOnceStore and a load. 1602 if (GS.NumStores == 1) 1603 if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree)) 1604 return true; 1605 1606 // Otherwise, if the global was not a boolean, we can shrink it to be a 1607 // boolean. Skip this optimization for AS that doesn't allow an initializer. 
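    // Roughly, and with made-up names, the shrink-to-boolean transformation
    // turns
    //   @g = internal global i32 0
    //   store i32 42, ptr @g          ; the single non-initializer value
    //   %v = load i32, ptr @g
    // into a global that only tracks whether that store has happened:
    //   @g.b = internal global i1 false
    //   store i1 true, ptr @g.b
    //   %b = load i1, ptr @g.b
    //   %v = select i1 %b, i32 42, i32 0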
1608 if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic && 1609 (!isa<UndefValue>(GV->getInitializer()) || 1610 CanHaveNonUndefGlobalInitializer)) { 1611 if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { 1612 ++NumShrunkToBool; 1613 return true; 1614 } 1615 } 1616 } 1617 1618 return Changed; 1619 } 1620 1621 /// Analyze the specified global variable and optimize it if possible. If we 1622 /// make a change, return true. 1623 static bool 1624 processGlobal(GlobalValue &GV, 1625 function_ref<TargetTransformInfo &(Function &)> GetTTI, 1626 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 1627 function_ref<DominatorTree &(Function &)> LookupDomTree) { 1628 if (GV.getName().starts_with("llvm.")) 1629 return false; 1630 1631 GlobalStatus GS; 1632 1633 if (GlobalStatus::analyzeGlobal(&GV, GS)) 1634 return false; 1635 1636 bool Changed = false; 1637 if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) { 1638 auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global 1639 : GlobalValue::UnnamedAddr::Local; 1640 if (NewUnnamedAddr != GV.getUnnamedAddr()) { 1641 GV.setUnnamedAddr(NewUnnamedAddr); 1642 NumUnnamed++; 1643 Changed = true; 1644 } 1645 } 1646 1647 // Do more involved optimizations if the global is internal. 1648 if (!GV.hasLocalLinkage()) 1649 return Changed; 1650 1651 auto *GVar = dyn_cast<GlobalVariable>(&GV); 1652 if (!GVar) 1653 return Changed; 1654 1655 if (GVar->isConstant() || !GVar->hasInitializer()) 1656 return Changed; 1657 1658 return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) || 1659 Changed; 1660 } 1661 1662 /// Walk all of the direct calls of the specified function, changing them to 1663 /// FastCC. 1664 static void ChangeCalleesToFastCall(Function *F) { 1665 for (User *U : F->users()) { 1666 if (isa<BlockAddress>(U)) 1667 continue; 1668 cast<CallBase>(U)->setCallingConv(CallingConv::Fast); 1669 } 1670 } 1671 1672 static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs, 1673 Attribute::AttrKind A) { 1674 unsigned AttrIndex; 1675 if (Attrs.hasAttrSomewhere(A, &AttrIndex)) 1676 return Attrs.removeAttributeAtIndex(C, AttrIndex, A); 1677 return Attrs; 1678 } 1679 1680 static void RemoveAttribute(Function *F, Attribute::AttrKind A) { 1681 F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A)); 1682 for (User *U : F->users()) { 1683 if (isa<BlockAddress>(U)) 1684 continue; 1685 CallBase *CB = cast<CallBase>(U); 1686 CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A)); 1687 } 1688 } 1689 1690 /// Return true if this is a calling convention that we'd like to change. The 1691 /// idea here is that we don't want to mess with the convention if the user 1692 /// explicitly requested something with performance implications like coldcc, 1693 /// GHC, or anyregcc. 1694 static bool hasChangeableCCImpl(Function *F) { 1695 CallingConv::ID CC = F->getCallingConv(); 1696 1697 // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc? 1698 if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall) 1699 return false; 1700 1701 if (F->isVarArg()) 1702 return false; 1703 1704 // FIXME: Change CC for the whole chain of musttail calls when possible. 
1705   //
1706   // Can't change CC of a function that either has musttail calls, or is a
1707   // musttail callee itself.
1708   for (User *U : F->users()) {
1709     if (isa<BlockAddress>(U))
1710       continue;
1711     CallInst *CI = dyn_cast<CallInst>(U);
1712     if (!CI)
1713       continue;
1714
1715     if (CI->isMustTailCall())
1716       return false;
1717   }
1718
1719   for (BasicBlock &BB : *F)
1720     if (BB.getTerminatingMustTailCall())
1721       return false;
1722
1723   return !F->hasAddressTaken();
1724 }
1725
1726 using ChangeableCCCacheTy = SmallDenseMap<Function *, bool, 8>;
1727 static bool hasChangeableCC(Function *F,
1728                             ChangeableCCCacheTy &ChangeableCCCache) {
1729   auto Res = ChangeableCCCache.try_emplace(F, false);
1730   if (Res.second)
1731     Res.first->second = hasChangeableCCImpl(F);
1732   return Res.first->second;
1733 }
1734
1735 /// Return true if the block containing the call site has a BlockFrequency of
1736 /// less than ColdCCRelFreq% of the entry block.
1737 static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) {
1738   const BranchProbability ColdProb(ColdCCRelFreq, 100);
1739   auto *CallSiteBB = CB.getParent();
1740   auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
1741   auto CallerEntryFreq =
1742       CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock()));
1743   return CallSiteFreq < CallerEntryFreq * ColdProb;
1744 }
1745
1746 // This function checks if the input function F is cold at all call sites. It
1747 // also looks at each call site's containing function, returning false if the
1748 // caller function contains other non-cold calls. The input vector AllCallsCold
1749 // contains a list of functions that only have call sites in cold blocks.
1750 static bool
1751 isValidCandidateForColdCC(Function &F,
1752                           function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1753                           const std::vector<Function *> &AllCallsCold) {
1754
1755   if (F.user_empty())
1756     return false;
1757
1758   for (User *U : F.users()) {
1759     if (isa<BlockAddress>(U))
1760       continue;
1761
1762     CallBase &CB = cast<CallBase>(*U);
1763     Function *CallerFunc = CB.getParent()->getParent();
1764     BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
1765     if (!isColdCallSite(CB, CallerBFI))
1766       return false;
1767     if (!llvm::is_contained(AllCallsCold, CallerFunc))
1768       return false;
1769   }
1770   return true;
1771 }
1772
1773 static void changeCallSitesToColdCC(Function *F) {
1774   for (User *U : F->users()) {
1775     if (isa<BlockAddress>(U))
1776       continue;
1777     cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
1778   }
1779 }
1780
1781 // This function iterates over all the call instructions in the input Function
1782 // and checks that all call sites are in cold blocks and are allowed to use the
1783 // coldcc calling convention.
1784 static bool
1785 hasOnlyColdCalls(Function &F,
1786                  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1787                  ChangeableCCCacheTy &ChangeableCCCache) {
1788   for (BasicBlock &BB : F) {
1789     for (Instruction &I : BB) {
1790       if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1791         // Skip over inline asm instructions since they aren't function calls.
1792         if (CI->isInlineAsm())
1793           continue;
1794         Function *CalledFn = CI->getCalledFunction();
1795         if (!CalledFn)
1796           return false;
1797         // Skip over intrinsics since they won't remain as function calls.
1798         // Important to do this check before the linkage check below so we
1799         // won't bail out on debug intrinsics, possibly making the generated
1800         // code dependent on the presence of debug info.
1801         if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
1802           continue;
1803         if (!CalledFn->hasLocalLinkage())
1804           return false;
1805         // Check if it's valid to use coldcc calling convention.
1806         if (!hasChangeableCC(CalledFn, ChangeableCCCache))
1807           return false;
1808         BlockFrequencyInfo &CallerBFI = GetBFI(F);
1809         if (!isColdCallSite(*CI, CallerBFI))
1810           return false;
1811       }
1812     }
1813   }
1814   return true;
1815 }
1816
1817 static bool hasMustTailCallers(Function *F) {
1818   for (User *U : F->users()) {
1819     CallBase *CB = dyn_cast<CallBase>(U);
1820     if (!CB) {
1821       assert(isa<BlockAddress>(U) &&
1822              "Expected either CallBase or BlockAddress");
1823       continue;
1824     }
1825     if (CB->isMustTailCall())
1826       return true;
1827   }
1828   return false;
1829 }
1830
1831 static bool hasInvokeCallers(Function *F) {
1832   for (User *U : F->users())
1833     if (isa<InvokeInst>(U))
1834       return true;
1835   return false;
1836 }
1837
1838 static void RemovePreallocated(Function *F) {
1839   RemoveAttribute(F, Attribute::Preallocated);
1840
1841   auto *M = F->getParent();
1842
1843   IRBuilder<> Builder(M->getContext());
1844
1845   // Cannot modify users() while iterating over it, so make a copy.
1846   SmallVector<User *, 4> PreallocatedCalls(F->users());
1847   for (User *U : PreallocatedCalls) {
1848     CallBase *CB = dyn_cast<CallBase>(U);
1849     if (!CB)
1850       continue;
1851
1852     assert(
1853         !CB->isMustTailCall() &&
1854         "Shouldn't call RemovePreallocated() on a musttail preallocated call");
1855     // Create copy of call without "preallocated" operand bundle.
1856     SmallVector<OperandBundleDef, 1> OpBundles;
1857     CB->getOperandBundlesAsDefs(OpBundles);
1858     CallBase *PreallocatedSetup = nullptr;
1859     for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
1860       if (It->getTag() == "preallocated") {
1861         PreallocatedSetup = cast<CallBase>(*It->input_begin());
1862         OpBundles.erase(It);
1863         break;
1864       }
1865     }
1866     assert(PreallocatedSetup && "Did not find preallocated bundle");
1867     uint64_t ArgCount =
1868         cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();
1869
1870     assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
1871            "Unknown indirect call type");
1872     CallBase *NewCB = CallBase::Create(CB, OpBundles, CB->getIterator());
1873     CB->replaceAllUsesWith(NewCB);
1874     NewCB->takeName(CB);
1875     CB->eraseFromParent();
1876
1877     Builder.SetInsertPoint(PreallocatedSetup);
1878     auto *StackSave = Builder.CreateStackSave();
1879     Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction());
1880     Builder.CreateStackRestore(StackSave);
1881
1882     // Replace @llvm.call.preallocated.arg() with alloca.
1883     // Cannot modify users() while iterating over it, so make a copy.
1884     // @llvm.call.preallocated.arg() can be called with the same index multiple
1885     // times. So for each @llvm.call.preallocated.arg(), we see if we have
1886     // already created a Value* for the index, and if not, create an alloca and
1887     // bitcast right after the @llvm.call.preallocated.setup() so that it
1888     // dominates all uses.
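    // Illustrative sketch, with made-up names, of the overall rewrite this
    // loop performs; roughly, a preallocated call such as
    //   %t = call token @llvm.call.preallocated.setup(i32 1)
    //   %a = call ptr @llvm.call.preallocated.arg(token %t, i32 0) preallocated(i32)
    //   call void @use(ptr %a) ["preallocated"(token %t)]
    // ends up as
    //   %ss = call ptr @llvm.stacksave()
    //   %a = alloca i32
    //   call void @use(ptr %a)
    //   call void @llvm.stackrestore(ptr %ss)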
1889 SmallVector<Value *, 2> ArgAllocas(ArgCount); 1890 SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users()); 1891 for (auto *User : PreallocatedArgs) { 1892 auto *UseCall = cast<CallBase>(User); 1893 assert(UseCall->getCalledFunction()->getIntrinsicID() == 1894 Intrinsic::call_preallocated_arg && 1895 "preallocated token use was not a llvm.call.preallocated.arg"); 1896 uint64_t AllocArgIndex = 1897 cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue(); 1898 Value *AllocaReplacement = ArgAllocas[AllocArgIndex]; 1899 if (!AllocaReplacement) { 1900 auto AddressSpace = UseCall->getType()->getPointerAddressSpace(); 1901 auto *ArgType = 1902 UseCall->getFnAttr(Attribute::Preallocated).getValueAsType(); 1903 auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction(); 1904 Builder.SetInsertPoint(InsertBefore); 1905 auto *Alloca = 1906 Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg"); 1907 ArgAllocas[AllocArgIndex] = Alloca; 1908 AllocaReplacement = Alloca; 1909 } 1910 1911 UseCall->replaceAllUsesWith(AllocaReplacement); 1912 UseCall->eraseFromParent(); 1913 } 1914 // Remove @llvm.call.preallocated.setup(). 1915 cast<Instruction>(PreallocatedSetup)->eraseFromParent(); 1916 } 1917 } 1918 1919 static bool 1920 OptimizeFunctions(Module &M, 1921 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 1922 function_ref<TargetTransformInfo &(Function &)> GetTTI, 1923 function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 1924 function_ref<DominatorTree &(Function &)> LookupDomTree, 1925 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats, 1926 function_ref<void(Function &F)> ChangedCFGCallback, 1927 function_ref<void(Function &F)> DeleteFnCallback) { 1928 1929 bool Changed = false; 1930 1931 ChangeableCCCacheTy ChangeableCCCache; 1932 std::vector<Function *> AllCallsCold; 1933 for (Function &F : llvm::make_early_inc_range(M)) 1934 if (hasOnlyColdCalls(F, GetBFI, ChangeableCCCache)) 1935 AllCallsCold.push_back(&F); 1936 1937 // Optimize functions. 1938 for (Function &F : llvm::make_early_inc_range(M)) { 1939 // Don't perform global opt pass on naked functions; we don't want fast 1940 // calling conventions for naked functions. 1941 if (F.hasFnAttribute(Attribute::Naked)) 1942 continue; 1943 1944 // Functions without names cannot be referenced outside this module. 1945 if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage()) 1946 F.setLinkage(GlobalValue::InternalLinkage); 1947 1948 if (deleteIfDead(F, NotDiscardableComdats, DeleteFnCallback)) { 1949 Changed = true; 1950 continue; 1951 } 1952 1953 // LLVM's definition of dominance allows instructions that are cyclic 1954 // in unreachable blocks, e.g.: 1955 // %pat = select i1 %condition, @global, i16* %pat 1956 // because any instruction dominates an instruction in a block that's 1957 // not reachable from entry. 1958 // So, remove unreachable blocks from the function, because a) there's 1959 // no point in analyzing them and b) GlobalOpt should otherwise grow 1960 // some more complicated logic to break these cycles. 1961 // Notify the analysis manager that we've modified the function's CFG. 1962 if (!F.isDeclaration()) { 1963 if (removeUnreachableBlocks(F)) { 1964 Changed = true; 1965 ChangedCFGCallback(F); 1966 } 1967 } 1968 1969 Changed |= processGlobal(F, GetTTI, GetTLI, LookupDomTree); 1970 1971 if (!F.hasLocalLinkage()) 1972 continue; 1973 1974 // If we have an inalloca parameter that we can safely remove the 1975 // inalloca attribute from, do so. 
This unlocks optimizations that 1976 // wouldn't be safe in the presence of inalloca. 1977 // FIXME: We should also hoist alloca affected by this to the entry 1978 // block if possible. 1979 if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) && 1980 !F.hasAddressTaken() && !hasMustTailCallers(&F) && !F.isVarArg()) { 1981 RemoveAttribute(&F, Attribute::InAlloca); 1982 Changed = true; 1983 } 1984 1985 // FIXME: handle invokes 1986 // FIXME: handle musttail 1987 if (F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) { 1988 if (!F.hasAddressTaken() && !hasMustTailCallers(&F) && 1989 !hasInvokeCallers(&F)) { 1990 RemovePreallocated(&F); 1991 Changed = true; 1992 } 1993 continue; 1994 } 1995 1996 if (hasChangeableCC(&F, ChangeableCCCache)) { 1997 NumInternalFunc++; 1998 TargetTransformInfo &TTI = GetTTI(F); 1999 // Change the calling convention to coldcc if either stress testing is 2000 // enabled or the target would like to use coldcc on functions which are 2001 // cold at all call sites and the callers contain no other non coldcc 2002 // calls. 2003 if (EnableColdCCStressTest || 2004 (TTI.useColdCCForColdCall(F) && 2005 isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) { 2006 ChangeableCCCache.erase(&F); 2007 F.setCallingConv(CallingConv::Cold); 2008 changeCallSitesToColdCC(&F); 2009 Changed = true; 2010 NumColdCC++; 2011 } 2012 } 2013 2014 if (hasChangeableCC(&F, ChangeableCCCache)) { 2015 // If this function has a calling convention worth changing, is not a 2016 // varargs function, and is only called directly, promote it to use the 2017 // Fast calling convention. 2018 F.setCallingConv(CallingConv::Fast); 2019 ChangeCalleesToFastCall(&F); 2020 ++NumFastCallFns; 2021 Changed = true; 2022 } 2023 2024 if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) && 2025 !F.hasAddressTaken()) { 2026 // The function is not used by a trampoline intrinsic, so it is safe 2027 // to remove the 'nest' attribute. 2028 RemoveAttribute(&F, Attribute::Nest); 2029 ++NumNestRemoved; 2030 Changed = true; 2031 } 2032 } 2033 return Changed; 2034 } 2035 2036 static bool 2037 OptimizeGlobalVars(Module &M, 2038 function_ref<TargetTransformInfo &(Function &)> GetTTI, 2039 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 2040 function_ref<DominatorTree &(Function &)> LookupDomTree, 2041 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { 2042 bool Changed = false; 2043 2044 for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) { 2045 // Global variables without names cannot be referenced outside this module. 2046 if (!GV.hasName() && !GV.isDeclaration() && !GV.hasLocalLinkage()) 2047 GV.setLinkage(GlobalValue::InternalLinkage); 2048 // Simplify the initializer. 2049 if (GV.hasInitializer()) 2050 if (auto *C = dyn_cast<Constant>(GV.getInitializer())) { 2051 auto &DL = M.getDataLayout(); 2052 // TLI is not used in the case of a Constant, so use default nullptr 2053 // for that optional parameter, since we don't have a Function to 2054 // provide GetTLI anyway. 2055 Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr); 2056 if (New != C) 2057 GV.setInitializer(New); 2058 } 2059 2060 if (deleteIfDead(GV, NotDiscardableComdats)) { 2061 Changed = true; 2062 continue; 2063 } 2064 2065 Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree); 2066 } 2067 return Changed; 2068 } 2069 2070 /// Evaluate static constructors in the function, if we can. Return true if we 2071 /// can, false otherwise. 
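/// Illustrative sketch with made-up names: a constructor such as
///   @x = internal global i32 0
///   define internal void @init() {
///     store i32 7, ptr @x
///     ret void
///   }
/// can be evaluated here, so the initializer of @x becomes i32 7 and the
/// caller (optimizeGlobalCtorsList) can drop @init from llvm.global_ctors.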
2072 static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, 2073 TargetLibraryInfo *TLI) { 2074 // Skip external functions. 2075 if (F->isDeclaration()) 2076 return false; 2077 // Call the function. 2078 Evaluator Eval(DL, TLI); 2079 Constant *RetValDummy; 2080 bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy, 2081 SmallVector<Constant*, 0>()); 2082 2083 if (EvalSuccess) { 2084 ++NumCtorsEvaluated; 2085 2086 // We succeeded at evaluation: commit the result. 2087 auto NewInitializers = Eval.getMutatedInitializers(); 2088 LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" 2089 << F->getName() << "' to " << NewInitializers.size() 2090 << " stores.\n"); 2091 for (const auto &Pair : NewInitializers) 2092 Pair.first->setInitializer(Pair.second); 2093 for (GlobalVariable *GV : Eval.getInvariants()) 2094 GV->setConstant(true); 2095 } 2096 2097 return EvalSuccess; 2098 } 2099 2100 static int compareNames(Constant *const *A, Constant *const *B) { 2101 Value *AStripped = (*A)->stripPointerCasts(); 2102 Value *BStripped = (*B)->stripPointerCasts(); 2103 return AStripped->getName().compare(BStripped->getName()); 2104 } 2105 2106 static void setUsedInitializer(GlobalVariable &V, 2107 const SmallPtrSetImpl<GlobalValue *> &Init) { 2108 if (Init.empty()) { 2109 V.eraseFromParent(); 2110 return; 2111 } 2112 2113 // Get address space of pointers in the array of pointers. 2114 const Type *UsedArrayType = V.getValueType(); 2115 const auto *VAT = cast<ArrayType>(UsedArrayType); 2116 const auto *VEPT = cast<PointerType>(VAT->getArrayElementType()); 2117 2118 // Type of pointer to the array of pointers. 2119 PointerType *PtrTy = 2120 PointerType::get(V.getContext(), VEPT->getAddressSpace()); 2121 2122 SmallVector<Constant *, 8> UsedArray; 2123 for (GlobalValue *GV : Init) { 2124 Constant *Cast = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, PtrTy); 2125 UsedArray.push_back(Cast); 2126 } 2127 2128 // Sort to get deterministic order. 2129 array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames); 2130 ArrayType *ATy = ArrayType::get(PtrTy, UsedArray.size()); 2131 2132 Module *M = V.getParent(); 2133 V.removeFromParent(); 2134 GlobalVariable *NV = 2135 new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage, 2136 ConstantArray::get(ATy, UsedArray), ""); 2137 NV->takeName(&V); 2138 NV->setSection("llvm.metadata"); 2139 delete &V; 2140 } 2141 2142 namespace { 2143 2144 /// An easy to access representation of llvm.used and llvm.compiler.used. 
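/// For reference (names illustrative), these arrays look roughly like
///   @llvm.used = appending global [1 x ptr] [ptr @f], section "llvm.metadata"
/// and this class mirrors their elements as sets that can be edited and then
/// written back via syncVariablesAndSets().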
2145 class LLVMUsed { 2146 SmallPtrSet<GlobalValue *, 4> Used; 2147 SmallPtrSet<GlobalValue *, 4> CompilerUsed; 2148 GlobalVariable *UsedV; 2149 GlobalVariable *CompilerUsedV; 2150 2151 public: 2152 LLVMUsed(Module &M) { 2153 SmallVector<GlobalValue *, 4> Vec; 2154 UsedV = collectUsedGlobalVariables(M, Vec, false); 2155 Used = {Vec.begin(), Vec.end()}; 2156 Vec.clear(); 2157 CompilerUsedV = collectUsedGlobalVariables(M, Vec, true); 2158 CompilerUsed = {Vec.begin(), Vec.end()}; 2159 } 2160 2161 using iterator = SmallPtrSet<GlobalValue *, 4>::iterator; 2162 using used_iterator_range = iterator_range<iterator>; 2163 2164 iterator usedBegin() { return Used.begin(); } 2165 iterator usedEnd() { return Used.end(); } 2166 2167 used_iterator_range used() { 2168 return used_iterator_range(usedBegin(), usedEnd()); 2169 } 2170 2171 iterator compilerUsedBegin() { return CompilerUsed.begin(); } 2172 iterator compilerUsedEnd() { return CompilerUsed.end(); } 2173 2174 used_iterator_range compilerUsed() { 2175 return used_iterator_range(compilerUsedBegin(), compilerUsedEnd()); 2176 } 2177 2178 bool usedCount(GlobalValue *GV) const { return Used.count(GV); } 2179 2180 bool compilerUsedCount(GlobalValue *GV) const { 2181 return CompilerUsed.count(GV); 2182 } 2183 2184 bool usedErase(GlobalValue *GV) { return Used.erase(GV); } 2185 bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); } 2186 bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; } 2187 2188 bool compilerUsedInsert(GlobalValue *GV) { 2189 return CompilerUsed.insert(GV).second; 2190 } 2191 2192 void syncVariablesAndSets() { 2193 if (UsedV) 2194 setUsedInitializer(*UsedV, Used); 2195 if (CompilerUsedV) 2196 setUsedInitializer(*CompilerUsedV, CompilerUsed); 2197 } 2198 }; 2199 2200 } // end anonymous namespace 2201 2202 static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) { 2203 if (GA.use_empty()) // No use at all. 2204 return false; 2205 2206 assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) && 2207 "We should have removed the duplicated " 2208 "element from llvm.compiler.used"); 2209 if (!GA.hasOneUse()) 2210 // Strictly more than one use. So at least one is not in llvm.used and 2211 // llvm.compiler.used. 2212 return true; 2213 2214 // Exactly one use. Check if it is in llvm.used or llvm.compiler.used. 2215 return !U.usedCount(&GA) && !U.compilerUsedCount(&GA); 2216 } 2217 2218 static bool mayHaveOtherReferences(GlobalValue &GV, const LLVMUsed &U) { 2219 if (!GV.hasLocalLinkage()) 2220 return true; 2221 2222 return U.usedCount(&GV) || U.compilerUsedCount(&GV); 2223 } 2224 2225 static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U, 2226 bool &RenameTarget) { 2227 if (GA.isWeakForLinker()) 2228 return false; 2229 2230 RenameTarget = false; 2231 bool Ret = false; 2232 if (hasUseOtherThanLLVMUsed(GA, U)) 2233 Ret = true; 2234 2235 // If the alias is externally visible, we may still be able to simplify it. 2236 if (!mayHaveOtherReferences(GA, U)) 2237 return Ret; 2238 2239 // If the aliasee has internal linkage and no other references (e.g., 2240 // @llvm.used, @llvm.compiler.used), give it the name and linkage of the 2241 // alias, and delete the alias. This turns: 2242 // define internal ... @f(...) 2243 // @a = alias ... @f 2244 // into: 2245 // define ... @a(...) 
2246 Constant *Aliasee = GA.getAliasee(); 2247 GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); 2248 if (mayHaveOtherReferences(*Target, U)) 2249 return Ret; 2250 2251 RenameTarget = true; 2252 return true; 2253 } 2254 2255 static bool 2256 OptimizeGlobalAliases(Module &M, 2257 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { 2258 bool Changed = false; 2259 LLVMUsed Used(M); 2260 2261 for (GlobalValue *GV : Used.used()) 2262 Used.compilerUsedErase(GV); 2263 2264 // Return whether GV is explicitly or implicitly dso_local and not replaceable 2265 // by another definition in the current linkage unit. 2266 auto IsModuleLocal = [](GlobalValue &GV) { 2267 return !GlobalValue::isInterposableLinkage(GV.getLinkage()) && 2268 (GV.isDSOLocal() || GV.isImplicitDSOLocal()); 2269 }; 2270 2271 for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) { 2272 // Aliases without names cannot be referenced outside this module. 2273 if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage()) 2274 J.setLinkage(GlobalValue::InternalLinkage); 2275 2276 if (deleteIfDead(J, NotDiscardableComdats)) { 2277 Changed = true; 2278 continue; 2279 } 2280 2281 // If the alias can change at link time, nothing can be done - bail out. 2282 if (!IsModuleLocal(J)) 2283 continue; 2284 2285 Constant *Aliasee = J.getAliasee(); 2286 GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts()); 2287 // We can't trivially replace the alias with the aliasee if the aliasee is 2288 // non-trivial in some way. We also can't replace the alias with the aliasee 2289 // if the aliasee may be preemptible at runtime. On ELF, a non-preemptible 2290 // alias can be used to access the definition as if preemption did not 2291 // happen. 2292 // TODO: Try to handle non-zero GEPs of local aliasees. 2293 if (!Target || !IsModuleLocal(*Target)) 2294 continue; 2295 2296 Target->removeDeadConstantUsers(); 2297 2298 // Make all users of the alias use the aliasee instead. 2299 bool RenameTarget; 2300 if (!hasUsesToReplace(J, Used, RenameTarget)) 2301 continue; 2302 2303 J.replaceAllUsesWith(Aliasee); 2304 ++NumAliasesResolved; 2305 Changed = true; 2306 2307 if (RenameTarget) { 2308 // Give the aliasee the name, linkage and other attributes of the alias. 2309 Target->takeName(&J); 2310 Target->setLinkage(J.getLinkage()); 2311 Target->setDSOLocal(J.isDSOLocal()); 2312 Target->setVisibility(J.getVisibility()); 2313 Target->setDLLStorageClass(J.getDLLStorageClass()); 2314 2315 if (Used.usedErase(&J)) 2316 Used.usedInsert(Target); 2317 2318 if (Used.compilerUsedErase(&J)) 2319 Used.compilerUsedInsert(Target); 2320 } else if (mayHaveOtherReferences(J, Used)) 2321 continue; 2322 2323 // Delete the alias. 2324 M.eraseAlias(&J); 2325 ++NumAliasesRemoved; 2326 Changed = true; 2327 } 2328 2329 Used.syncVariablesAndSets(); 2330 2331 return Changed; 2332 } 2333 2334 static Function * 2335 FindAtExitLibFunc(Module &M, 2336 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 2337 LibFunc Func) { 2338 // Hack to get a default TLI before we have actual Function. 2339 auto FuncIter = M.begin(); 2340 if (FuncIter == M.end()) 2341 return nullptr; 2342 auto *TLI = &GetTLI(*FuncIter); 2343 2344 if (!TLI->has(Func)) 2345 return nullptr; 2346 2347 Function *Fn = M.getFunction(TLI->getName(Func)); 2348 if (!Fn) 2349 return nullptr; 2350 2351 // Now get the actual TLI for Fn. 2352 TLI = &GetTLI(*Fn); 2353 2354 // Make sure that the function has the correct prototype. 
2355 LibFunc F; 2356 if (!TLI->getLibFunc(*Fn, F) || F != Func) 2357 return nullptr; 2358 2359 return Fn; 2360 } 2361 2362 /// Returns whether the given function is an empty C++ destructor or atexit 2363 /// handler and can therefore be eliminated. Note that we assume that other 2364 /// optimization passes have already simplified the code so we simply check for 2365 /// 'ret'. 2366 static bool IsEmptyAtExitFunction(const Function &Fn) { 2367 // FIXME: We could eliminate C++ destructors if they're readonly/readnone and 2368 // nounwind, but that doesn't seem worth doing. 2369 if (Fn.isDeclaration()) 2370 return false; 2371 2372 for (const auto &I : Fn.getEntryBlock()) { 2373 if (I.isDebugOrPseudoInst()) 2374 continue; 2375 if (isa<ReturnInst>(I)) 2376 return true; 2377 break; 2378 } 2379 return false; 2380 } 2381 2382 static bool OptimizeEmptyGlobalAtExitDtors(Function *CXAAtExitFn, bool isCXX) { 2383 /// Itanium C++ ABI p3.3.5: 2384 /// 2385 /// After constructing a global (or local static) object, that will require 2386 /// destruction on exit, a termination function is registered as follows: 2387 /// 2388 /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); 2389 /// 2390 /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the 2391 /// call f(p) when DSO d is unloaded, before all such termination calls 2392 /// registered before this one. It returns zero if registration is 2393 /// successful, nonzero on failure. 2394 2395 // This pass will look for calls to __cxa_atexit or atexit where the function 2396 // is trivial and remove them. 2397 bool Changed = false; 2398 2399 for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) { 2400 // We're only interested in calls. Theoretically, we could handle invoke 2401 // instructions as well, but neither llvm-gcc nor clang generate invokes 2402 // to __cxa_atexit. 2403 CallInst *CI = dyn_cast<CallInst>(U); 2404 if (!CI) 2405 continue; 2406 2407 Function *DtorFn = 2408 dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts()); 2409 if (!DtorFn || !IsEmptyAtExitFunction(*DtorFn)) 2410 continue; 2411 2412 // Just remove the call. 2413 CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); 2414 CI->eraseFromParent(); 2415 2416 if (isCXX) 2417 ++NumCXXDtorsRemoved; 2418 else 2419 ++NumAtExitRemoved; 2420 2421 Changed |= true; 2422 } 2423 2424 return Changed; 2425 } 2426 2427 static Function *hasSideeffectFreeStaticResolution(GlobalIFunc &IF) { 2428 if (IF.isInterposable()) 2429 return nullptr; 2430 2431 Function *Resolver = IF.getResolverFunction(); 2432 if (!Resolver) 2433 return nullptr; 2434 2435 if (Resolver->isInterposable()) 2436 return nullptr; 2437 2438 // Only handle functions that have been optimized into a single basic block. 2439 auto It = Resolver->begin(); 2440 if (++It != Resolver->end()) 2441 return nullptr; 2442 2443 BasicBlock &BB = Resolver->getEntryBlock(); 2444 2445 if (any_of(BB, [](Instruction &I) { return I.mayHaveSideEffects(); })) 2446 return nullptr; 2447 2448 auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()); 2449 if (!Ret) 2450 return nullptr; 2451 2452 return dyn_cast<Function>(Ret->getReturnValue()); 2453 } 2454 2455 /// Find IFuncs that have resolvers that always point at the same statically 2456 /// known callee, and replace their callers with a direct call. 
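/// Illustrative sketch with made-up names:
///   define ptr @resolver() {
///     ret ptr @impl
///   }
///   @f = ifunc void (), ptr @resolver
/// Every use of @f can then be rewritten to refer to @impl directly.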
2457 static bool OptimizeStaticIFuncs(Module &M) { 2458 bool Changed = false; 2459 for (GlobalIFunc &IF : M.ifuncs()) 2460 if (Function *Callee = hasSideeffectFreeStaticResolution(IF)) 2461 if (!IF.use_empty() && 2462 (!Callee->isDeclaration() || 2463 none_of(IF.users(), [](User *U) { return isa<GlobalAlias>(U); }))) { 2464 IF.replaceAllUsesWith(Callee); 2465 NumIFuncsResolved++; 2466 Changed = true; 2467 } 2468 return Changed; 2469 } 2470 2471 static bool 2472 DeleteDeadIFuncs(Module &M, 2473 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) { 2474 bool Changed = false; 2475 for (GlobalIFunc &IF : make_early_inc_range(M.ifuncs())) 2476 if (deleteIfDead(IF, NotDiscardableComdats)) { 2477 NumIFuncsDeleted++; 2478 Changed = true; 2479 } 2480 return Changed; 2481 } 2482 2483 static bool 2484 optimizeGlobalsInModule(Module &M, const DataLayout &DL, 2485 function_ref<TargetLibraryInfo &(Function &)> GetTLI, 2486 function_ref<TargetTransformInfo &(Function &)> GetTTI, 2487 function_ref<BlockFrequencyInfo &(Function &)> GetBFI, 2488 function_ref<DominatorTree &(Function &)> LookupDomTree, 2489 function_ref<void(Function &F)> ChangedCFGCallback, 2490 function_ref<void(Function &F)> DeleteFnCallback) { 2491 SmallPtrSet<const Comdat *, 8> NotDiscardableComdats; 2492 bool Changed = false; 2493 bool LocalChange = true; 2494 std::optional<uint32_t> FirstNotFullyEvaluatedPriority; 2495 2496 while (LocalChange) { 2497 LocalChange = false; 2498 2499 NotDiscardableComdats.clear(); 2500 for (const GlobalVariable &GV : M.globals()) 2501 if (const Comdat *C = GV.getComdat()) 2502 if (!GV.isDiscardableIfUnused() || !GV.use_empty()) 2503 NotDiscardableComdats.insert(C); 2504 for (Function &F : M) 2505 if (const Comdat *C = F.getComdat()) 2506 if (!F.isDefTriviallyDead()) 2507 NotDiscardableComdats.insert(C); 2508 for (GlobalAlias &GA : M.aliases()) 2509 if (const Comdat *C = GA.getComdat()) 2510 if (!GA.isDiscardableIfUnused() || !GA.use_empty()) 2511 NotDiscardableComdats.insert(C); 2512 2513 // Delete functions that are trivially dead, ccc -> fastcc 2514 LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree, 2515 NotDiscardableComdats, ChangedCFGCallback, 2516 DeleteFnCallback); 2517 2518 // Optimize global_ctors list. 2519 LocalChange |= 2520 optimizeGlobalCtorsList(M, [&](uint32_t Priority, Function *F) { 2521 if (FirstNotFullyEvaluatedPriority && 2522 *FirstNotFullyEvaluatedPriority != Priority) 2523 return false; 2524 bool Evaluated = EvaluateStaticConstructor(F, DL, &GetTLI(*F)); 2525 if (!Evaluated) 2526 FirstNotFullyEvaluatedPriority = Priority; 2527 return Evaluated; 2528 }); 2529 2530 // Optimize non-address-taken globals. 2531 LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree, 2532 NotDiscardableComdats); 2533 2534 // Resolve aliases, when possible. 2535 LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats); 2536 2537 // Try to remove trivial global destructors if they are not removed 2538 // already. 2539 if (Function *CXAAtExitFn = 2540 FindAtExitLibFunc(M, GetTLI, LibFunc_cxa_atexit)) 2541 LocalChange |= OptimizeEmptyGlobalAtExitDtors(CXAAtExitFn, true); 2542 2543 if (Function *AtExitFn = FindAtExitLibFunc(M, GetTLI, LibFunc_atexit)) 2544 LocalChange |= OptimizeEmptyGlobalAtExitDtors(AtExitFn, false); 2545 2546 // Optimize IFuncs whose callee's are statically known. 2547 LocalChange |= OptimizeStaticIFuncs(M); 2548 2549 // Remove any IFuncs that are now dead. 
2550 LocalChange |= DeleteDeadIFuncs(M, NotDiscardableComdats); 2551 2552 Changed |= LocalChange; 2553 } 2554 2555 // TODO: Move all global ctors functions to the end of the module for code 2556 // layout. 2557 2558 return Changed; 2559 } 2560 2561 PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { 2562 auto &DL = M.getDataLayout(); 2563 auto &FAM = 2564 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 2565 auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{ 2566 return FAM.getResult<DominatorTreeAnalysis>(F); 2567 }; 2568 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 2569 return FAM.getResult<TargetLibraryAnalysis>(F); 2570 }; 2571 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { 2572 return FAM.getResult<TargetIRAnalysis>(F); 2573 }; 2574 2575 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & { 2576 return FAM.getResult<BlockFrequencyAnalysis>(F); 2577 }; 2578 auto ChangedCFGCallback = [&FAM](Function &F) { 2579 FAM.invalidate(F, PreservedAnalyses::none()); 2580 }; 2581 auto DeleteFnCallback = [&FAM](Function &F) { FAM.clear(F, F.getName()); }; 2582 2583 if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree, 2584 ChangedCFGCallback, DeleteFnCallback)) 2585 return PreservedAnalyses::all(); 2586 2587 PreservedAnalyses PA = PreservedAnalyses::none(); 2588 // We made sure to clear analyses for deleted functions. 2589 PA.preserve<FunctionAnalysisManagerModuleProxy>(); 2590 // The only place we modify the CFG is when calling 2591 // removeUnreachableBlocks(), but there we make sure to invalidate analyses 2592 // for modified functions. 2593 PA.preserveSet<CFGAnalyses>(); 2594 return PA; 2595 } 2596