xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/IPO/GlobalOpt.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass transforms simple global variables that never have their address
10 // taken.  If obviously true, it marks read/write globals as constant, deletes
11 // variables only stored to, etc.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/IPO/GlobalOpt.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallPtrSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/ADT/iterator_range.h"
23 #include "llvm/Analysis/BlockFrequencyInfo.h"
24 #include "llvm/Analysis/ConstantFolding.h"
25 #include "llvm/Analysis/MemoryBuiltins.h"
26 #include "llvm/Analysis/TargetLibraryInfo.h"
27 #include "llvm/Analysis/TargetTransformInfo.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/BinaryFormat/Dwarf.h"
30 #include "llvm/IR/Attributes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/CallingConv.h"
33 #include "llvm/IR/Constant.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DebugInfoMetadata.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Dominators.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/GlobalAlias.h"
41 #include "llvm/IR/GlobalValue.h"
42 #include "llvm/IR/GlobalVariable.h"
43 #include "llvm/IR/IRBuilder.h"
44 #include "llvm/IR/InstrTypes.h"
45 #include "llvm/IR/Instruction.h"
46 #include "llvm/IR/Instructions.h"
47 #include "llvm/IR/IntrinsicInst.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/IR/Operator.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/Use.h"
52 #include "llvm/IR/User.h"
53 #include "llvm/IR/Value.h"
54 #include "llvm/IR/ValueHandle.h"
55 #include "llvm/Support/AtomicOrdering.h"
56 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Debug.h"
59 #include "llvm/Support/ErrorHandling.h"
60 #include "llvm/Support/raw_ostream.h"
61 #include "llvm/Transforms/IPO.h"
62 #include "llvm/Transforms/Utils/CtorUtils.h"
63 #include "llvm/Transforms/Utils/Evaluator.h"
64 #include "llvm/Transforms/Utils/GlobalStatus.h"
65 #include "llvm/Transforms/Utils/Local.h"
66 #include <cassert>
67 #include <cstdint>
68 #include <optional>
69 #include <utility>
70 #include <vector>
71 
72 using namespace llvm;
73 
74 #define DEBUG_TYPE "globalopt"
75 
76 STATISTIC(NumMarked    , "Number of globals marked constant");
77 STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
78 STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
79 STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
80 STATISTIC(NumDeleted   , "Number of globals deleted");
81 STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
82 STATISTIC(NumLocalized , "Number of globals localized");
83 STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
84 STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
85 STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
86 STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
87 STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
88 STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
89 STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
90 STATISTIC(NumAtExitRemoved, "Number of atexit handlers removed");
91 STATISTIC(NumInternalFunc, "Number of internal functions");
92 STATISTIC(NumColdCC, "Number of functions marked coldcc");
93 STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs");
94 STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed");
95 
96 static cl::opt<bool>
97     EnableColdCCStressTest("enable-coldcc-stress-test",
98                            cl::desc("Enable stress test of coldcc by adding "
99                                     "calling conv to all internal functions."),
100                            cl::init(false), cl::Hidden);
101 
102 static cl::opt<int> ColdCCRelFreq(
103     "coldcc-rel-freq", cl::Hidden, cl::init(2),
104     cl::desc(
105         "Maximum block frequency, expressed as a percentage of caller's "
106         "entry frequency, for a call site to be considered cold for enabling"
107         "coldcc"));
108 
109 /// Is this global variable possibly used by a leak checker as a root?  If so,
110 /// we might not really want to eliminate the stores to it.
111 static bool isLeakCheckerRoot(GlobalVariable *GV) {
112   // A global variable is a root if it is a pointer, or could plausibly contain
113   // a pointer.  There are two challenges; one is that we could have a struct
114   // the has an inner member which is a pointer.  We recurse through the type to
115   // detect these (up to a point).  The other is that we may actually be a union
116   // of a pointer and another type, and so our LLVM type is an integer which
117   // gets converted into a pointer, or our type is an [i8 x #] with a pointer
118   // potentially contained here.
119 
120   if (GV->hasPrivateLinkage())
121     return false;
122 
123   SmallVector<Type *, 4> Types;
124   Types.push_back(GV->getValueType());
125 
126   unsigned Limit = 20;
127   do {
128     Type *Ty = Types.pop_back_val();
129     switch (Ty->getTypeID()) {
130       default: break;
131       case Type::PointerTyID:
132         return true;
133       case Type::FixedVectorTyID:
134       case Type::ScalableVectorTyID:
135         if (cast<VectorType>(Ty)->getElementType()->isPointerTy())
136           return true;
137         break;
138       case Type::ArrayTyID:
139         Types.push_back(cast<ArrayType>(Ty)->getElementType());
140         break;
141       case Type::StructTyID: {
142         StructType *STy = cast<StructType>(Ty);
143         if (STy->isOpaque()) return true;
144         for (Type *InnerTy : STy->elements()) {
145           if (isa<PointerType>(InnerTy)) return true;
146           if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) ||
147               isa<VectorType>(InnerTy))
148             Types.push_back(InnerTy);
149         }
150         break;
151       }
152     }
153     if (--Limit == 0) return true;
154   } while (!Types.empty());
155   return false;
156 }
157 
158 /// Given a value that is stored to a global but never read, determine whether
159 /// it's safe to remove the store and the chain of computation that feeds the
160 /// store.
161 static bool IsSafeComputationToRemove(
162     Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
163   do {
164     if (isa<Constant>(V))
165       return true;
166     if (!V->hasOneUse())
167       return false;
168     if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
169         isa<GlobalValue>(V))
170       return false;
171     if (isAllocationFn(V, GetTLI))
172       return true;
173 
174     Instruction *I = cast<Instruction>(V);
175     if (I->mayHaveSideEffects())
176       return false;
177     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
178       if (!GEP->hasAllConstantIndices())
179         return false;
180     } else if (I->getNumOperands() != 1) {
181       return false;
182     }
183 
184     V = I->getOperand(0);
185   } while (true);
186 }
187 
188 /// This GV is a pointer root.  Loop over all users of the global and clean up
189 /// any that obviously don't assign the global a value that isn't dynamically
190 /// allocated.
191 static bool
192 CleanupPointerRootUsers(GlobalVariable *GV,
193                         function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
194   // A brief explanation of leak checkers.  The goal is to find bugs where
195   // pointers are forgotten, causing an accumulating growth in memory
196   // usage over time.  The common strategy for leak checkers is to explicitly
197   // allow the memory pointed to by globals at exit.  This is popular because it
198   // also solves another problem where the main thread of a C++ program may shut
199   // down before other threads that are still expecting to use those globals. To
200   // handle that case, we expect the program may create a singleton and never
201   // destroy it.
202 
203   bool Changed = false;
204 
205   // If Dead[n].first is the only use of a malloc result, we can delete its
206   // chain of computation and the store to the global in Dead[n].second.
207   SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
208 
209   SmallVector<User *> Worklist(GV->users());
210   // Constants can't be pointers to dynamically allocated memory.
211   while (!Worklist.empty()) {
212     User *U = Worklist.pop_back_val();
213     if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
214       Value *V = SI->getValueOperand();
215       if (isa<Constant>(V)) {
216         Changed = true;
217         SI->eraseFromParent();
218       } else if (Instruction *I = dyn_cast<Instruction>(V)) {
219         if (I->hasOneUse())
220           Dead.push_back(std::make_pair(I, SI));
221       }
222     } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
223       if (isa<Constant>(MSI->getValue())) {
224         Changed = true;
225         MSI->eraseFromParent();
226       } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
227         if (I->hasOneUse())
228           Dead.push_back(std::make_pair(I, MSI));
229       }
230     } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
231       GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
232       if (MemSrc && MemSrc->isConstant()) {
233         Changed = true;
234         MTI->eraseFromParent();
235       } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
236         if (I->hasOneUse())
237           Dead.push_back(std::make_pair(I, MTI));
238       }
239     } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
240       if (isa<GEPOperator>(CE))
241         append_range(Worklist, CE->users());
242     }
243   }
244 
245   for (int i = 0, e = Dead.size(); i != e; ++i) {
246     if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) {
247       Dead[i].second->eraseFromParent();
248       Instruction *I = Dead[i].first;
249       do {
250         if (isAllocationFn(I, GetTLI))
251           break;
252         Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
253         if (!J)
254           break;
255         I->eraseFromParent();
256         I = J;
257       } while (true);
258       I->eraseFromParent();
259       Changed = true;
260     }
261   }
262 
263   GV->removeDeadConstantUsers();
264   return Changed;
265 }
266 
267 /// We just marked GV constant.  Loop over all users of the global, cleaning up
268 /// the obvious ones.  This is largely just a quick scan over the use list to
269 /// clean up the easy and obvious cruft.  This returns true if it made a change.
270 static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
271                                        const DataLayout &DL) {
272   Constant *Init = GV->getInitializer();
273   SmallVector<User *, 8> WorkList(GV->users());
274   SmallPtrSet<User *, 8> Visited;
275   bool Changed = false;
276 
277   SmallVector<WeakTrackingVH> MaybeDeadInsts;
278   auto EraseFromParent = [&](Instruction *I) {
279     for (Value *Op : I->operands())
280       if (auto *OpI = dyn_cast<Instruction>(Op))
281         MaybeDeadInsts.push_back(OpI);
282     I->eraseFromParent();
283     Changed = true;
284   };
285   while (!WorkList.empty()) {
286     User *U = WorkList.pop_back_val();
287     if (!Visited.insert(U).second)
288       continue;
289 
290     if (auto *BO = dyn_cast<BitCastOperator>(U))
291       append_range(WorkList, BO->users());
292     if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U))
293       append_range(WorkList, ASC->users());
294     else if (auto *GEP = dyn_cast<GEPOperator>(U))
295       append_range(WorkList, GEP->users());
296     else if (auto *LI = dyn_cast<LoadInst>(U)) {
297       // A load from a uniform value is always the same, regardless of any
298       // applied offset.
299       Type *Ty = LI->getType();
300       if (Constant *Res = ConstantFoldLoadFromUniformValue(Init, Ty, DL)) {
301         LI->replaceAllUsesWith(Res);
302         EraseFromParent(LI);
303         continue;
304       }
305 
306       Value *PtrOp = LI->getPointerOperand();
307       APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
308       PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
309           DL, Offset, /* AllowNonInbounds */ true);
310       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(PtrOp)) {
311         if (II->getIntrinsicID() == Intrinsic::threadlocal_address)
312           PtrOp = II->getArgOperand(0);
313       }
314       if (PtrOp == GV) {
315         if (auto *Value = ConstantFoldLoadFromConst(Init, Ty, Offset, DL)) {
316           LI->replaceAllUsesWith(Value);
317           EraseFromParent(LI);
318         }
319       }
320     } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
321       // Store must be unreachable or storing Init into the global.
322       EraseFromParent(SI);
323     } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
324       if (getUnderlyingObject(MI->getRawDest()) == GV)
325         EraseFromParent(MI);
326     } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
327       if (II->getIntrinsicID() == Intrinsic::threadlocal_address)
328         append_range(WorkList, II->users());
329     }
330   }
331 
332   Changed |=
333       RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts);
334   GV->removeDeadConstantUsers();
335   return Changed;
336 }
337 
338 /// Part of the global at a specific offset, which is only accessed through
339 /// loads and stores with the given type.
340 struct GlobalPart {
341   Type *Ty;
342   Constant *Initializer = nullptr;
343   bool IsLoaded = false;
344   bool IsStored = false;
345 };
346 
347 /// Look at all uses of the global and determine which (offset, type) pairs it
348 /// can be split into.
349 static bool collectSRATypes(DenseMap<uint64_t, GlobalPart> &Parts,
350                             GlobalVariable *GV, const DataLayout &DL) {
351   SmallVector<Use *, 16> Worklist;
352   SmallPtrSet<Use *, 16> Visited;
353   auto AppendUses = [&](Value *V) {
354     for (Use &U : V->uses())
355       if (Visited.insert(&U).second)
356         Worklist.push_back(&U);
357   };
358   AppendUses(GV);
359   while (!Worklist.empty()) {
360     Use *U = Worklist.pop_back_val();
361     User *V = U->getUser();
362 
363     auto *GEP = dyn_cast<GEPOperator>(V);
364     if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
365         (GEP && GEP->hasAllConstantIndices())) {
366       AppendUses(V);
367       continue;
368     }
369 
370     if (Value *Ptr = getLoadStorePointerOperand(V)) {
371       // This is storing the global address into somewhere, not storing into
372       // the global.
373       if (isa<StoreInst>(V) && U->getOperandNo() == 0)
374         return false;
375 
376       APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
377       Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
378                                                    /* AllowNonInbounds */ true);
379       if (Ptr != GV || Offset.getActiveBits() >= 64)
380         return false;
381 
382       // TODO: We currently require that all accesses at a given offset must
383       // use the same type. This could be relaxed.
384       Type *Ty = getLoadStoreType(V);
385       const auto &[It, Inserted] =
386           Parts.try_emplace(Offset.getZExtValue(), GlobalPart{Ty});
387       if (Ty != It->second.Ty)
388         return false;
389 
390       if (Inserted) {
391         It->second.Initializer =
392             ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL);
393         if (!It->second.Initializer) {
394           LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of "
395                             << *GV << " with type " << *Ty << " at offset "
396                             << Offset.getZExtValue());
397           return false;
398         }
399       }
400 
401       // Scalable types not currently supported.
402       if (Ty->isScalableTy())
403         return false;
404 
405       auto IsStored = [](Value *V, Constant *Initializer) {
406         auto *SI = dyn_cast<StoreInst>(V);
407         if (!SI)
408           return false;
409 
410         Constant *StoredConst = dyn_cast<Constant>(SI->getOperand(0));
411         if (!StoredConst)
412           return true;
413 
414         // Don't consider stores that only write the initializer value.
415         return Initializer != StoredConst;
416       };
417 
418       It->second.IsLoaded |= isa<LoadInst>(V);
419       It->second.IsStored |= IsStored(V, It->second.Initializer);
420       continue;
421     }
422 
423     // Ignore dead constant users.
424     if (auto *C = dyn_cast<Constant>(V)) {
425       if (!isSafeToDestroyConstant(C))
426         return false;
427       continue;
428     }
429 
430     // Unknown user.
431     return false;
432   }
433 
434   return true;
435 }
436 
437 /// Copy over the debug info for a variable to its SRA replacements.
438 static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
439                                  uint64_t FragmentOffsetInBits,
440                                  uint64_t FragmentSizeInBits,
441                                  uint64_t VarSize) {
442   SmallVector<DIGlobalVariableExpression *, 1> GVs;
443   GV->getDebugInfo(GVs);
444   for (auto *GVE : GVs) {
445     DIVariable *Var = GVE->getVariable();
446     DIExpression *Expr = GVE->getExpression();
447     int64_t CurVarOffsetInBytes = 0;
448     uint64_t CurVarOffsetInBits = 0;
449     uint64_t FragmentEndInBits = FragmentOffsetInBits + FragmentSizeInBits;
450 
451     // Calculate the offset (Bytes), Continue if unknown.
452     if (!Expr->extractIfOffset(CurVarOffsetInBytes))
453       continue;
454 
455     // Ignore negative offset.
456     if (CurVarOffsetInBytes < 0)
457       continue;
458 
459     // Convert offset to bits.
460     CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes;
461 
462     // Current var starts after the fragment, ignore.
463     if (CurVarOffsetInBits >= FragmentEndInBits)
464       continue;
465 
466     uint64_t CurVarSize = Var->getType()->getSizeInBits();
467     uint64_t CurVarEndInBits = CurVarOffsetInBits + CurVarSize;
468     // Current variable ends before start of fragment, ignore.
469     if (CurVarSize != 0 && /* CurVarSize is known */
470         CurVarEndInBits <= FragmentOffsetInBits)
471       continue;
472 
473     // Current variable fits in (not greater than) the fragment,
474     // does not need fragment expression.
475     if (CurVarSize != 0 && /* CurVarSize is known */
476         CurVarOffsetInBits >= FragmentOffsetInBits &&
477         CurVarEndInBits <= FragmentEndInBits) {
478       uint64_t CurVarOffsetInFragment =
479           (CurVarOffsetInBits - FragmentOffsetInBits) / 8;
480       if (CurVarOffsetInFragment != 0)
481         Expr = DIExpression::get(Expr->getContext(), {dwarf::DW_OP_plus_uconst,
482                                                       CurVarOffsetInFragment});
483       else
484         Expr = DIExpression::get(Expr->getContext(), {});
485       auto *NGVE =
486           DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
487       NGV->addDebugInfo(NGVE);
488       continue;
489     }
490     // Current variable does not fit in single fragment,
491     // emit a fragment expression.
492     if (FragmentSizeInBits < VarSize) {
493       if (CurVarOffsetInBits > FragmentOffsetInBits)
494         continue;
495       uint64_t CurVarFragmentOffsetInBits =
496           FragmentOffsetInBits - CurVarOffsetInBits;
497       uint64_t CurVarFragmentSizeInBits = FragmentSizeInBits;
498       if (CurVarSize != 0 && CurVarEndInBits < FragmentEndInBits)
499         CurVarFragmentSizeInBits -= (FragmentEndInBits - CurVarEndInBits);
500       if (CurVarOffsetInBits)
501         Expr = DIExpression::get(Expr->getContext(), {});
502       if (auto E = DIExpression::createFragmentExpression(
503               Expr, CurVarFragmentOffsetInBits, CurVarFragmentSizeInBits))
504         Expr = *E;
505       else
506         continue;
507     }
508     auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
509     NGV->addDebugInfo(NGVE);
510   }
511 }
512 
513 /// Perform scalar replacement of aggregates on the specified global variable.
514 /// This opens the door for other optimizations by exposing the behavior of the
515 /// program in a more fine-grained way.  We have determined that this
516 /// transformation is safe already.  We return the first global variable we
517 /// insert so that the caller can reprocess it.
518 static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
519   assert(GV->hasLocalLinkage());
520 
521   // Collect types to split into.
522   DenseMap<uint64_t, GlobalPart> Parts;
523   if (!collectSRATypes(Parts, GV, DL) || Parts.empty())
524     return nullptr;
525 
526   // Make sure we don't SRA back to the same type.
527   if (Parts.size() == 1 && Parts.begin()->second.Ty == GV->getValueType())
528     return nullptr;
529 
530   // Don't perform SRA if we would have to split into many globals. Ignore
531   // parts that are either only loaded or only stored, because we expect them
532   // to be optimized away.
533   unsigned NumParts = count_if(Parts, [](const auto &Pair) {
534     return Pair.second.IsLoaded && Pair.second.IsStored;
535   });
536   if (NumParts > 16)
537     return nullptr;
538 
539   // Sort by offset.
540   SmallVector<std::tuple<uint64_t, Type *, Constant *>, 16> TypesVector;
541   for (const auto &Pair : Parts) {
542     TypesVector.push_back(
543         {Pair.first, Pair.second.Ty, Pair.second.Initializer});
544   }
545   sort(TypesVector, llvm::less_first());
546 
547   // Check that the types are non-overlapping.
548   uint64_t Offset = 0;
549   for (const auto &[OffsetForTy, Ty, _] : TypesVector) {
550     // Overlaps with previous type.
551     if (OffsetForTy < Offset)
552       return nullptr;
553 
554     Offset = OffsetForTy + DL.getTypeAllocSize(Ty);
555   }
556 
557   // Some accesses go beyond the end of the global, don't bother.
558   if (Offset > DL.getTypeAllocSize(GV->getValueType()))
559     return nullptr;
560 
561   LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
562 
563   // Get the alignment of the global, either explicit or target-specific.
564   Align StartAlignment =
565       DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
566   uint64_t VarSize = DL.getTypeSizeInBits(GV->getValueType());
567 
568   // Create replacement globals.
569   DenseMap<uint64_t, GlobalVariable *> NewGlobals;
570   unsigned NameSuffix = 0;
571   for (auto &[OffsetForTy, Ty, Initializer] : TypesVector) {
572     GlobalVariable *NGV = new GlobalVariable(
573         *GV->getParent(), Ty, false, GlobalVariable::InternalLinkage,
574         Initializer, GV->getName() + "." + Twine(NameSuffix++), GV,
575         GV->getThreadLocalMode(), GV->getAddressSpace());
576     NGV->copyAttributesFrom(GV);
577     NewGlobals.insert({OffsetForTy, NGV});
578 
579     // Calculate the known alignment of the field.  If the original aggregate
580     // had 256 byte alignment for example, something might depend on that:
581     // propagate info to each field.
582     Align NewAlign = commonAlignment(StartAlignment, OffsetForTy);
583     if (NewAlign > DL.getABITypeAlign(Ty))
584       NGV->setAlignment(NewAlign);
585 
586     // Copy over the debug info for the variable.
587     transferSRADebugInfo(GV, NGV, OffsetForTy * 8,
588                          DL.getTypeAllocSizeInBits(Ty), VarSize);
589   }
590 
591   // Replace uses of the original global with uses of the new global.
592   SmallVector<Value *, 16> Worklist;
593   SmallPtrSet<Value *, 16> Visited;
594   SmallVector<WeakTrackingVH, 16> DeadInsts;
595   auto AppendUsers = [&](Value *V) {
596     for (User *U : V->users())
597       if (Visited.insert(U).second)
598         Worklist.push_back(U);
599   };
600   AppendUsers(GV);
601   while (!Worklist.empty()) {
602     Value *V = Worklist.pop_back_val();
603     if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
604         isa<GEPOperator>(V)) {
605       AppendUsers(V);
606       if (isa<Instruction>(V))
607         DeadInsts.push_back(V);
608       continue;
609     }
610 
611     if (Value *Ptr = getLoadStorePointerOperand(V)) {
612       APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
613       Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
614                                                    /* AllowNonInbounds */ true);
615       assert(Ptr == GV && "Load/store must be from/to global");
616       GlobalVariable *NGV = NewGlobals[Offset.getZExtValue()];
617       assert(NGV && "Must have replacement global for this offset");
618 
619       // Update the pointer operand and recalculate alignment.
620       Align PrefAlign = DL.getPrefTypeAlign(getLoadStoreType(V));
621       Align NewAlign =
622           getOrEnforceKnownAlignment(NGV, PrefAlign, DL, cast<Instruction>(V));
623 
624       if (auto *LI = dyn_cast<LoadInst>(V)) {
625         LI->setOperand(0, NGV);
626         LI->setAlignment(NewAlign);
627       } else {
628         auto *SI = cast<StoreInst>(V);
629         SI->setOperand(1, NGV);
630         SI->setAlignment(NewAlign);
631       }
632       continue;
633     }
634 
635     assert(isa<Constant>(V) && isSafeToDestroyConstant(cast<Constant>(V)) &&
636            "Other users can only be dead constants");
637   }
638 
639   // Delete old instructions and global.
640   RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
641   GV->removeDeadConstantUsers();
642   GV->eraseFromParent();
643   ++NumSRA;
644 
645   assert(NewGlobals.size() > 0);
646   return NewGlobals.begin()->second;
647 }
648 
649 /// Return true if all users of the specified value will trap if the value is
650 /// dynamically null.  PHIs keeps track of any phi nodes we've seen to avoid
651 /// reprocessing them.
652 static bool AllUsesOfValueWillTrapIfNull(const Value *V,
653                                         SmallPtrSetImpl<const PHINode*> &PHIs) {
654   for (const User *U : V->users()) {
655     if (const Instruction *I = dyn_cast<Instruction>(U)) {
656       // If null pointer is considered valid, then all uses are non-trapping.
657       // Non address-space 0 globals have already been pruned by the caller.
658       if (NullPointerIsDefined(I->getFunction()))
659         return false;
660     }
661     if (isa<LoadInst>(U)) {
662       // Will trap.
663     } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
664       if (SI->getOperand(0) == V) {
665         return false;  // Storing the value.
666       }
667     } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
668       if (CI->getCalledOperand() != V) {
669         return false;  // Not calling the ptr
670       }
671     } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
672       if (II->getCalledOperand() != V) {
673         return false;  // Not calling the ptr
674       }
675     } else if (const AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(U)) {
676       if (!AllUsesOfValueWillTrapIfNull(CI, PHIs))
677         return false;
678     } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
679       if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
680     } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
681       // If we've already seen this phi node, ignore it, it has already been
682       // checked.
683       if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
684         return false;
685     } else if (isa<ICmpInst>(U) &&
686                !ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) &&
687                isa<LoadInst>(U->getOperand(0)) &&
688                isa<ConstantPointerNull>(U->getOperand(1))) {
689       assert(isa<GlobalValue>(cast<LoadInst>(U->getOperand(0))
690                                   ->getPointerOperand()
691                                   ->stripPointerCasts()) &&
692              "Should be GlobalVariable");
693       // This and only this kind of non-signed ICmpInst is to be replaced with
694       // the comparing of the value of the created global init bool later in
695       // optimizeGlobalAddressOfAllocation for the global variable.
696     } else {
697       return false;
698     }
699   }
700   return true;
701 }
702 
703 /// Return true if all uses of any loads from GV will trap if the loaded value
704 /// is null.  Note that this also permits comparisons of the loaded value
705 /// against null, as a special case.
706 static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
707   SmallVector<const Value *, 4> Worklist;
708   Worklist.push_back(GV);
709   while (!Worklist.empty()) {
710     const Value *P = Worklist.pop_back_val();
711     for (const auto *U : P->users()) {
712       if (auto *LI = dyn_cast<LoadInst>(U)) {
713         SmallPtrSet<const PHINode *, 8> PHIs;
714         if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
715           return false;
716       } else if (auto *SI = dyn_cast<StoreInst>(U)) {
717         // Ignore stores to the global.
718         if (SI->getPointerOperand() != P)
719           return false;
720       } else if (auto *CE = dyn_cast<ConstantExpr>(U)) {
721         if (CE->stripPointerCasts() != GV)
722           return false;
723         // Check further the ConstantExpr.
724         Worklist.push_back(CE);
725       } else {
726         // We don't know or understand this user, bail out.
727         return false;
728       }
729     }
730   }
731 
732   return true;
733 }
734 
735 /// Get all the loads/store uses for global variable \p GV.
736 static void allUsesOfLoadAndStores(GlobalVariable *GV,
737                                    SmallVector<Value *, 4> &Uses) {
738   SmallVector<Value *, 4> Worklist;
739   Worklist.push_back(GV);
740   while (!Worklist.empty()) {
741     auto *P = Worklist.pop_back_val();
742     for (auto *U : P->users()) {
743       if (auto *CE = dyn_cast<ConstantExpr>(U)) {
744         Worklist.push_back(CE);
745         continue;
746       }
747 
748       assert((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
749              "Expect only load or store instructions");
750       Uses.push_back(U);
751     }
752   }
753 }
754 
755 static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
756   bool Changed = false;
757   for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
758     Instruction *I = cast<Instruction>(*UI++);
759     // Uses are non-trapping if null pointer is considered valid.
760     // Non address-space 0 globals are already pruned by the caller.
761     if (NullPointerIsDefined(I->getFunction()))
762       return false;
763     if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
764       LI->setOperand(0, NewV);
765       Changed = true;
766     } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
767       if (SI->getOperand(1) == V) {
768         SI->setOperand(1, NewV);
769         Changed = true;
770       }
771     } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
772       CallBase *CB = cast<CallBase>(I);
773       if (CB->getCalledOperand() == V) {
774         // Calling through the pointer!  Turn into a direct call, but be careful
775         // that the pointer is not also being passed as an argument.
776         CB->setCalledOperand(NewV);
777         Changed = true;
778         bool PassedAsArg = false;
779         for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
780           if (CB->getArgOperand(i) == V) {
781             PassedAsArg = true;
782             CB->setArgOperand(i, NewV);
783           }
784 
785         if (PassedAsArg) {
786           // Being passed as an argument also.  Be careful to not invalidate UI!
787           UI = V->user_begin();
788         }
789       }
790     } else if (AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(I)) {
791       Changed |= OptimizeAwayTrappingUsesOfValue(
792           CI, ConstantExpr::getAddrSpaceCast(NewV, CI->getType()));
793       if (CI->use_empty()) {
794         Changed = true;
795         CI->eraseFromParent();
796       }
797     } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
798       // Should handle GEP here.
799       SmallVector<Constant*, 8> Idxs;
800       Idxs.reserve(GEPI->getNumOperands()-1);
801       for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
802            i != e; ++i)
803         if (Constant *C = dyn_cast<Constant>(*i))
804           Idxs.push_back(C);
805         else
806           break;
807       if (Idxs.size() == GEPI->getNumOperands()-1)
808         Changed |= OptimizeAwayTrappingUsesOfValue(
809             GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
810                                                  NewV, Idxs));
811       if (GEPI->use_empty()) {
812         Changed = true;
813         GEPI->eraseFromParent();
814       }
815     }
816   }
817 
818   return Changed;
819 }
820 
821 /// The specified global has only one non-null value stored into it.  If there
822 /// are uses of the loaded value that would trap if the loaded value is
823 /// dynamically null, then we know that they cannot be reachable with a null
824 /// optimize away the load.
825 static bool OptimizeAwayTrappingUsesOfLoads(
826     GlobalVariable *GV, Constant *LV, const DataLayout &DL,
827     function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
828   bool Changed = false;
829 
830   // Keep track of whether we are able to remove all the uses of the global
831   // other than the store that defines it.
832   bool AllNonStoreUsesGone = true;
833 
834   // Replace all uses of loads with uses of uses of the stored value.
835   for (User *GlobalUser : llvm::make_early_inc_range(GV->users())) {
836     if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
837       Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
838       // If we were able to delete all uses of the loads
839       if (LI->use_empty()) {
840         LI->eraseFromParent();
841         Changed = true;
842       } else {
843         AllNonStoreUsesGone = false;
844       }
845     } else if (isa<StoreInst>(GlobalUser)) {
846       // Ignore the store that stores "LV" to the global.
847       assert(GlobalUser->getOperand(1) == GV &&
848              "Must be storing *to* the global");
849     } else {
850       AllNonStoreUsesGone = false;
851 
852       // If we get here we could have other crazy uses that are transitively
853       // loaded.
854       assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
855               isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
856               isa<BitCastInst>(GlobalUser) ||
857               isa<GetElementPtrInst>(GlobalUser) ||
858               isa<AddrSpaceCastInst>(GlobalUser)) &&
859              "Only expect load and stores!");
860     }
861   }
862 
863   if (Changed) {
864     LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
865                       << "\n");
866     ++NumGlobUses;
867   }
868 
869   // If we nuked all of the loads, then none of the stores are needed either,
870   // nor is the global.
871   if (AllNonStoreUsesGone) {
872     if (isLeakCheckerRoot(GV)) {
873       Changed |= CleanupPointerRootUsers(GV, GetTLI);
874     } else {
875       Changed = true;
876       CleanupConstantGlobalUsers(GV, DL);
877     }
878     if (GV->use_empty()) {
879       LLVM_DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
880       Changed = true;
881       GV->eraseFromParent();
882       ++NumDeleted;
883     }
884   }
885   return Changed;
886 }
887 
888 /// Walk the use list of V, constant folding all of the instructions that are
889 /// foldable.
890 static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
891                                 TargetLibraryInfo *TLI) {
892   for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
893     if (Instruction *I = dyn_cast<Instruction>(*UI++))
894       if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
895         I->replaceAllUsesWith(NewC);
896 
897         // Advance UI to the next non-I use to avoid invalidating it!
898         // Instructions could multiply use V.
899         while (UI != E && *UI == I)
900           ++UI;
901         if (isInstructionTriviallyDead(I, TLI))
902           I->eraseFromParent();
903       }
904 }
905 
906 /// This function takes the specified global variable, and transforms the
907 /// program as if it always contained the result of the specified malloc.
908 /// Because it is always the result of the specified malloc, there is no reason
909 /// to actually DO the malloc.  Instead, turn the malloc into a global, and any
910 /// loads of GV as uses of the new global.
911 static GlobalVariable *
912 OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
913                                   uint64_t AllocSize, Constant *InitVal,
914                                   const DataLayout &DL,
915                                   TargetLibraryInfo *TLI) {
916   LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI
917                     << '\n');
918 
919   // Create global of type [AllocSize x i8].
920   Type *GlobalType = ArrayType::get(Type::getInt8Ty(GV->getContext()),
921                                     AllocSize);
922 
923   // Create the new global variable.  The contents of the allocated memory is
924   // undefined initially, so initialize with an undef value.
925   GlobalVariable *NewGV = new GlobalVariable(
926       *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
927       UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
928       GV->getThreadLocalMode());
929 
930   // Initialize the global at the point of the original call.  Note that this
931   // is a different point from the initialization referred to below for the
932   // nullability handling.  Sublety: We have not proven the original global was
933   // only initialized once.  As such, we can not fold this into the initializer
934   // of the new global as may need to re-init the storage multiple times.
935   if (!isa<UndefValue>(InitVal)) {
936     IRBuilder<> Builder(CI->getNextNode());
937     // TODO: Use alignment above if align!=1
938     Builder.CreateMemSet(NewGV, InitVal, AllocSize, std::nullopt);
939   }
940 
941   // Update users of the allocation to use the new global instead.
942   CI->replaceAllUsesWith(NewGV);
943 
944   // If there is a comparison against null, we will insert a global bool to
945   // keep track of whether the global was initialized yet or not.
946   GlobalVariable *InitBool =
947     new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
948                        GlobalValue::InternalLinkage,
949                        ConstantInt::getFalse(GV->getContext()),
950                        GV->getName()+".init", GV->getThreadLocalMode());
951   bool InitBoolUsed = false;
952 
953   // Loop over all instruction uses of GV, processing them in turn.
954   SmallVector<Value *, 4> Guses;
955   allUsesOfLoadAndStores(GV, Guses);
956   for (auto *U : Guses) {
957     if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
958       // The global is initialized when the store to it occurs. If the stored
959       // value is null value, the global bool is set to false, otherwise true.
960       new StoreInst(ConstantInt::getBool(
961                         GV->getContext(),
962                         !isa<ConstantPointerNull>(SI->getValueOperand())),
963                     InitBool, false, Align(1), SI->getOrdering(),
964                     SI->getSyncScopeID(), SI->getIterator());
965       SI->eraseFromParent();
966       continue;
967     }
968 
969     LoadInst *LI = cast<LoadInst>(U);
970     while (!LI->use_empty()) {
971       Use &LoadUse = *LI->use_begin();
972       ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
973       if (!ICI) {
974         LoadUse.set(NewGV);
975         continue;
976       }
977 
978       // Replace the cmp X, 0 with a use of the bool value.
979       Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
980                                InitBool->getName() + ".val", false, Align(1),
981                                LI->getOrdering(), LI->getSyncScopeID(),
982                                LI->getIterator());
983       InitBoolUsed = true;
984       switch (ICI->getPredicate()) {
985       default: llvm_unreachable("Unknown ICmp Predicate!");
986       case ICmpInst::ICMP_ULT: // X < null -> always false
987         LV = ConstantInt::getFalse(GV->getContext());
988         break;
989       case ICmpInst::ICMP_UGE: // X >= null -> always true
990         LV = ConstantInt::getTrue(GV->getContext());
991         break;
992       case ICmpInst::ICMP_ULE:
993       case ICmpInst::ICMP_EQ:
994         LV = BinaryOperator::CreateNot(LV, "notinit", ICI->getIterator());
995         break;
996       case ICmpInst::ICMP_NE:
997       case ICmpInst::ICMP_UGT:
998         break;  // no change.
999       }
1000       ICI->replaceAllUsesWith(LV);
1001       ICI->eraseFromParent();
1002     }
1003     LI->eraseFromParent();
1004   }
1005 
1006   // If the initialization boolean was used, insert it, otherwise delete it.
1007   if (!InitBoolUsed) {
1008     while (!InitBool->use_empty())  // Delete initializations
1009       cast<StoreInst>(InitBool->user_back())->eraseFromParent();
1010     delete InitBool;
1011   } else
1012     GV->getParent()->insertGlobalVariable(GV->getIterator(), InitBool);
1013 
1014   // Now the GV is dead, nuke it and the allocation..
1015   GV->eraseFromParent();
1016   CI->eraseFromParent();
1017 
1018   // To further other optimizations, loop over all users of NewGV and try to
1019   // constant prop them.  This will promote GEP instructions with constant
1020   // indices into GEP constant-exprs, which will allow global-opt to hack on it.
1021   ConstantPropUsersOf(NewGV, DL, TLI);
1022 
1023   return NewGV;
1024 }
1025 
1026 /// Scan the use-list of GV checking to make sure that there are no complex uses
1027 /// of GV.  We permit simple things like dereferencing the pointer, but not
1028 /// storing through the address, unless it is to the specified global.
1029 static bool
1030 valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
1031                                           const GlobalVariable *GV) {
1032   SmallPtrSet<const Value *, 4> Visited;
1033   SmallVector<const Value *, 4> Worklist;
1034   Worklist.push_back(CI);
1035 
1036   while (!Worklist.empty()) {
1037     const Value *V = Worklist.pop_back_val();
1038     if (!Visited.insert(V).second)
1039       continue;
1040 
1041     for (const Use &VUse : V->uses()) {
1042       const User *U = VUse.getUser();
1043       if (isa<LoadInst>(U) || isa<CmpInst>(U))
1044         continue; // Fine, ignore.
1045 
1046       if (auto *SI = dyn_cast<StoreInst>(U)) {
1047         if (SI->getValueOperand() == V &&
1048             SI->getPointerOperand()->stripPointerCasts() != GV)
1049           return false; // Storing the pointer not into GV... bad.
1050         continue; // Otherwise, storing through it, or storing into GV... fine.
1051       }
1052 
1053       if (auto *BCI = dyn_cast<BitCastInst>(U)) {
1054         Worklist.push_back(BCI);
1055         continue;
1056       }
1057 
1058       if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
1059         Worklist.push_back(GEPI);
1060         continue;
1061       }
1062 
1063       return false;
1064     }
1065   }
1066 
1067   return true;
1068 }
1069 
1070 /// If we have a global that is only initialized with a fixed size allocation
1071 /// try to transform the program to use global memory instead of heap
1072 /// allocated memory. This eliminates dynamic allocation, avoids an indirection
1073 /// accessing the data, and exposes the resultant global to further GlobalOpt.
1074 static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
1075                                                    CallInst *CI,
1076                                                    const DataLayout &DL,
1077                                                    TargetLibraryInfo *TLI) {
1078   if (!isRemovableAlloc(CI, TLI))
1079     // Must be able to remove the call when we get done..
1080     return false;
1081 
1082   Type *Int8Ty = Type::getInt8Ty(CI->getFunction()->getContext());
1083   Constant *InitVal = getInitialValueOfAllocation(CI, TLI, Int8Ty);
1084   if (!InitVal)
1085     // Must be able to emit a memset for initialization
1086     return false;
1087 
1088   uint64_t AllocSize;
1089   if (!getObjectSize(CI, AllocSize, DL, TLI, ObjectSizeOpts()))
1090     return false;
1091 
1092   // Restrict this transformation to only working on small allocations
1093   // (2048 bytes currently), as we don't want to introduce a 16M global or
1094   // something.
1095   if (AllocSize >= 2048)
1096     return false;
1097 
1098   // We can't optimize this global unless all uses of it are *known* to be
1099   // of the malloc value, not of the null initializer value (consider a use
1100   // that compares the global's value against zero to see if the malloc has
1101   // been reached).  To do this, we check to see if all uses of the global
1102   // would trap if the global were null: this proves that they must all
1103   // happen after the malloc.
1104   if (!allUsesOfLoadedValueWillTrapIfNull(GV))
1105     return false;
1106 
1107   // We can't optimize this if the malloc itself is used in a complex way,
1108   // for example, being stored into multiple globals.  This allows the
1109   // malloc to be stored into the specified global, loaded, gep, icmp'd.
1110   // These are all things we could transform to using the global for.
1111   if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
1112     return false;
1113 
1114   OptimizeGlobalAddressOfAllocation(GV, CI, AllocSize, InitVal, DL, TLI);
1115   return true;
1116 }
1117 
1118 // Try to optimize globals based on the knowledge that only one value (besides
1119 // its initializer) is ever stored to the global.
1120 static bool
1121 optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
1122                          const DataLayout &DL,
1123                          function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
1124   // Ignore no-op GEPs and bitcasts.
1125   StoredOnceVal = StoredOnceVal->stripPointerCasts();
1126 
1127   // If we are dealing with a pointer global that is initialized to null and
1128   // only has one (non-null) value stored into it, then we can optimize any
1129   // users of the loaded value (often calls and loads) that would trap if the
1130   // value was null.
1131   if (GV->getInitializer()->getType()->isPointerTy() &&
1132       GV->getInitializer()->isNullValue() &&
1133       StoredOnceVal->getType()->isPointerTy() &&
1134       !NullPointerIsDefined(
1135           nullptr /* F */,
1136           GV->getInitializer()->getType()->getPointerAddressSpace())) {
1137     if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
1138       // Optimize away any trapping uses of the loaded value.
1139       if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
1140         return true;
1141     } else if (isAllocationFn(StoredOnceVal, GetTLI)) {
1142       if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) {
1143         auto *TLI = &GetTLI(*CI->getFunction());
1144         if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, DL, TLI))
1145           return true;
1146       }
1147     }
1148   }
1149 
1150   return false;
1151 }
1152 
1153 /// At this point, we have learned that the only two values ever stored into GV
1154 /// are its initializer and OtherVal.  See if we can shrink the global into a
1155 /// boolean and select between the two values whenever it is used.  This exposes
1156 /// the values to other scalar optimizations.
1157 static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
1158   Type *GVElType = GV->getValueType();
1159 
1160   // If GVElType is already i1, it is already shrunk.  If the type of the GV is
1161   // an FP value, pointer or vector, don't do this optimization because a select
1162   // between them is very expensive and unlikely to lead to later
1163   // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
1164   // where v1 and v2 both require constant pool loads, a big loss.
1165   if (GVElType == Type::getInt1Ty(GV->getContext()) ||
1166       GVElType->isFloatingPointTy() ||
1167       GVElType->isPointerTy() || GVElType->isVectorTy())
1168     return false;
1169 
1170   // Walk the use list of the global seeing if all the uses are load or store.
1171   // If there is anything else, bail out.
1172   for (User *U : GV->users()) {
1173     if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
1174       return false;
1175     if (getLoadStoreType(U) != GVElType)
1176       return false;
1177   }
1178 
1179   LLVM_DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV << "\n");
1180 
1181   // Create the new global, initializing it to false.
1182   GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
1183                                              false,
1184                                              GlobalValue::InternalLinkage,
1185                                         ConstantInt::getFalse(GV->getContext()),
1186                                              GV->getName()+".b",
1187                                              GV->getThreadLocalMode(),
1188                                              GV->getType()->getAddressSpace());
1189   NewGV->copyAttributesFrom(GV);
1190   GV->getParent()->insertGlobalVariable(GV->getIterator(), NewGV);
1191 
1192   Constant *InitVal = GV->getInitializer();
1193   assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
1194          "No reason to shrink to bool!");
1195 
1196   SmallVector<DIGlobalVariableExpression *, 1> GVs;
1197   GV->getDebugInfo(GVs);
1198 
1199   // If initialized to zero and storing one into the global, we can use a cast
1200   // instead of a select to synthesize the desired value.
1201   bool IsOneZero = false;
1202   bool EmitOneOrZero = true;
1203   auto *CI = dyn_cast<ConstantInt>(OtherVal);
1204   if (CI && CI->getValue().getActiveBits() <= 64) {
1205     IsOneZero = InitVal->isNullValue() && CI->isOne();
1206 
1207     auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer());
1208     if (CIInit && CIInit->getValue().getActiveBits() <= 64) {
1209       uint64_t ValInit = CIInit->getZExtValue();
1210       uint64_t ValOther = CI->getZExtValue();
1211       uint64_t ValMinus = ValOther - ValInit;
1212 
1213       for(auto *GVe : GVs){
1214         DIGlobalVariable *DGV = GVe->getVariable();
1215         DIExpression *E = GVe->getExpression();
1216         const DataLayout &DL = GV->getDataLayout();
1217         unsigned SizeInOctets =
1218             DL.getTypeAllocSizeInBits(NewGV->getValueType()) / 8;
1219 
1220         // It is expected that the address of global optimized variable is on
1221         // top of the stack. After optimization, value of that variable will
1222         // be ether 0 for initial value or 1 for other value. The following
1223         // expression should return constant integer value depending on the
1224         // value at global object address:
1225         // val * (ValOther - ValInit) + ValInit:
1226         // DW_OP_deref DW_OP_constu <ValMinus>
1227         // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
1228         SmallVector<uint64_t, 12> Ops = {
1229             dwarf::DW_OP_deref_size, SizeInOctets,
1230             dwarf::DW_OP_constu, ValMinus,
1231             dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
1232             dwarf::DW_OP_plus};
1233         bool WithStackValue = true;
1234         E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
1235         DIGlobalVariableExpression *DGVE =
1236           DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
1237         NewGV->addDebugInfo(DGVE);
1238      }
1239      EmitOneOrZero = false;
1240     }
1241   }
1242 
1243   if (EmitOneOrZero) {
1244      // FIXME: This will only emit address for debugger on which will
1245      // be written only 0 or 1.
1246      for(auto *GV : GVs)
1247        NewGV->addDebugInfo(GV);
1248    }
1249 
1250   while (!GV->use_empty()) {
1251     Instruction *UI = cast<Instruction>(GV->user_back());
1252     if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
1253       // Change the store into a boolean store.
1254       bool StoringOther = SI->getOperand(0) == OtherVal;
1255       // Only do this if we weren't storing a loaded value.
1256       Value *StoreVal;
1257       if (StoringOther || SI->getOperand(0) == InitVal) {
1258         StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
1259                                     StoringOther);
1260       } else {
1261         // Otherwise, we are storing a previously loaded copy.  To do this,
1262         // change the copy from copying the original value to just copying the
1263         // bool.
1264         Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
1265 
1266         // If we've already replaced the input, StoredVal will be a cast or
1267         // select instruction.  If not, it will be a load of the original
1268         // global.
1269         if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
1270           assert(LI->getOperand(0) == GV && "Not a copy!");
1271           // Insert a new load, to preserve the saved value.
1272           StoreVal =
1273               new LoadInst(NewGV->getValueType(), NewGV, LI->getName() + ".b",
1274                            false, Align(1), LI->getOrdering(),
1275                            LI->getSyncScopeID(), LI->getIterator());
1276         } else {
1277           assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
1278                  "This is not a form that we understand!");
1279           StoreVal = StoredVal->getOperand(0);
1280           assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
1281         }
1282       }
1283       StoreInst *NSI =
1284           new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(),
1285                         SI->getSyncScopeID(), SI->getIterator());
1286       NSI->setDebugLoc(SI->getDebugLoc());
1287     } else {
1288       // Change the load into a load of bool then a select.
1289       LoadInst *LI = cast<LoadInst>(UI);
1290       LoadInst *NLI = new LoadInst(
1291           NewGV->getValueType(), NewGV, LI->getName() + ".b", false, Align(1),
1292           LI->getOrdering(), LI->getSyncScopeID(), LI->getIterator());
1293       Instruction *NSI;
1294       if (IsOneZero)
1295         NSI = new ZExtInst(NLI, LI->getType(), "", LI->getIterator());
1296       else
1297         NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI->getIterator());
1298       NSI->takeName(LI);
1299       // Since LI is split into two instructions, NLI and NSI both inherit the
1300       // same DebugLoc
1301       NLI->setDebugLoc(LI->getDebugLoc());
1302       NSI->setDebugLoc(LI->getDebugLoc());
1303       LI->replaceAllUsesWith(NSI);
1304     }
1305     UI->eraseFromParent();
1306   }
1307 
1308   // Retain the name of the old global variable. People who are debugging their
1309   // programs may expect these variables to be named the same.
1310   NewGV->takeName(GV);
1311   GV->eraseFromParent();
1312   return true;
1313 }
1314 
1315 static bool
1316 deleteIfDead(GlobalValue &GV,
1317              SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
1318              function_ref<void(Function &)> DeleteFnCallback = nullptr) {
1319   GV.removeDeadConstantUsers();
1320 
1321   if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
1322     return false;
1323 
1324   if (const Comdat *C = GV.getComdat())
1325     if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
1326       return false;
1327 
1328   bool Dead;
1329   if (auto *F = dyn_cast<Function>(&GV))
1330     Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
1331   else
1332     Dead = GV.use_empty();
1333   if (!Dead)
1334     return false;
1335 
1336   LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
1337   if (auto *F = dyn_cast<Function>(&GV)) {
1338     if (DeleteFnCallback)
1339       DeleteFnCallback(*F);
1340   }
1341   GV.eraseFromParent();
1342   ++NumDeleted;
1343   return true;
1344 }
1345 
1346 static bool isPointerValueDeadOnEntryToFunction(
1347     const Function *F, GlobalValue *GV,
1348     function_ref<DominatorTree &(Function &)> LookupDomTree) {
1349   // Find all uses of GV. We expect them all to be in F, and if we can't
1350   // identify any of the uses we bail out.
1351   //
1352   // On each of these uses, identify if the memory that GV points to is
1353   // used/required/live at the start of the function. If it is not, for example
1354   // if the first thing the function does is store to the GV, the GV can
1355   // possibly be demoted.
1356   //
1357   // We don't do an exhaustive search for memory operations - simply look
1358   // through bitcasts as they're quite common and benign.
1359   const DataLayout &DL = GV->getDataLayout();
1360   SmallVector<LoadInst *, 4> Loads;
1361   SmallVector<StoreInst *, 4> Stores;
1362   for (auto *U : GV->users()) {
1363     Instruction *I = dyn_cast<Instruction>(U);
1364     if (!I)
1365       return false;
1366     assert(I->getParent()->getParent() == F);
1367 
1368     if (auto *LI = dyn_cast<LoadInst>(I))
1369       Loads.push_back(LI);
1370     else if (auto *SI = dyn_cast<StoreInst>(I))
1371       Stores.push_back(SI);
1372     else
1373       return false;
1374   }
1375 
1376   // We have identified all uses of GV into loads and stores. Now check if all
1377   // of them are known not to depend on the value of the global at the function
1378   // entry point. We do this by ensuring that every load is dominated by at
1379   // least one store.
1380   auto &DT = LookupDomTree(*const_cast<Function *>(F));
1381 
1382   // The below check is quadratic. Check we're not going to do too many tests.
1383   // FIXME: Even though this will always have worst-case quadratic time, we
1384   // could put effort into minimizing the average time by putting stores that
1385   // have been shown to dominate at least one load at the beginning of the
1386   // Stores array, making subsequent dominance checks more likely to succeed
1387   // early.
1388   //
1389   // The threshold here is fairly large because global->local demotion is a
1390   // very powerful optimization should it fire.
1391   const unsigned Threshold = 100;
1392   if (Loads.size() * Stores.size() > Threshold)
1393     return false;
1394 
1395   for (auto *L : Loads) {
1396     auto *LTy = L->getType();
1397     if (none_of(Stores, [&](const StoreInst *S) {
1398           auto *STy = S->getValueOperand()->getType();
1399           // The load is only dominated by the store if DomTree says so
1400           // and the number of bits loaded in L is less than or equal to
1401           // the number of bits stored in S.
1402           return DT.dominates(S, L) &&
1403                  DL.getTypeStoreSize(LTy).getFixedValue() <=
1404                      DL.getTypeStoreSize(STy).getFixedValue();
1405         }))
1406       return false;
1407   }
1408   // All loads have known dependences inside F, so the global can be localized.
1409   return true;
1410 }
1411 
1412 // For a global variable with one store, if the store dominates any loads,
1413 // those loads will always load the stored value (as opposed to the
1414 // initializer), even in the presence of recursion.
1415 static bool forwardStoredOnceStore(
1416     GlobalVariable *GV, const StoreInst *StoredOnceStore,
1417     function_ref<DominatorTree &(Function &)> LookupDomTree) {
1418   const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
1419   // We can do this optimization for non-constants in nosync + norecurse
1420   // functions, but globals used in exactly one norecurse functions are already
1421   // promoted to an alloca.
1422   if (!isa<Constant>(StoredOnceValue))
1423     return false;
1424   const Function *F = StoredOnceStore->getFunction();
1425   SmallVector<LoadInst *> Loads;
1426   for (User *U : GV->users()) {
1427     if (auto *LI = dyn_cast<LoadInst>(U)) {
1428       if (LI->getFunction() == F &&
1429           LI->getType() == StoredOnceValue->getType() && LI->isSimple())
1430         Loads.push_back(LI);
1431     }
1432   }
1433   // Only compute DT if we have any loads to examine.
1434   bool MadeChange = false;
1435   if (!Loads.empty()) {
1436     auto &DT = LookupDomTree(*const_cast<Function *>(F));
1437     for (auto *LI : Loads) {
1438       if (DT.dominates(StoredOnceStore, LI)) {
1439         LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue));
1440         LI->eraseFromParent();
1441         MadeChange = true;
1442       }
1443     }
1444   }
1445   return MadeChange;
1446 }
1447 
1448 /// Analyze the specified global variable and optimize
1449 /// it if possible.  If we make a change, return true.
1450 static bool
1451 processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
1452                       function_ref<TargetTransformInfo &(Function &)> GetTTI,
1453                       function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1454                       function_ref<DominatorTree &(Function &)> LookupDomTree) {
1455   auto &DL = GV->getDataLayout();
1456   // If this is a first class global and has only one accessing function and
1457   // this function is non-recursive, we replace the global with a local alloca
1458   // in this function.
1459   //
1460   // NOTE: It doesn't make sense to promote non-single-value types since we
1461   // are just replacing static memory to stack memory.
1462   //
1463   // If the global is in different address space, don't bring it to stack.
1464   if (!GS.HasMultipleAccessingFunctions &&
1465       GS.AccessingFunction &&
1466       GV->getValueType()->isSingleValueType() &&
1467       GV->getType()->getAddressSpace() == DL.getAllocaAddrSpace() &&
1468       !GV->isExternallyInitialized() &&
1469       GS.AccessingFunction->doesNotRecurse() &&
1470       isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
1471                                           LookupDomTree)) {
1472     const DataLayout &DL = GV->getDataLayout();
1473 
1474     LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
1475     BasicBlock::iterator FirstI =
1476         GS.AccessingFunction->getEntryBlock().begin().getNonConst();
1477     Type *ElemTy = GV->getValueType();
1478     // FIXME: Pass Global's alignment when globals have alignment
1479     AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(),
1480                                         nullptr, GV->getName(), FirstI);
1481     if (!isa<UndefValue>(GV->getInitializer()))
1482       new StoreInst(GV->getInitializer(), Alloca, FirstI);
1483 
1484     GV->replaceAllUsesWith(Alloca);
1485     GV->eraseFromParent();
1486     ++NumLocalized;
1487     return true;
1488   }
1489 
1490   bool Changed = false;
1491 
1492   // If the global is never loaded (but may be stored to), it is dead.
1493   // Delete it now.
1494   if (!GS.IsLoaded) {
1495     LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
1496 
1497     if (isLeakCheckerRoot(GV)) {
1498       // Delete any constant stores to the global.
1499       Changed = CleanupPointerRootUsers(GV, GetTLI);
1500     } else {
1501       // Delete any stores we can find to the global.  We may not be able to
1502       // make it completely dead though.
1503       Changed = CleanupConstantGlobalUsers(GV, DL);
1504     }
1505 
1506     // If the global is dead now, delete it.
1507     if (GV->use_empty()) {
1508       GV->eraseFromParent();
1509       ++NumDeleted;
1510       Changed = true;
1511     }
1512     return Changed;
1513 
1514   }
1515   if (GS.StoredType <= GlobalStatus::InitializerStored) {
1516     LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
1517 
1518     // Don't actually mark a global constant if it's atomic because atomic loads
1519     // are implemented by a trivial cmpxchg in some edge-cases and that usually
1520     // requires write access to the variable even if it's not actually changed.
1521     if (GS.Ordering == AtomicOrdering::NotAtomic) {
1522       assert(!GV->isConstant() && "Expected a non-constant global");
1523       GV->setConstant(true);
1524       Changed = true;
1525     }
1526 
1527     // Clean up any obviously simplifiable users now.
1528     Changed |= CleanupConstantGlobalUsers(GV, DL);
1529 
1530     // If the global is dead now, just nuke it.
1531     if (GV->use_empty()) {
1532       LLVM_DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
1533                         << "all users and delete global!\n");
1534       GV->eraseFromParent();
1535       ++NumDeleted;
1536       return true;
1537     }
1538 
1539     // Fall through to the next check; see if we can optimize further.
1540     ++NumMarked;
1541   }
1542   if (!GV->getInitializer()->getType()->isSingleValueType()) {
1543     const DataLayout &DL = GV->getDataLayout();
1544     if (SRAGlobal(GV, DL))
1545       return true;
1546   }
1547   Value *StoredOnceValue = GS.getStoredOnceValue();
1548   if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
1549     Function &StoreFn =
1550         const_cast<Function &>(*GS.StoredOnceStore->getFunction());
1551     bool CanHaveNonUndefGlobalInitializer =
1552         GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
1553             GV->getType()->getAddressSpace());
1554     // If the initial value for the global was an undef value, and if only
1555     // one other value was stored into it, we can just change the
1556     // initializer to be the stored value, then delete all stores to the
1557     // global.  This allows us to mark it constant.
1558     // This is restricted to address spaces that allow globals to have
1559     // initializers. NVPTX, for example, does not support initializers for
1560     // shared memory (AS 3).
1561     auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
1562     if (SOVConstant && isa<UndefValue>(GV->getInitializer()) &&
1563         DL.getTypeAllocSize(SOVConstant->getType()) ==
1564             DL.getTypeAllocSize(GV->getValueType()) &&
1565         CanHaveNonUndefGlobalInitializer) {
1566       if (SOVConstant->getType() == GV->getValueType()) {
1567         // Change the initializer in place.
1568         GV->setInitializer(SOVConstant);
1569       } else {
1570         // Create a new global with adjusted type.
1571         auto *NGV = new GlobalVariable(
1572             *GV->getParent(), SOVConstant->getType(), GV->isConstant(),
1573             GV->getLinkage(), SOVConstant, "", GV, GV->getThreadLocalMode(),
1574             GV->getAddressSpace());
1575         NGV->takeName(GV);
1576         NGV->copyAttributesFrom(GV);
1577         GV->replaceAllUsesWith(NGV);
1578         GV->eraseFromParent();
1579         GV = NGV;
1580       }
1581 
1582       // Clean up any obviously simplifiable users now.
1583       CleanupConstantGlobalUsers(GV, DL);
1584 
1585       if (GV->use_empty()) {
1586         LLVM_DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
1587                           << "simplify all users and delete global!\n");
1588         GV->eraseFromParent();
1589         ++NumDeleted;
1590       }
1591       ++NumSubstitute;
1592       return true;
1593     }
1594 
1595     // Try to optimize globals based on the knowledge that only one value
1596     // (besides its initializer) is ever stored to the global.
1597     if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI))
1598       return true;
1599 
1600     // Try to forward the store to any loads. If we have more than one store, we
1601     // may have a store of the initializer between StoredOnceStore and a load.
1602     if (GS.NumStores == 1)
1603       if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree))
1604         return true;
1605 
1606     // Otherwise, if the global was not a boolean, we can shrink it to be a
1607     // boolean. Skip this optimization for AS that doesn't allow an initializer.
1608     if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
1609         (!isa<UndefValue>(GV->getInitializer()) ||
1610          CanHaveNonUndefGlobalInitializer)) {
1611       if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
1612         ++NumShrunkToBool;
1613         return true;
1614       }
1615     }
1616   }
1617 
1618   return Changed;
1619 }
1620 
1621 /// Analyze the specified global variable and optimize it if possible.  If we
1622 /// make a change, return true.
1623 static bool
1624 processGlobal(GlobalValue &GV,
1625               function_ref<TargetTransformInfo &(Function &)> GetTTI,
1626               function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1627               function_ref<DominatorTree &(Function &)> LookupDomTree) {
1628   if (GV.getName().starts_with("llvm."))
1629     return false;
1630 
1631   GlobalStatus GS;
1632 
1633   if (GlobalStatus::analyzeGlobal(&GV, GS))
1634     return false;
1635 
1636   bool Changed = false;
1637   if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
1638     auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
1639                                                : GlobalValue::UnnamedAddr::Local;
1640     if (NewUnnamedAddr != GV.getUnnamedAddr()) {
1641       GV.setUnnamedAddr(NewUnnamedAddr);
1642       NumUnnamed++;
1643       Changed = true;
1644     }
1645   }
1646 
1647   // Do more involved optimizations if the global is internal.
1648   if (!GV.hasLocalLinkage())
1649     return Changed;
1650 
1651   auto *GVar = dyn_cast<GlobalVariable>(&GV);
1652   if (!GVar)
1653     return Changed;
1654 
1655   if (GVar->isConstant() || !GVar->hasInitializer())
1656     return Changed;
1657 
1658   return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) ||
1659          Changed;
1660 }
1661 
1662 /// Walk all of the direct calls of the specified function, changing them to
1663 /// FastCC.
1664 static void ChangeCalleesToFastCall(Function *F) {
1665   for (User *U : F->users()) {
1666     if (isa<BlockAddress>(U))
1667       continue;
1668     cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
1669   }
1670 }
1671 
1672 static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
1673                                Attribute::AttrKind A) {
1674   unsigned AttrIndex;
1675   if (Attrs.hasAttrSomewhere(A, &AttrIndex))
1676     return Attrs.removeAttributeAtIndex(C, AttrIndex, A);
1677   return Attrs;
1678 }
1679 
1680 static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
1681   F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
1682   for (User *U : F->users()) {
1683     if (isa<BlockAddress>(U))
1684       continue;
1685     CallBase *CB = cast<CallBase>(U);
1686     CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A));
1687   }
1688 }
1689 
1690 /// Return true if this is a calling convention that we'd like to change.  The
1691 /// idea here is that we don't want to mess with the convention if the user
1692 /// explicitly requested something with performance implications like coldcc,
1693 /// GHC, or anyregcc.
1694 static bool hasChangeableCCImpl(Function *F) {
1695   CallingConv::ID CC = F->getCallingConv();
1696 
1697   // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
1698   if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
1699     return false;
1700 
1701   if (F->isVarArg())
1702     return false;
1703 
1704   // FIXME: Change CC for the whole chain of musttail calls when possible.
1705   //
1706   // Can't change CC of the function that either has musttail calls, or is a
1707   // musttail callee itself
1708   for (User *U : F->users()) {
1709     if (isa<BlockAddress>(U))
1710       continue;
1711     CallInst* CI = dyn_cast<CallInst>(U);
1712     if (!CI)
1713       continue;
1714 
1715     if (CI->isMustTailCall())
1716       return false;
1717   }
1718 
1719   for (BasicBlock &BB : *F)
1720     if (BB.getTerminatingMustTailCall())
1721       return false;
1722 
1723   return !F->hasAddressTaken();
1724 }
1725 
1726 using ChangeableCCCacheTy = SmallDenseMap<Function *, bool, 8>;
1727 static bool hasChangeableCC(Function *F,
1728                             ChangeableCCCacheTy &ChangeableCCCache) {
1729   auto Res = ChangeableCCCache.try_emplace(F, false);
1730   if (Res.second)
1731     Res.first->second = hasChangeableCCImpl(F);
1732   return Res.first->second;
1733 }
1734 
1735 /// Return true if the block containing the call site has a BlockFrequency of
1736 /// less than ColdCCRelFreq% of the entry block.
1737 static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) {
1738   const BranchProbability ColdProb(ColdCCRelFreq, 100);
1739   auto *CallSiteBB = CB.getParent();
1740   auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
1741   auto CallerEntryFreq =
1742       CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock()));
1743   return CallSiteFreq < CallerEntryFreq * ColdProb;
1744 }
1745 
1746 // This function checks if the input function F is cold at all call sites. It
1747 // also looks each call site's containing function, returning false if the
1748 // caller function contains other non cold calls. The input vector AllCallsCold
1749 // contains a list of functions that only have call sites in cold blocks.
1750 static bool
1751 isValidCandidateForColdCC(Function &F,
1752                           function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1753                           const std::vector<Function *> &AllCallsCold) {
1754 
1755   if (F.user_empty())
1756     return false;
1757 
1758   for (User *U : F.users()) {
1759     if (isa<BlockAddress>(U))
1760       continue;
1761 
1762     CallBase &CB = cast<CallBase>(*U);
1763     Function *CallerFunc = CB.getParent()->getParent();
1764     BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
1765     if (!isColdCallSite(CB, CallerBFI))
1766       return false;
1767     if (!llvm::is_contained(AllCallsCold, CallerFunc))
1768       return false;
1769   }
1770   return true;
1771 }
1772 
1773 static void changeCallSitesToColdCC(Function *F) {
1774   for (User *U : F->users()) {
1775     if (isa<BlockAddress>(U))
1776       continue;
1777     cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
1778   }
1779 }
1780 
1781 // This function iterates over all the call instructions in the input Function
1782 // and checks that all call sites are in cold blocks and are allowed to use the
1783 // coldcc calling convention.
1784 static bool
1785 hasOnlyColdCalls(Function &F,
1786                  function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1787                  ChangeableCCCacheTy &ChangeableCCCache) {
1788   for (BasicBlock &BB : F) {
1789     for (Instruction &I : BB) {
1790       if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1791         // Skip over isline asm instructions since they aren't function calls.
1792         if (CI->isInlineAsm())
1793           continue;
1794         Function *CalledFn = CI->getCalledFunction();
1795         if (!CalledFn)
1796           return false;
1797         // Skip over intrinsics since they won't remain as function calls.
1798         // Important to do this check before the linkage check below so we
1799         // won't bail out on debug intrinsics, possibly making the generated
1800         // code dependent on the presence of debug info.
1801         if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
1802           continue;
1803         if (!CalledFn->hasLocalLinkage())
1804           return false;
1805         // Check if it's valid to use coldcc calling convention.
1806         if (!hasChangeableCC(CalledFn, ChangeableCCCache))
1807           return false;
1808         BlockFrequencyInfo &CallerBFI = GetBFI(F);
1809         if (!isColdCallSite(*CI, CallerBFI))
1810           return false;
1811       }
1812     }
1813   }
1814   return true;
1815 }
1816 
1817 static bool hasMustTailCallers(Function *F) {
1818   for (User *U : F->users()) {
1819     CallBase *CB = dyn_cast<CallBase>(U);
1820     if (!CB) {
1821       assert(isa<BlockAddress>(U) &&
1822              "Expected either CallBase or BlockAddress");
1823       continue;
1824     }
1825     if (CB->isMustTailCall())
1826       return true;
1827   }
1828   return false;
1829 }
1830 
1831 static bool hasInvokeCallers(Function *F) {
1832   for (User *U : F->users())
1833     if (isa<InvokeInst>(U))
1834       return true;
1835   return false;
1836 }
1837 
1838 static void RemovePreallocated(Function *F) {
1839   RemoveAttribute(F, Attribute::Preallocated);
1840 
1841   auto *M = F->getParent();
1842 
1843   IRBuilder<> Builder(M->getContext());
1844 
1845   // Cannot modify users() while iterating over it, so make a copy.
1846   SmallVector<User *, 4> PreallocatedCalls(F->users());
1847   for (User *U : PreallocatedCalls) {
1848     CallBase *CB = dyn_cast<CallBase>(U);
1849     if (!CB)
1850       continue;
1851 
1852     assert(
1853         !CB->isMustTailCall() &&
1854         "Shouldn't call RemotePreallocated() on a musttail preallocated call");
1855     // Create copy of call without "preallocated" operand bundle.
1856     SmallVector<OperandBundleDef, 1> OpBundles;
1857     CB->getOperandBundlesAsDefs(OpBundles);
1858     CallBase *PreallocatedSetup = nullptr;
1859     for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
1860       if (It->getTag() == "preallocated") {
1861         PreallocatedSetup = cast<CallBase>(*It->input_begin());
1862         OpBundles.erase(It);
1863         break;
1864       }
1865     }
1866     assert(PreallocatedSetup && "Did not find preallocated bundle");
1867     uint64_t ArgCount =
1868         cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();
1869 
1870     assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
1871            "Unknown indirect call type");
1872     CallBase *NewCB = CallBase::Create(CB, OpBundles, CB->getIterator());
1873     CB->replaceAllUsesWith(NewCB);
1874     NewCB->takeName(CB);
1875     CB->eraseFromParent();
1876 
1877     Builder.SetInsertPoint(PreallocatedSetup);
1878     auto *StackSave = Builder.CreateStackSave();
1879     Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction());
1880     Builder.CreateStackRestore(StackSave);
1881 
1882     // Replace @llvm.call.preallocated.arg() with alloca.
1883     // Cannot modify users() while iterating over it, so make a copy.
1884     // @llvm.call.preallocated.arg() can be called with the same index multiple
1885     // times. So for each @llvm.call.preallocated.arg(), we see if we have
1886     // already created a Value* for the index, and if not, create an alloca and
1887     // bitcast right after the @llvm.call.preallocated.setup() so that it
1888     // dominates all uses.
1889     SmallVector<Value *, 2> ArgAllocas(ArgCount);
1890     SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users());
1891     for (auto *User : PreallocatedArgs) {
1892       auto *UseCall = cast<CallBase>(User);
1893       assert(UseCall->getCalledFunction()->getIntrinsicID() ==
1894                  Intrinsic::call_preallocated_arg &&
1895              "preallocated token use was not a llvm.call.preallocated.arg");
1896       uint64_t AllocArgIndex =
1897           cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue();
1898       Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
1899       if (!AllocaReplacement) {
1900         auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
1901         auto *ArgType =
1902             UseCall->getFnAttr(Attribute::Preallocated).getValueAsType();
1903         auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
1904         Builder.SetInsertPoint(InsertBefore);
1905         auto *Alloca =
1906             Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg");
1907         ArgAllocas[AllocArgIndex] = Alloca;
1908         AllocaReplacement = Alloca;
1909       }
1910 
1911       UseCall->replaceAllUsesWith(AllocaReplacement);
1912       UseCall->eraseFromParent();
1913     }
1914     // Remove @llvm.call.preallocated.setup().
1915     cast<Instruction>(PreallocatedSetup)->eraseFromParent();
1916   }
1917 }
1918 
1919 static bool
1920 OptimizeFunctions(Module &M,
1921                   function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1922                   function_ref<TargetTransformInfo &(Function &)> GetTTI,
1923                   function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1924                   function_ref<DominatorTree &(Function &)> LookupDomTree,
1925                   SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
1926                   function_ref<void(Function &F)> ChangedCFGCallback,
1927                   function_ref<void(Function &F)> DeleteFnCallback) {
1928 
1929   bool Changed = false;
1930 
1931   ChangeableCCCacheTy ChangeableCCCache;
1932   std::vector<Function *> AllCallsCold;
1933   for (Function &F : llvm::make_early_inc_range(M))
1934     if (hasOnlyColdCalls(F, GetBFI, ChangeableCCCache))
1935       AllCallsCold.push_back(&F);
1936 
1937   // Optimize functions.
1938   for (Function &F : llvm::make_early_inc_range(M)) {
1939     // Don't perform global opt pass on naked functions; we don't want fast
1940     // calling conventions for naked functions.
1941     if (F.hasFnAttribute(Attribute::Naked))
1942       continue;
1943 
1944     // Functions without names cannot be referenced outside this module.
1945     if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage())
1946       F.setLinkage(GlobalValue::InternalLinkage);
1947 
1948     if (deleteIfDead(F, NotDiscardableComdats, DeleteFnCallback)) {
1949       Changed = true;
1950       continue;
1951     }
1952 
1953     // LLVM's definition of dominance allows instructions that are cyclic
1954     // in unreachable blocks, e.g.:
1955     // %pat = select i1 %condition, @global, i16* %pat
1956     // because any instruction dominates an instruction in a block that's
1957     // not reachable from entry.
1958     // So, remove unreachable blocks from the function, because a) there's
1959     // no point in analyzing them and b) GlobalOpt should otherwise grow
1960     // some more complicated logic to break these cycles.
1961     // Notify the analysis manager that we've modified the function's CFG.
1962     if (!F.isDeclaration()) {
1963       if (removeUnreachableBlocks(F)) {
1964         Changed = true;
1965         ChangedCFGCallback(F);
1966       }
1967     }
1968 
1969     Changed |= processGlobal(F, GetTTI, GetTLI, LookupDomTree);
1970 
1971     if (!F.hasLocalLinkage())
1972       continue;
1973 
1974     // If we have an inalloca parameter that we can safely remove the
1975     // inalloca attribute from, do so. This unlocks optimizations that
1976     // wouldn't be safe in the presence of inalloca.
1977     // FIXME: We should also hoist alloca affected by this to the entry
1978     // block if possible.
1979     if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
1980         !F.hasAddressTaken() && !hasMustTailCallers(&F) && !F.isVarArg()) {
1981       RemoveAttribute(&F, Attribute::InAlloca);
1982       Changed = true;
1983     }
1984 
1985     // FIXME: handle invokes
1986     // FIXME: handle musttail
1987     if (F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
1988       if (!F.hasAddressTaken() && !hasMustTailCallers(&F) &&
1989           !hasInvokeCallers(&F)) {
1990         RemovePreallocated(&F);
1991         Changed = true;
1992       }
1993       continue;
1994     }
1995 
1996     if (hasChangeableCC(&F, ChangeableCCCache)) {
1997       NumInternalFunc++;
1998       TargetTransformInfo &TTI = GetTTI(F);
1999       // Change the calling convention to coldcc if either stress testing is
2000       // enabled or the target would like to use coldcc on functions which are
2001       // cold at all call sites and the callers contain no other non coldcc
2002       // calls.
2003       if (EnableColdCCStressTest ||
2004           (TTI.useColdCCForColdCall(F) &&
2005            isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
2006         ChangeableCCCache.erase(&F);
2007         F.setCallingConv(CallingConv::Cold);
2008         changeCallSitesToColdCC(&F);
2009         Changed = true;
2010         NumColdCC++;
2011       }
2012     }
2013 
2014     if (hasChangeableCC(&F, ChangeableCCCache)) {
2015       // If this function has a calling convention worth changing, is not a
2016       // varargs function, and is only called directly, promote it to use the
2017       // Fast calling convention.
2018       F.setCallingConv(CallingConv::Fast);
2019       ChangeCalleesToFastCall(&F);
2020       ++NumFastCallFns;
2021       Changed = true;
2022     }
2023 
2024     if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
2025         !F.hasAddressTaken()) {
2026       // The function is not used by a trampoline intrinsic, so it is safe
2027       // to remove the 'nest' attribute.
2028       RemoveAttribute(&F, Attribute::Nest);
2029       ++NumNestRemoved;
2030       Changed = true;
2031     }
2032   }
2033   return Changed;
2034 }
2035 
2036 static bool
2037 OptimizeGlobalVars(Module &M,
2038                    function_ref<TargetTransformInfo &(Function &)> GetTTI,
2039                    function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2040                    function_ref<DominatorTree &(Function &)> LookupDomTree,
2041                    SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2042   bool Changed = false;
2043 
2044   for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
2045     // Global variables without names cannot be referenced outside this module.
2046     if (!GV.hasName() && !GV.isDeclaration() && !GV.hasLocalLinkage())
2047       GV.setLinkage(GlobalValue::InternalLinkage);
2048     // Simplify the initializer.
2049     if (GV.hasInitializer())
2050       if (auto *C = dyn_cast<Constant>(GV.getInitializer())) {
2051         auto &DL = M.getDataLayout();
2052         // TLI is not used in the case of a Constant, so use default nullptr
2053         // for that optional parameter, since we don't have a Function to
2054         // provide GetTLI anyway.
2055         Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
2056         if (New != C)
2057           GV.setInitializer(New);
2058       }
2059 
2060     if (deleteIfDead(GV, NotDiscardableComdats)) {
2061       Changed = true;
2062       continue;
2063     }
2064 
2065     Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
2066   }
2067   return Changed;
2068 }
2069 
2070 /// Evaluate static constructors in the function, if we can.  Return true if we
2071 /// can, false otherwise.
2072 static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
2073                                       TargetLibraryInfo *TLI) {
2074   // Skip external functions.
2075   if (F->isDeclaration())
2076     return false;
2077   // Call the function.
2078   Evaluator Eval(DL, TLI);
2079   Constant *RetValDummy;
2080   bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
2081                                            SmallVector<Constant*, 0>());
2082 
2083   if (EvalSuccess) {
2084     ++NumCtorsEvaluated;
2085 
2086     // We succeeded at evaluation: commit the result.
2087     auto NewInitializers = Eval.getMutatedInitializers();
2088     LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
2089                       << F->getName() << "' to " << NewInitializers.size()
2090                       << " stores.\n");
2091     for (const auto &Pair : NewInitializers)
2092       Pair.first->setInitializer(Pair.second);
2093     for (GlobalVariable *GV : Eval.getInvariants())
2094       GV->setConstant(true);
2095   }
2096 
2097   return EvalSuccess;
2098 }
2099 
2100 static int compareNames(Constant *const *A, Constant *const *B) {
2101   Value *AStripped = (*A)->stripPointerCasts();
2102   Value *BStripped = (*B)->stripPointerCasts();
2103   return AStripped->getName().compare(BStripped->getName());
2104 }
2105 
2106 static void setUsedInitializer(GlobalVariable &V,
2107                                const SmallPtrSetImpl<GlobalValue *> &Init) {
2108   if (Init.empty()) {
2109     V.eraseFromParent();
2110     return;
2111   }
2112 
2113   // Get address space of pointers in the array of pointers.
2114   const Type *UsedArrayType = V.getValueType();
2115   const auto *VAT = cast<ArrayType>(UsedArrayType);
2116   const auto *VEPT = cast<PointerType>(VAT->getArrayElementType());
2117 
2118   // Type of pointer to the array of pointers.
2119   PointerType *PtrTy =
2120       PointerType::get(V.getContext(), VEPT->getAddressSpace());
2121 
2122   SmallVector<Constant *, 8> UsedArray;
2123   for (GlobalValue *GV : Init) {
2124     Constant *Cast = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, PtrTy);
2125     UsedArray.push_back(Cast);
2126   }
2127 
2128   // Sort to get deterministic order.
2129   array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
2130   ArrayType *ATy = ArrayType::get(PtrTy, UsedArray.size());
2131 
2132   Module *M = V.getParent();
2133   V.removeFromParent();
2134   GlobalVariable *NV =
2135       new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
2136                          ConstantArray::get(ATy, UsedArray), "");
2137   NV->takeName(&V);
2138   NV->setSection("llvm.metadata");
2139   delete &V;
2140 }
2141 
2142 namespace {
2143 
2144 /// An easy to access representation of llvm.used and llvm.compiler.used.
2145 class LLVMUsed {
2146   SmallPtrSet<GlobalValue *, 4> Used;
2147   SmallPtrSet<GlobalValue *, 4> CompilerUsed;
2148   GlobalVariable *UsedV;
2149   GlobalVariable *CompilerUsedV;
2150 
2151 public:
2152   LLVMUsed(Module &M) {
2153     SmallVector<GlobalValue *, 4> Vec;
2154     UsedV = collectUsedGlobalVariables(M, Vec, false);
2155     Used = {Vec.begin(), Vec.end()};
2156     Vec.clear();
2157     CompilerUsedV = collectUsedGlobalVariables(M, Vec, true);
2158     CompilerUsed = {Vec.begin(), Vec.end()};
2159   }
2160 
2161   using iterator = SmallPtrSet<GlobalValue *, 4>::iterator;
2162   using used_iterator_range = iterator_range<iterator>;
2163 
2164   iterator usedBegin() { return Used.begin(); }
2165   iterator usedEnd() { return Used.end(); }
2166 
2167   used_iterator_range used() {
2168     return used_iterator_range(usedBegin(), usedEnd());
2169   }
2170 
2171   iterator compilerUsedBegin() { return CompilerUsed.begin(); }
2172   iterator compilerUsedEnd() { return CompilerUsed.end(); }
2173 
2174   used_iterator_range compilerUsed() {
2175     return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
2176   }
2177 
2178   bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
2179 
2180   bool compilerUsedCount(GlobalValue *GV) const {
2181     return CompilerUsed.count(GV);
2182   }
2183 
2184   bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
2185   bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
2186   bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
2187 
2188   bool compilerUsedInsert(GlobalValue *GV) {
2189     return CompilerUsed.insert(GV).second;
2190   }
2191 
2192   void syncVariablesAndSets() {
2193     if (UsedV)
2194       setUsedInitializer(*UsedV, Used);
2195     if (CompilerUsedV)
2196       setUsedInitializer(*CompilerUsedV, CompilerUsed);
2197   }
2198 };
2199 
2200 } // end anonymous namespace
2201 
2202 static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
2203   if (GA.use_empty()) // No use at all.
2204     return false;
2205 
2206   assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
2207          "We should have removed the duplicated "
2208          "element from llvm.compiler.used");
2209   if (!GA.hasOneUse())
2210     // Strictly more than one use. So at least one is not in llvm.used and
2211     // llvm.compiler.used.
2212     return true;
2213 
2214   // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
2215   return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
2216 }
2217 
2218 static bool mayHaveOtherReferences(GlobalValue &GV, const LLVMUsed &U) {
2219   if (!GV.hasLocalLinkage())
2220     return true;
2221 
2222   return U.usedCount(&GV) || U.compilerUsedCount(&GV);
2223 }
2224 
2225 static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
2226                              bool &RenameTarget) {
2227   if (GA.isWeakForLinker())
2228     return false;
2229 
2230   RenameTarget = false;
2231   bool Ret = false;
2232   if (hasUseOtherThanLLVMUsed(GA, U))
2233     Ret = true;
2234 
2235   // If the alias is externally visible, we may still be able to simplify it.
2236   if (!mayHaveOtherReferences(GA, U))
2237     return Ret;
2238 
2239   // If the aliasee has internal linkage and no other references (e.g.,
2240   // @llvm.used, @llvm.compiler.used), give it the name and linkage of the
2241   // alias, and delete the alias. This turns:
2242   //   define internal ... @f(...)
2243   //   @a = alias ... @f
2244   // into:
2245   //   define ... @a(...)
2246   Constant *Aliasee = GA.getAliasee();
2247   GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
2248   if (mayHaveOtherReferences(*Target, U))
2249     return Ret;
2250 
2251   RenameTarget = true;
2252   return true;
2253 }
2254 
2255 static bool
2256 OptimizeGlobalAliases(Module &M,
2257                       SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2258   bool Changed = false;
2259   LLVMUsed Used(M);
2260 
2261   for (GlobalValue *GV : Used.used())
2262     Used.compilerUsedErase(GV);
2263 
2264   // Return whether GV is explicitly or implicitly dso_local and not replaceable
2265   // by another definition in the current linkage unit.
2266   auto IsModuleLocal = [](GlobalValue &GV) {
2267     return !GlobalValue::isInterposableLinkage(GV.getLinkage()) &&
2268            (GV.isDSOLocal() || GV.isImplicitDSOLocal());
2269   };
2270 
2271   for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
2272     // Aliases without names cannot be referenced outside this module.
2273     if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
2274       J.setLinkage(GlobalValue::InternalLinkage);
2275 
2276     if (deleteIfDead(J, NotDiscardableComdats)) {
2277       Changed = true;
2278       continue;
2279     }
2280 
2281     // If the alias can change at link time, nothing can be done - bail out.
2282     if (!IsModuleLocal(J))
2283       continue;
2284 
2285     Constant *Aliasee = J.getAliasee();
2286     GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
2287     // We can't trivially replace the alias with the aliasee if the aliasee is
2288     // non-trivial in some way. We also can't replace the alias with the aliasee
2289     // if the aliasee may be preemptible at runtime. On ELF, a non-preemptible
2290     // alias can be used to access the definition as if preemption did not
2291     // happen.
2292     // TODO: Try to handle non-zero GEPs of local aliasees.
2293     if (!Target || !IsModuleLocal(*Target))
2294       continue;
2295 
2296     Target->removeDeadConstantUsers();
2297 
2298     // Make all users of the alias use the aliasee instead.
2299     bool RenameTarget;
2300     if (!hasUsesToReplace(J, Used, RenameTarget))
2301       continue;
2302 
2303     J.replaceAllUsesWith(Aliasee);
2304     ++NumAliasesResolved;
2305     Changed = true;
2306 
2307     if (RenameTarget) {
2308       // Give the aliasee the name, linkage and other attributes of the alias.
2309       Target->takeName(&J);
2310       Target->setLinkage(J.getLinkage());
2311       Target->setDSOLocal(J.isDSOLocal());
2312       Target->setVisibility(J.getVisibility());
2313       Target->setDLLStorageClass(J.getDLLStorageClass());
2314 
2315       if (Used.usedErase(&J))
2316         Used.usedInsert(Target);
2317 
2318       if (Used.compilerUsedErase(&J))
2319         Used.compilerUsedInsert(Target);
2320     } else if (mayHaveOtherReferences(J, Used))
2321       continue;
2322 
2323     // Delete the alias.
2324     M.eraseAlias(&J);
2325     ++NumAliasesRemoved;
2326     Changed = true;
2327   }
2328 
2329   Used.syncVariablesAndSets();
2330 
2331   return Changed;
2332 }
2333 
2334 static Function *
2335 FindAtExitLibFunc(Module &M,
2336                   function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2337                   LibFunc Func) {
2338   // Hack to get a default TLI before we have actual Function.
2339   auto FuncIter = M.begin();
2340   if (FuncIter == M.end())
2341     return nullptr;
2342   auto *TLI = &GetTLI(*FuncIter);
2343 
2344   if (!TLI->has(Func))
2345     return nullptr;
2346 
2347   Function *Fn = M.getFunction(TLI->getName(Func));
2348   if (!Fn)
2349     return nullptr;
2350 
2351   // Now get the actual TLI for Fn.
2352   TLI = &GetTLI(*Fn);
2353 
2354   // Make sure that the function has the correct prototype.
2355   LibFunc F;
2356   if (!TLI->getLibFunc(*Fn, F) || F != Func)
2357     return nullptr;
2358 
2359   return Fn;
2360 }
2361 
2362 /// Returns whether the given function is an empty C++ destructor or atexit
2363 /// handler and can therefore be eliminated. Note that we assume that other
2364 /// optimization passes have already simplified the code so we simply check for
2365 /// 'ret'.
2366 static bool IsEmptyAtExitFunction(const Function &Fn) {
2367   // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
2368   // nounwind, but that doesn't seem worth doing.
2369   if (Fn.isDeclaration())
2370     return false;
2371 
2372   for (const auto &I : Fn.getEntryBlock()) {
2373     if (I.isDebugOrPseudoInst())
2374       continue;
2375     if (isa<ReturnInst>(I))
2376       return true;
2377     break;
2378   }
2379   return false;
2380 }
2381 
2382 static bool OptimizeEmptyGlobalAtExitDtors(Function *CXAAtExitFn, bool isCXX) {
2383   /// Itanium C++ ABI p3.3.5:
2384   ///
2385   ///   After constructing a global (or local static) object, that will require
2386   ///   destruction on exit, a termination function is registered as follows:
2387   ///
2388   ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
2389   ///
2390   ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
2391   ///   call f(p) when DSO d is unloaded, before all such termination calls
2392   ///   registered before this one. It returns zero if registration is
2393   ///   successful, nonzero on failure.
2394 
2395   // This pass will look for calls to __cxa_atexit or atexit where the function
2396   // is trivial and remove them.
2397   bool Changed = false;
2398 
2399   for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) {
2400     // We're only interested in calls. Theoretically, we could handle invoke
2401     // instructions as well, but neither llvm-gcc nor clang generate invokes
2402     // to __cxa_atexit.
2403     CallInst *CI = dyn_cast<CallInst>(U);
2404     if (!CI)
2405       continue;
2406 
2407     Function *DtorFn =
2408       dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
2409     if (!DtorFn || !IsEmptyAtExitFunction(*DtorFn))
2410       continue;
2411 
2412     // Just remove the call.
2413     CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
2414     CI->eraseFromParent();
2415 
2416     if (isCXX)
2417       ++NumCXXDtorsRemoved;
2418     else
2419       ++NumAtExitRemoved;
2420 
2421     Changed |= true;
2422   }
2423 
2424   return Changed;
2425 }
2426 
2427 static Function *hasSideeffectFreeStaticResolution(GlobalIFunc &IF) {
2428   if (IF.isInterposable())
2429     return nullptr;
2430 
2431   Function *Resolver = IF.getResolverFunction();
2432   if (!Resolver)
2433     return nullptr;
2434 
2435   if (Resolver->isInterposable())
2436     return nullptr;
2437 
2438   // Only handle functions that have been optimized into a single basic block.
2439   auto It = Resolver->begin();
2440   if (++It != Resolver->end())
2441     return nullptr;
2442 
2443   BasicBlock &BB = Resolver->getEntryBlock();
2444 
2445   if (any_of(BB, [](Instruction &I) { return I.mayHaveSideEffects(); }))
2446     return nullptr;
2447 
2448   auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
2449   if (!Ret)
2450     return nullptr;
2451 
2452   return dyn_cast<Function>(Ret->getReturnValue());
2453 }
2454 
2455 /// Find IFuncs that have resolvers that always point at the same statically
2456 /// known callee, and replace their callers with a direct call.
2457 static bool OptimizeStaticIFuncs(Module &M) {
2458   bool Changed = false;
2459   for (GlobalIFunc &IF : M.ifuncs())
2460     if (Function *Callee = hasSideeffectFreeStaticResolution(IF))
2461       if (!IF.use_empty() &&
2462           (!Callee->isDeclaration() ||
2463            none_of(IF.users(), [](User *U) { return isa<GlobalAlias>(U); }))) {
2464         IF.replaceAllUsesWith(Callee);
2465         NumIFuncsResolved++;
2466         Changed = true;
2467       }
2468   return Changed;
2469 }
2470 
2471 static bool
2472 DeleteDeadIFuncs(Module &M,
2473                  SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2474   bool Changed = false;
2475   for (GlobalIFunc &IF : make_early_inc_range(M.ifuncs()))
2476     if (deleteIfDead(IF, NotDiscardableComdats)) {
2477       NumIFuncsDeleted++;
2478       Changed = true;
2479     }
2480   return Changed;
2481 }
2482 
2483 static bool
2484 optimizeGlobalsInModule(Module &M, const DataLayout &DL,
2485                         function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2486                         function_ref<TargetTransformInfo &(Function &)> GetTTI,
2487                         function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2488                         function_ref<DominatorTree &(Function &)> LookupDomTree,
2489                         function_ref<void(Function &F)> ChangedCFGCallback,
2490                         function_ref<void(Function &F)> DeleteFnCallback) {
2491   SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
2492   bool Changed = false;
2493   bool LocalChange = true;
2494   std::optional<uint32_t> FirstNotFullyEvaluatedPriority;
2495 
2496   while (LocalChange) {
2497     LocalChange = false;
2498 
2499     NotDiscardableComdats.clear();
2500     for (const GlobalVariable &GV : M.globals())
2501       if (const Comdat *C = GV.getComdat())
2502         if (!GV.isDiscardableIfUnused() || !GV.use_empty())
2503           NotDiscardableComdats.insert(C);
2504     for (Function &F : M)
2505       if (const Comdat *C = F.getComdat())
2506         if (!F.isDefTriviallyDead())
2507           NotDiscardableComdats.insert(C);
2508     for (GlobalAlias &GA : M.aliases())
2509       if (const Comdat *C = GA.getComdat())
2510         if (!GA.isDiscardableIfUnused() || !GA.use_empty())
2511           NotDiscardableComdats.insert(C);
2512 
2513     // Delete functions that are trivially dead, ccc -> fastcc
2514     LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
2515                                      NotDiscardableComdats, ChangedCFGCallback,
2516                                      DeleteFnCallback);
2517 
2518     // Optimize global_ctors list.
2519     LocalChange |=
2520         optimizeGlobalCtorsList(M, [&](uint32_t Priority, Function *F) {
2521           if (FirstNotFullyEvaluatedPriority &&
2522               *FirstNotFullyEvaluatedPriority != Priority)
2523             return false;
2524           bool Evaluated = EvaluateStaticConstructor(F, DL, &GetTLI(*F));
2525           if (!Evaluated)
2526             FirstNotFullyEvaluatedPriority = Priority;
2527           return Evaluated;
2528         });
2529 
2530     // Optimize non-address-taken globals.
2531     LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
2532                                       NotDiscardableComdats);
2533 
2534     // Resolve aliases, when possible.
2535     LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
2536 
2537     // Try to remove trivial global destructors if they are not removed
2538     // already.
2539     if (Function *CXAAtExitFn =
2540             FindAtExitLibFunc(M, GetTLI, LibFunc_cxa_atexit))
2541       LocalChange |= OptimizeEmptyGlobalAtExitDtors(CXAAtExitFn, true);
2542 
2543     if (Function *AtExitFn = FindAtExitLibFunc(M, GetTLI, LibFunc_atexit))
2544       LocalChange |= OptimizeEmptyGlobalAtExitDtors(AtExitFn, false);
2545 
2546     // Optimize IFuncs whose callee's are statically known.
2547     LocalChange |= OptimizeStaticIFuncs(M);
2548 
2549     // Remove any IFuncs that are now dead.
2550     LocalChange |= DeleteDeadIFuncs(M, NotDiscardableComdats);
2551 
2552     Changed |= LocalChange;
2553   }
2554 
2555   // TODO: Move all global ctors functions to the end of the module for code
2556   // layout.
2557 
2558   return Changed;
2559 }
2560 
2561 PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
2562     auto &DL = M.getDataLayout();
2563     auto &FAM =
2564         AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2565     auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{
2566       return FAM.getResult<DominatorTreeAnalysis>(F);
2567     };
2568     auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2569       return FAM.getResult<TargetLibraryAnalysis>(F);
2570     };
2571     auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
2572       return FAM.getResult<TargetIRAnalysis>(F);
2573     };
2574 
2575     auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
2576       return FAM.getResult<BlockFrequencyAnalysis>(F);
2577     };
2578     auto ChangedCFGCallback = [&FAM](Function &F) {
2579       FAM.invalidate(F, PreservedAnalyses::none());
2580     };
2581     auto DeleteFnCallback = [&FAM](Function &F) { FAM.clear(F, F.getName()); };
2582 
2583     if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
2584                                  ChangedCFGCallback, DeleteFnCallback))
2585       return PreservedAnalyses::all();
2586 
2587     PreservedAnalyses PA = PreservedAnalyses::none();
2588     // We made sure to clear analyses for deleted functions.
2589     PA.preserve<FunctionAnalysisManagerModuleProxy>();
2590     // The only place we modify the CFG is when calling
2591     // removeUnreachableBlocks(), but there we make sure to invalidate analyses
2592     // for modified functions.
2593     PA.preserveSet<CFGAnalyses>();
2594     return PA;
2595 }
2596