//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken.  If obviously true, it marks read/write globals as constant, deletes
// variables only stored to, etc.
//
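// For example (an illustrative sketch, not a test from the tree): given
//
//   @g = internal global i32 42
//
// where @g is never stored to after initialization, the pass can mark @g
// constant, fold loads of it to 42, and eventually delete @g entirely.
//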
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "globalopt"

STATISTIC(NumMarked    , "Number of globals marked constant");
STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted   , "Number of globals deleted");
STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
STATISTIC(NumInternalFunc, "Number of internal functions");
STATISTIC(NumColdCC, "Number of functions marked coldcc");

static cl::opt<bool>
    EnableColdCCStressTest("enable-coldcc-stress-test",
                           cl::desc("Enable stress test of coldcc by adding "
                                    "calling conv to all internal functions."),
                           cl::init(false), cl::Hidden);

static cl::opt<int> ColdCCRelFreq(
    "coldcc-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc(
        "Maximum block frequency, expressed as a percentage of caller's "
        "entry frequency, for a call site to be considered cold for enabling "
        "coldcc"));

/// Is this global variable possibly used by a leak checker as a root?  If so,
/// we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
  // A global variable is a root if it is a pointer, or could plausibly contain
  // a pointer.  There are two challenges; one is that we could have a struct
  // that has an inner member which is a pointer.  We recurse through the type
  // to detect these (up to a point).  The other is that we may actually be a
  // union of a pointer and another type, and so our LLVM type is an integer
  // which gets converted into a pointer, or our type is an [i8 x #] with a
  // pointer potentially contained here.
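  //
  // For example (hypothetical types): { i32, i8* } and [4 x i8*] are treated
  // as roots because they can hold a pointer, while [16 x i32] is not.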

  if (GV->hasPrivateLinkage())
    return false;

  SmallVector<Type *, 4> Types;
  Types.push_back(GV->getValueType());

  unsigned Limit = 20;
  do {
    Type *Ty = Types.pop_back_val();
    switch (Ty->getTypeID()) {
      default: break;
      case Type::PointerTyID:
        return true;
      case Type::FixedVectorTyID:
      case Type::ScalableVectorTyID:
        if (cast<VectorType>(Ty)->getElementType()->isPointerTy())
          return true;
        break;
      case Type::ArrayTyID:
        Types.push_back(cast<ArrayType>(Ty)->getElementType());
        break;
      case Type::StructTyID: {
        StructType *STy = cast<StructType>(Ty);
        if (STy->isOpaque()) return true;
        for (StructType::element_iterator I = STy->element_begin(),
                 E = STy->element_end(); I != E; ++I) {
          Type *InnerTy = *I;
          if (isa<PointerType>(InnerTy)) return true;
          if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) ||
              isa<VectorType>(InnerTy))
            Types.push_back(InnerTy);
        }
        break;
      }
    }
    if (--Limit == 0) return true;
  } while (!Types.empty());
  return false;
}

/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
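///
/// For example (an illustrative IR sketch), in
///
///   %p = call i8* @malloc(i64 16)
///   %q = getelementptr i8, i8* %p, i64 8
///   store i8* %q, i8** @g
///
/// the malloc, the GEP, and the store form a single-use chain, so the whole
/// computation can be deleted along with the store.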
static bool IsSafeComputationToRemove(
    Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  do {
    if (isa<Constant>(V))
      return true;
    if (!V->hasOneUse())
      return false;
    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
        isa<GlobalValue>(V))
      return false;
    if (isAllocationFn(V, GetTLI))
      return true;

    Instruction *I = cast<Instruction>(V);
    if (I->mayHaveSideEffects())
      return false;
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
      if (!GEP->hasAllConstantIndices())
        return false;
    } else if (I->getNumOperands() != 1) {
      return false;
    }

    V = I->getOperand(0);
  } while (true);
}

/// This GV is a pointer root.  Loop over all users of the global and clean up
/// any that can be removed without hiding a dynamically allocated value from
/// a leak checker: stores of constants, and stores whose single-use chain of
/// computation (possibly including the allocation itself) can be deleted
/// outright.
static bool
CleanupPointerRootUsers(GlobalVariable *GV,
                        function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  // A brief explanation of leak checkers.  The goal is to find bugs where
  // pointers are forgotten, causing an accumulating growth in memory
  // usage over time.  The common strategy for leak checkers is to explicitly
  // allow the memory pointed to by globals at exit.  This is popular because it
  // also solves another problem where the main thread of a C++ program may shut
  // down before other threads that are still expecting to use those globals. To
  // handle that case, we expect the program may create a singleton and never
  // destroy it.

  bool Changed = false;

  // If Dead[n].first is the only use of a malloc result, we can delete its
  // chain of computation and the store to the global in Dead[n].second.
  SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;

  // Constants can't be pointers to dynamically allocated memory.
  for (User *U : llvm::make_early_inc_range(GV->users())) {
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      Value *V = SI->getValueOperand();
      if (isa<Constant>(V)) {
        Changed = true;
        SI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(V)) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, SI));
      }
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
      if (isa<Constant>(MSI->getValue())) {
        Changed = true;
        MSI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MSI));
      }
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
      GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
      if (MemSrc && MemSrc->isConstant()) {
        Changed = true;
        MTI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MTI));
      }
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (Constant *C = dyn_cast<Constant>(U)) {
      if (isSafeToDestroyConstant(C)) {
        C->destroyConstant();
        // This could have invalidated UI, start over from scratch.
        Dead.clear();
        CleanupPointerRootUsers(GV, GetTLI);
        return true;
      }
    }
  }

  for (int i = 0, e = Dead.size(); i != e; ++i) {
    if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) {
      Dead[i].second->eraseFromParent();
      Instruction *I = Dead[i].first;
      do {
        if (isAllocationFn(I, GetTLI))
          break;
        Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
        if (!J)
          break;
        I->eraseFromParent();
        I = J;
      } while (true);
      I->eraseFromParent();
      Changed = true;
    }
  }

  return Changed;
}

/// We just marked GV constant.  Loop over all users of the global, cleaning up
/// the obvious ones.  This is largely just a quick scan over the use list to
/// clean up the easy and obvious cruft.  This returns true if it made a change.
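///
/// For example (illustrative): once @g with initializer i32 7 is marked
/// constant, a "load i32, i32* @g" folds to 7, and any store to @g that
/// remains must be unreachable (or storing 7 again) and can simply be deleted.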
static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
                                       const DataLayout &DL) {
  Constant *Init = GV->getInitializer();
  SmallVector<User *, 8> WorkList(GV->users());
  SmallPtrSet<User *, 8> Visited;
  bool Changed = false;

  SmallVector<WeakTrackingVH> MaybeDeadInsts;
  auto EraseFromParent = [&](Instruction *I) {
    for (Value *Op : I->operands())
      if (auto *OpI = dyn_cast<Instruction>(Op))
        MaybeDeadInsts.push_back(OpI);
    I->eraseFromParent();
    Changed = true;
  };
  while (!WorkList.empty()) {
    User *U = WorkList.pop_back_val();
    if (!Visited.insert(U).second)
      continue;

    if (auto *BO = dyn_cast<BitCastOperator>(U))
      append_range(WorkList, BO->users());
    else if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U))
      append_range(WorkList, ASC->users());
    else if (auto *GEP = dyn_cast<GEPOperator>(U))
      append_range(WorkList, GEP->users());
    else if (auto *LI = dyn_cast<LoadInst>(U)) {
      // A load from a uniform value is always the same, regardless of any
      // applied offset.
      Type *Ty = LI->getType();
      if (Constant *Res = ConstantFoldLoadFromUniformValue(Init, Ty)) {
        LI->replaceAllUsesWith(Res);
        EraseFromParent(LI);
        continue;
      }

      Value *PtrOp = LI->getPointerOperand();
      APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
      PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
          DL, Offset, /* AllowNonInbounds */ true);
      if (PtrOp == GV) {
        if (auto *Value = ConstantFoldLoadFromConst(Init, Ty, Offset, DL)) {
          LI->replaceAllUsesWith(Value);
          EraseFromParent(LI);
        }
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // Store must be unreachable or storing Init into the global.
      EraseFromParent(SI);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
      if (getUnderlyingObject(MI->getRawDest()) == GV)
        EraseFromParent(MI);
    }
  }

  Changed |=
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts);
  GV->removeDeadConstantUsers();
  return Changed;
}

/// Look at all uses of the global and determine which (offset, type) pairs it
/// can be split into.
static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV,
                            const DataLayout &DL) {
  SmallVector<Use *, 16> Worklist;
  SmallPtrSet<Use *, 16> Visited;
  auto AppendUses = [&](Value *V) {
    for (Use &U : V->uses())
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
  };
  AppendUses(GV);
  while (!Worklist.empty()) {
    Use *U = Worklist.pop_back_val();
    User *V = U->getUser();

    auto *GEP = dyn_cast<GEPOperator>(V);
    if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
        (GEP && GEP->hasAllConstantIndices())) {
      AppendUses(V);
      continue;
    }

    if (Value *Ptr = getLoadStorePointerOperand(V)) {
      // This is storing the global address into somewhere, not storing into
      // the global.
      if (isa<StoreInst>(V) && U->getOperandNo() == 0)
        return false;

      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      if (Ptr != GV || Offset.getActiveBits() >= 64)
        return false;

      // TODO: We currently require that all accesses at a given offset must
      // use the same type. This could be relaxed.
      Type *Ty = getLoadStoreType(V);
      auto It = Types.try_emplace(Offset.getZExtValue(), Ty).first;
      if (Ty != It->second)
        return false;
      continue;
    }

    // Ignore dead constant users.
    if (auto *C = dyn_cast<Constant>(V)) {
      if (!isSafeToDestroyConstant(C))
        return false;
      continue;
    }

    // Unknown user.
    return false;
  }

  return true;
}

/// Copy over the debug info for a variable to its SRA replacements.
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
                                 uint64_t FragmentOffsetInBits,
                                 uint64_t FragmentSizeInBits,
                                 uint64_t VarSize) {
  SmallVector<DIGlobalVariableExpression *, 1> GVs;
  GV->getDebugInfo(GVs);
  for (auto *GVE : GVs) {
    DIVariable *Var = GVE->getVariable();
    DIExpression *Expr = GVE->getExpression();
    // If the FragmentSize is smaller than the variable,
    // emit a fragment expression.
    if (FragmentSizeInBits < VarSize) {
      if (auto E = DIExpression::createFragmentExpression(
              Expr, FragmentOffsetInBits, FragmentSizeInBits))
        Expr = *E;
      else
        return;
    }
    auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
    NGV->addDebugInfo(NGVE);
  }
}

/// Perform scalar replacement of aggregates on the specified global variable.
/// This opens the door for other optimizations by exposing the behavior of the
/// program in a more fine-grained way.  We have determined that this
/// transformation is safe already.  We return the first global variable we
/// insert so that the caller can reprocess it.
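///
/// For example (an illustrative sketch; the ".N" names follow the scheme used
/// below): a global
///
///   @g = internal global { i32, float } zeroinitializer
///
/// whose two fields are only ever accessed separately is split into @g.0 of
/// type i32 and @g.1 of type float, one new global per accessed offset.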
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
  assert(GV->hasLocalLinkage());

  // Collect types to split into.
  DenseMap<uint64_t, Type *> Types;
  if (!collectSRATypes(Types, GV, DL) || Types.empty())
    return nullptr;

  // Make sure we don't SRA back to the same type.
  if (Types.size() == 1 && Types.begin()->second == GV->getValueType())
    return nullptr;

  // Don't perform SRA if we would have to split into many globals.
  if (Types.size() > 16)
    return nullptr;

  // Sort by offset.
  SmallVector<std::pair<uint64_t, Type *>, 16> TypesVector;
  append_range(TypesVector, Types);
  sort(TypesVector,
       [](const auto &A, const auto &B) { return A.first < B.first; });

  // Check that the types are non-overlapping.
  uint64_t Offset = 0;
  for (const auto &Pair : TypesVector) {
    // Overlaps with previous type.
    if (Pair.first < Offset)
      return nullptr;

    Offset = Pair.first + DL.getTypeAllocSize(Pair.second);
  }

  // Some accesses go beyond the end of the global, don't bother.
  if (Offset > DL.getTypeAllocSize(GV->getValueType()))
    return nullptr;

  // Collect initializers for new globals.
  Constant *OrigInit = GV->getInitializer();
  DenseMap<uint64_t, Constant *> Initializers;
  for (const auto &Pair : Types) {
    Constant *NewInit = ConstantFoldLoadFromConst(OrigInit, Pair.second,
                                                  APInt(64, Pair.first), DL);
    if (!NewInit) {
      LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of "
                        << *GV << " with type " << *Pair.second << " at offset "
                        << Pair.first << "\n");
      return nullptr;
    }
    Initializers.insert({Pair.first, NewInit});
  }

  LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");

  // Get the alignment of the global, either explicit or target-specific.
  Align StartAlignment =
      DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
  uint64_t VarSize = DL.getTypeSizeInBits(GV->getValueType());

  // Create replacement globals.
  DenseMap<uint64_t, GlobalVariable *> NewGlobals;
  unsigned NameSuffix = 0;
  for (auto &Pair : TypesVector) {
    uint64_t Offset = Pair.first;
    Type *Ty = Pair.second;
    GlobalVariable *NGV = new GlobalVariable(
        *GV->getParent(), Ty, false, GlobalVariable::InternalLinkage,
        Initializers[Offset], GV->getName() + "." + Twine(NameSuffix++), GV,
        GV->getThreadLocalMode(), GV->getAddressSpace());
    NGV->copyAttributesFrom(GV);
    NewGlobals.insert({Offset, NGV});

    // Calculate the known alignment of the field.  If the original aggregate
    // had 256 byte alignment for example, something might depend on that:
    // propagate info to each field.
    Align NewAlign = commonAlignment(StartAlignment, Offset);
    if (NewAlign > DL.getABITypeAlign(Ty))
      NGV->setAlignment(NewAlign);

    // Copy over the debug info for the variable.
    transferSRADebugInfo(GV, NGV, Offset * 8, DL.getTypeAllocSizeInBits(Ty),
                         VarSize);
  }

  // Replace uses of the original global with uses of the new global.
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 16> Visited;
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  auto AppendUsers = [&](Value *V) {
    for (User *U : V->users())
      if (Visited.insert(U).second)
        Worklist.push_back(U);
  };
  AppendUsers(GV);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
        isa<GEPOperator>(V)) {
      AppendUsers(V);
      if (isa<Instruction>(V))
        DeadInsts.push_back(V);
      continue;
    }

    if (Value *Ptr = getLoadStorePointerOperand(V)) {
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == GV && "Load/store must be from/to global");
      GlobalVariable *NGV = NewGlobals[Offset.getZExtValue()];
      assert(NGV && "Must have replacement global for this offset");

      // Update the pointer operand and recalculate alignment.
      Align PrefAlign = DL.getPrefTypeAlign(getLoadStoreType(V));
      Align NewAlign =
          getOrEnforceKnownAlignment(NGV, PrefAlign, DL, cast<Instruction>(V));

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        LI->setOperand(0, NGV);
        LI->setAlignment(NewAlign);
      } else {
        auto *SI = cast<StoreInst>(V);
        SI->setOperand(1, NGV);
        SI->setAlignment(NewAlign);
      }
      continue;
    }

    assert(isa<Constant>(V) && isSafeToDestroyConstant(cast<Constant>(V)) &&
           "Other users can only be dead constants");
  }

  // Delete old instructions and global.
  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
  GV->removeDeadConstantUsers();
  GV->eraseFromParent();
  ++NumSRA;

  assert(NewGlobals.size() > 0);
  return NewGlobals.begin()->second;
}

/// Return true if all users of the specified value will trap if the value is
/// dynamically null.  PHIs keeps track of any phi nodes we've seen to avoid
/// reprocessing them.
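///
/// For example (illustrative): a load from V, or a call through V, traps when
/// V is null, whereas storing V somewhere or passing it as a call argument
/// merely escapes the value, so those uses make this return false.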
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
                                        SmallPtrSetImpl<const PHINode*> &PHIs) {
  for (const User *U : V->users()) {
    if (const Instruction *I = dyn_cast<Instruction>(U)) {
      // If null pointer is considered valid, then all uses are non-trapping.
      // Non address-space 0 globals have already been pruned by the caller.
      if (NullPointerIsDefined(I->getFunction()))
        return false;
    }
    if (isa<LoadInst>(U)) {
      // Will trap.
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Storing the value.
      }
    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
      if (CI->getCalledOperand() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
      if (II->getCalledOperand() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
      // If we've already seen this phi node, ignore it, it has already been
      // checked.
      if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
        return false;
    } else if (isa<ICmpInst>(U) &&
               !ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) &&
               isa<LoadInst>(U->getOperand(0)) &&
               isa<ConstantPointerNull>(U->getOperand(1))) {
      assert(isa<GlobalValue>(cast<LoadInst>(U->getOperand(0))
                                  ->getPointerOperand()
                                  ->stripPointerCasts()) &&
             "Should be GlobalVariable");
      // This and only this kind of non-signed ICmpInst is to be replaced with
      // the comparing of the value of the created global init bool later in
      // optimizeGlobalAddressOfAllocation for the global variable.
    } else {
      //cerr << "NONTRAPPING USE: " << *U;
      return false;
    }
  }
  return true;
}

/// Return true if all uses of any loads from GV will trap if the loaded value
/// is null.  Note that this also permits comparisons of the loaded value
/// against null, as a special case.
static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
  SmallVector<const Value *, 4> Worklist;
  Worklist.push_back(GV);
  while (!Worklist.empty()) {
    const Value *P = Worklist.pop_back_val();
    for (auto *U : P->users()) {
      if (auto *LI = dyn_cast<LoadInst>(U)) {
        SmallPtrSet<const PHINode *, 8> PHIs;
        if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
          return false;
      } else if (auto *SI = dyn_cast<StoreInst>(U)) {
        // Ignore stores to the global.
        if (SI->getPointerOperand() != P)
          return false;
      } else if (auto *CE = dyn_cast<ConstantExpr>(U)) {
        if (CE->stripPointerCasts() != GV)
          return false;
        // Check further the ConstantExpr.
        Worklist.push_back(CE);
      } else {
        // We don't know or understand this user, bail out.
        return false;
      }
    }
  }

  return true;
}

/// Get all the load and store uses of global variable \p GV.
static void allUsesOfLoadAndStores(GlobalVariable *GV,
                                   SmallVector<Value *, 4> &Uses) {
  SmallVector<Value *, 4> Worklist;
  Worklist.push_back(GV);
  while (!Worklist.empty()) {
    auto *P = Worklist.pop_back_val();
    for (auto *U : P->users()) {
      if (auto *CE = dyn_cast<ConstantExpr>(U)) {
        Worklist.push_back(CE);
        continue;
      }

      assert((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
             "Expect only load or store instructions");
      Uses.push_back(U);
    }
  }
}

static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
  bool Changed = false;
  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
    Instruction *I = cast<Instruction>(*UI++);
    // Uses are non-trapping if null pointer is considered valid.
    // Non address-space 0 globals are already pruned by the caller.
    if (NullPointerIsDefined(I->getFunction()))
      return false;
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      CallBase *CB = cast<CallBase>(I);
      if (CB->getCalledOperand() == V) {
        // Calling through the pointer!  Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
        CB->setCalledOperand(NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
          if (CB->getArgOperand(i) == V) {
            PassedAsArg = true;
            CB->setArgOperand(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also.  Be careful to not invalidate UI!
          UI = V->user_begin();
        }
      }
    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
                                ConstantExpr::getCast(CI->getOpcode(),
                                                      NewV, CI->getType()));
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // Should handle GEP here.
      SmallVector<Constant*, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands()-1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands()-1)
        Changed |= OptimizeAwayTrappingUsesOfValue(
            GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
                                                 NewV, Idxs));
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}

/// The specified global has only one non-null value stored into it.  If there
/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know that they cannot be reachable with a null
/// value, and we can optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(
    GlobalVariable *GV, Constant *LV, const DataLayout &DL,
    function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of loads with uses of uses of the stored value.
  for (User *GlobalUser : llvm::make_early_inc_range(GV->users())) {
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
      assert(GlobalUser->getOperand(1) == GV &&
             "Must be storing *to* the global");
    } else {
      AllNonStoreUsesGone = false;

      // If we get here we could have other crazy uses that are transitively
      // loaded.
      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
              isa<BitCastInst>(GlobalUser) ||
              isa<GetElementPtrInst>(GlobalUser)) &&
             "Only expect loads and stores!");
    }
  }

  if (Changed) {
    LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
                      << "\n");
    ++NumGlobUses;
  }

  // If we nuked all of the loads, then none of the stores are needed either,
  // nor is the global.
  if (AllNonStoreUsesGone) {
    if (isLeakCheckerRoot(GV)) {
      Changed |= CleanupPointerRootUsers(GV, GetTLI);
    } else {
      Changed = true;
      CleanupConstantGlobalUsers(GV, DL);
    }
    if (GV->use_empty()) {
      LLVM_DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
      Changed = true;
      GV->eraseFromParent();
      ++NumDeleted;
    }
  }
  return Changed;
}

/// Walk the use list of V, constant folding all of the instructions that are
/// foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
                                TargetLibraryInfo *TLI) {
  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
    if (Instruction *I = dyn_cast<Instruction>(*UI++))
      if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
        I->replaceAllUsesWith(NewC);

        // Advance UI to the next non-I use to avoid invalidating it!
        // Instructions could multiply use V.
        while (UI != E && *UI == I)
          ++UI;
        if (isInstructionTriviallyDead(I, TLI))
          I->eraseFromParent();
      }
}

/// This function takes the specified global variable, and transforms the
/// program as if it always contained the result of the specified malloc.
/// Because it is always the result of the specified malloc, there is no reason
/// to actually DO the malloc.  Instead, turn the malloc into a global, and
/// rewrite any loads of GV as uses of the new global.
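///
/// For example (an illustrative sketch): if @g is only ever assigned the
/// result of malloc(256), the call is replaced by a 256-byte global @g.body,
/// loads of @g yield the address of @g.body, and comparisons of the loaded
/// pointer against null are rewritten in terms of an i1 flag @g.init that
/// records whether the "allocation" has happened yet (see below).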
static GlobalVariable *
OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
                                  uint64_t AllocSize, Constant *InitVal,
                                  const DataLayout &DL,
                                  TargetLibraryInfo *TLI) {
  LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI
                    << '\n');

  // Create global of type [AllocSize x i8].
  Type *GlobalType = ArrayType::get(Type::getInt8Ty(GV->getContext()),
                                    AllocSize);

  // Create the new global variable.  The contents of the allocated memory are
  // undefined initially, so initialize with an undef value.
  GlobalVariable *NewGV = new GlobalVariable(
      *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
      UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
      GV->getThreadLocalMode());

  // Initialize the global at the point of the original call.  Note that this
  // is a different point from the initialization referred to below for the
  // nullability handling.  Subtlety: We have not proven the original global
  // was only initialized once.  As such, we can not fold this into the
  // initializer of the new global, as we may need to re-init the storage
  // multiple times.
  if (!isa<UndefValue>(InitVal)) {
    IRBuilder<> Builder(CI->getNextNode());
    // TODO: Use alignment above if align!=1
    Builder.CreateMemSet(NewGV, InitVal, AllocSize, None);
  }

  // Update users of the allocation to use the new global instead.
  BitCastInst *TheBC = nullptr;
  while (!CI->use_empty()) {
    Instruction *User = cast<Instruction>(CI->user_back());
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (BCI->getType() == NewGV->getType()) {
        BCI->replaceAllUsesWith(NewGV);
        BCI->eraseFromParent();
      } else {
        BCI->setOperand(0, NewGV);
      }
    } else {
      if (!TheBC)
        TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
      User->replaceUsesOfWith(CI, TheBC);
    }
  }

  SmallPtrSet<Constant *, 1> RepValues;
  RepValues.insert(NewGV);

  // If there is a comparison against null, we will insert a global bool to
  // keep track of whether the global was initialized yet or not.
  GlobalVariable *InitBool =
    new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
                       GlobalValue::InternalLinkage,
                       ConstantInt::getFalse(GV->getContext()),
                       GV->getName()+".init", GV->getThreadLocalMode());
  bool InitBoolUsed = false;

  // Loop over all instruction uses of GV, processing them in turn.
  SmallVector<Value *, 4> Guses;
  allUsesOfLoadAndStores(GV, Guses);
  for (auto *U : Guses) {
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // The global is initialized when the store to it occurs. If the stored
      // value is the null value, the global bool is set to false, otherwise
      // true.
      new StoreInst(ConstantInt::getBool(
                        GV->getContext(),
                        !isa<ConstantPointerNull>(SI->getValueOperand())),
                    InitBool, false, Align(1), SI->getOrdering(),
                    SI->getSyncScopeID(), SI);
      SI->eraseFromParent();
      continue;
    }

    LoadInst *LI = cast<LoadInst>(U);
    while (!LI->use_empty()) {
      Use &LoadUse = *LI->use_begin();
      ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
      if (!ICI) {
        auto *CE = ConstantExpr::getBitCast(NewGV, LI->getType());
        RepValues.insert(CE);
        LoadUse.set(CE);
        continue;
      }

      // Replace the cmp X, 0 with a use of the bool value.
      Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
                               InitBool->getName() + ".val", false, Align(1),
                               LI->getOrdering(), LI->getSyncScopeID(), LI);
      InitBoolUsed = true;
      switch (ICI->getPredicate()) {
      default: llvm_unreachable("Unknown ICmp Predicate!");
      case ICmpInst::ICMP_ULT: // X < null -> always false
        LV = ConstantInt::getFalse(GV->getContext());
        break;
      case ICmpInst::ICMP_UGE: // X >= null -> always true
        LV = ConstantInt::getTrue(GV->getContext());
        break;
      case ICmpInst::ICMP_ULE:
      case ICmpInst::ICMP_EQ:
        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
        break;
      case ICmpInst::ICMP_NE:
      case ICmpInst::ICMP_UGT:
        break;  // no change.
      }
      ICI->replaceAllUsesWith(LV);
      ICI->eraseFromParent();
    }
    LI->eraseFromParent();
  }

  // If the initialization boolean was used, insert it, otherwise delete it.
  if (!InitBoolUsed) {
    while (!InitBool->use_empty())  // Delete initializations
      cast<StoreInst>(InitBool->user_back())->eraseFromParent();
    delete InitBool;
  } else
    GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);

  // Now the GV is dead, nuke it and the allocation.
  GV->eraseFromParent();
  CI->eraseFromParent();

  // To further other optimizations, loop over all users of NewGV and try to
  // constant prop them.  This will promote GEP instructions with constant
  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
  for (auto *CE : RepValues)
    ConstantPropUsersOf(CE, DL, TLI);

  return NewGV;
}

/// Scan the use-list of the allocation CI, checking to make sure that there
/// are no complex uses of it.  We permit simple things like dereferencing the
/// pointer, but not storing through the address, unless it is to the specified
/// global.
static bool
valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
                                          const GlobalVariable *GV) {
  SmallPtrSet<const Value *, 4> Visited;
  SmallVector<const Value *, 4> Worklist;
  Worklist.push_back(CI);

  while (!Worklist.empty()) {
    const Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    for (const Use &VUse : V->uses()) {
      const User *U = VUse.getUser();
      if (isa<LoadInst>(U) || isa<CmpInst>(U))
        continue; // Fine, ignore.

      if (auto *SI = dyn_cast<StoreInst>(U)) {
        if (SI->getValueOperand() == V &&
            SI->getPointerOperand()->stripPointerCasts() != GV)
          return false; // Storing the pointer not into GV... bad.
        continue; // Otherwise, storing through it, or storing into GV... fine.
      }

      if (auto *BCI = dyn_cast<BitCastInst>(U)) {
        Worklist.push_back(BCI);
        continue;
      }

      if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
        Worklist.push_back(GEPI);
        continue;
      }

      return false;
    }
  }

  return true;
}

/// If we have a global that is only initialized with a fixed size allocation
/// try to transform the program to use global memory instead of heap
/// allocated memory. This eliminates dynamic allocation, avoids an indirection
/// accessing the data, and exposes the resultant global to further GlobalOpt.
static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
                                                   CallInst *CI,
                                                   AtomicOrdering Ordering,
                                                   const DataLayout &DL,
                                                   TargetLibraryInfo *TLI) {
  if (!isAllocRemovable(CI, TLI))
    // Must be able to remove the call when we get done.
    return false;

  Type *Int8Ty = Type::getInt8Ty(CI->getFunction()->getContext());
  Constant *InitVal = getInitialValueOfAllocation(CI, TLI, Int8Ty);
  if (!InitVal)
    // Must be able to emit a memset for initialization
    return false;

  uint64_t AllocSize;
  if (!getObjectSize(CI, AllocSize, DL, TLI, ObjectSizeOpts()))
    return false;

  // Restrict this transformation to only working on small allocations
  // (2048 bytes currently), as we don't want to introduce a 16M global or
  // something.
  if (AllocSize >= 2048)
    return false;

  // We can't optimize this global unless all uses of it are *known* to be
  // of the malloc value, not of the null initializer value (consider a use
  // that compares the global's value against zero to see if the malloc has
  // been reached).  To do this, we check to see if all uses of the global
  // would trap if the global were null: this proves that they must all
  // happen after the malloc.
  if (!allUsesOfLoadedValueWillTrapIfNull(GV))
    return false;

  // We can't optimize this if the malloc itself is used in a complex way,
  // for example, being stored into multiple globals.  This allows the
  // malloc to be stored into the specified global, loaded, gep, icmp'd.
  // These are all things we could transform to using the global for.
  if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
    return false;

  OptimizeGlobalAddressOfAllocation(GV, CI, AllocSize, InitVal, DL, TLI);
  return true;
}

// Try to optimize globals based on the knowledge that only one value (besides
// its initializer) is ever stored to the global.
static bool
optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
                         AtomicOrdering Ordering, const DataLayout &DL,
                         function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
  // Ignore no-op GEPs and bitcasts.
  StoredOnceVal = StoredOnceVal->stripPointerCasts();

  // If we are dealing with a pointer global that is initialized to null and
  // only has one (non-null) value stored into it, then we can optimize any
  // users of the loaded value (often calls and loads) that would trap if the
  // value was null.
  if (GV->getInitializer()->getType()->isPointerTy() &&
      GV->getInitializer()->isNullValue() &&
      StoredOnceVal->getType()->isPointerTy() &&
      !NullPointerIsDefined(
          nullptr /* F */,
          GV->getInitializer()->getType()->getPointerAddressSpace())) {
    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
      if (GV->getInitializer()->getType() != SOVC->getType())
        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());

      // Optimize away any trapping uses of the loaded value.
      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
        return true;
    } else if (isAllocationFn(StoredOnceVal, GetTLI)) {
      if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) {
        auto *TLI = &GetTLI(*CI->getFunction());
        if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, Ordering, DL, TLI))
          return true;
      }
    }
  }

  return false;
}

/// At this point, we have learned that the only two values ever stored into GV
/// are its initializer and OtherVal.  See if we can shrink the global into a
/// boolean and select between the two values whenever it is used.  This exposes
/// the values to other scalar optimizations.
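///
/// For example (illustrative): if the only values ever stored to @g are its
/// initializer 0 and the constant 42, @g becomes an i1 global @g.b; stores
/// write true or false, and each load becomes "select i1 %b, i32 42, i32 0"
/// (or a plain zext when the two values are 0 and 1).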
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
  Type *GVElType = GV->getValueType();

  // If GVElType is already i1, it is already shrunk.  If the type of the GV is
  // an FP value, pointer or vector, don't do this optimization because a select
  // between them is very expensive and unlikely to lead to later
  // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
  // where v1 and v2 both require constant pool loads, a big loss.
  if (GVElType == Type::getInt1Ty(GV->getContext()) ||
      GVElType->isFloatingPointTy() ||
      GVElType->isPointerTy() || GVElType->isVectorTy())
    return false;

  // Walk the use list of the global seeing if all the uses are load or store.
  // If there is anything else, bail out.
  for (User *U : GV->users()) {
    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
      return false;
    if (getLoadStoreType(U) != GVElType)
      return false;
  }

  LLVM_DEBUG(dbgs() << "   *** SHRINKING TO BOOL: " << *GV << "\n");

  // Create the new global, initializing it to false.
  GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
                                             false,
                                             GlobalValue::InternalLinkage,
                                        ConstantInt::getFalse(GV->getContext()),
                                             GV->getName()+".b",
                                             GV->getThreadLocalMode(),
                                             GV->getType()->getAddressSpace());
  NewGV->copyAttributesFrom(GV);
  GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);

  Constant *InitVal = GV->getInitializer();
  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
         "No reason to shrink to bool!");

  SmallVector<DIGlobalVariableExpression *, 1> GVs;
  GV->getDebugInfo(GVs);

  // If initialized to zero and storing one into the global, we can use a cast
  // instead of a select to synthesize the desired value.
  bool IsOneZero = false;
  bool EmitOneOrZero = true;
  auto *CI = dyn_cast<ConstantInt>(OtherVal);
  if (CI && CI->getValue().getActiveBits() <= 64) {
    IsOneZero = InitVal->isNullValue() && CI->isOne();

    auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer());
    if (CIInit && CIInit->getValue().getActiveBits() <= 64) {
      uint64_t ValInit = CIInit->getZExtValue();
      uint64_t ValOther = CI->getZExtValue();
      uint64_t ValMinus = ValOther - ValInit;

      for (auto *GVe : GVs) {
        DIGlobalVariable *DGV = GVe->getVariable();
        DIExpression *E = GVe->getExpression();
        const DataLayout &DL = GV->getParent()->getDataLayout();
        unsigned SizeInOctets =
            DL.getTypeAllocSizeInBits(NewGV->getValueType()) / 8;

        // It is expected that the address of the global optimized variable is
        // on top of the stack. After optimization, the value of that variable
        // will be either 0 for the initial value or 1 for the other value.
        // The following expression should return a constant integer value
        // depending on the value at the global object address:
        // val * (ValOther - ValInit) + ValInit:
        // DW_OP_deref DW_OP_constu <ValMinus>
        // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
        SmallVector<uint64_t, 12> Ops = {
            dwarf::DW_OP_deref_size, SizeInOctets,
            dwarf::DW_OP_constu, ValMinus,
            dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
            dwarf::DW_OP_plus};
        bool WithStackValue = true;
        E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
        DIGlobalVariableExpression *DGVE =
            DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
        NewGV->addDebugInfo(DGVE);
      }
      EmitOneOrZero = false;
    }
  }

  if (EmitOneOrZero) {
    // FIXME: This will only emit the address for the debugger; the value
    // written there will only ever be 0 or 1.
    for (auto *GVe : GVs)
      NewGV->addDebugInfo(GVe);
  }

  while (!GV->use_empty()) {
    Instruction *UI = cast<Instruction>(GV->user_back());
    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      // Change the store into a boolean store.
      bool StoringOther = SI->getOperand(0) == OtherVal;
      // Only do this if we weren't storing a loaded value.
      Value *StoreVal;
      if (StoringOther || SI->getOperand(0) == InitVal) {
        StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
                                    StoringOther);
      } else {
        // Otherwise, we are storing a previously loaded copy.  To do this,
        // change the copy from copying the original value to just copying the
        // bool.
        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));

        // If we've already replaced the input, StoredVal will be a cast or
        // select instruction.  If not, it will be a load of the original
        // global.
        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
          assert(LI->getOperand(0) == GV && "Not a copy!");
          // Insert a new load, to preserve the saved value.
          StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
                                  LI->getName() + ".b", false, Align(1),
                                  LI->getOrdering(), LI->getSyncScopeID(), LI);
        } else {
          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
                 "This is not a form that we understand!");
          StoreVal = StoredVal->getOperand(0);
          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
        }
      }
      StoreInst *NSI =
          new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(),
                        SI->getSyncScopeID(), SI);
      NSI->setDebugLoc(SI->getDebugLoc());
    } else {
      // Change the load into a load of bool then a select.
      LoadInst *LI = cast<LoadInst>(UI);
      LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV,
                                   LI->getName() + ".b", false, Align(1),
                                   LI->getOrdering(), LI->getSyncScopeID(), LI);
      Instruction *NSI;
      if (IsOneZero)
        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
      else
        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
      NSI->takeName(LI);
      // Since LI is split into two instructions, NLI and NSI both inherit the
      // same DebugLoc
      NLI->setDebugLoc(LI->getDebugLoc());
      NSI->setDebugLoc(LI->getDebugLoc());
      LI->replaceAllUsesWith(NSI);
    }
    UI->eraseFromParent();
  }

  // Retain the name of the old global variable. People who are debugging their
  // programs may expect these variables to be named the same.
  NewGV->takeName(GV);
  GV->eraseFromParent();
  return true;
}

static bool deleteIfDead(
    GlobalValue &GV, SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
  GV.removeDeadConstantUsers();

  if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
    return false;

  if (const Comdat *C = GV.getComdat())
    if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
      return false;

  bool Dead;
  if (auto *F = dyn_cast<Function>(&GV))
    Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
  else
    Dead = GV.use_empty();
  if (!Dead)
    return false;

  LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
  GV.eraseFromParent();
  ++NumDeleted;
  return true;
}

static bool isPointerValueDeadOnEntryToFunction(
    const Function *F, GlobalValue *GV,
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
  // Find all uses of GV. We expect them all to be in F, and if we can't
  // identify any of the uses we bail out.
  //
  // On each of these uses, identify if the memory that GV points to is
  // used/required/live at the start of the function. If it is not, for example
  // if the first thing the function does is store to the GV, the GV can
  // possibly be demoted.
  //
  // We don't do an exhaustive search for memory operations - simply look
  // through bitcasts as they're quite common and benign.
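  //
  // For example (illustrative): if the entry block of F unconditionally
  // stores to GV before anything can load from it, then every load is
  // dominated by a store, the value of GV on entry is dead, and GV becomes a
  // candidate for demotion to a local alloca in F.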
1298   const DataLayout &DL = GV->getParent()->getDataLayout();
1299   SmallVector<LoadInst *, 4> Loads;
1300   SmallVector<StoreInst *, 4> Stores;
1301   for (auto *U : GV->users()) {
1302     if (Operator::getOpcode(U) == Instruction::BitCast) {
1303       for (auto *UU : U->users()) {
1304         if (auto *LI = dyn_cast<LoadInst>(UU))
1305           Loads.push_back(LI);
1306         else if (auto *SI = dyn_cast<StoreInst>(UU))
1307           Stores.push_back(SI);
1308         else
1309           return false;
1310       }
1311       continue;
1312     }
1313 
1314     Instruction *I = dyn_cast<Instruction>(U);
1315     if (!I)
1316       return false;
1317     assert(I->getParent()->getParent() == F);
1318 
1319     if (auto *LI = dyn_cast<LoadInst>(I))
1320       Loads.push_back(LI);
1321     else if (auto *SI = dyn_cast<StoreInst>(I))
1322       Stores.push_back(SI);
1323     else
1324       return false;
1325   }
1326 
1327   // We have identified all uses of GV into loads and stores. Now check if all
1328   // of them are known not to depend on the value of the global at the function
1329   // entry point. We do this by ensuring that every load is dominated by at
1330   // least one store.
1331   auto &DT = LookupDomTree(*const_cast<Function *>(F));
1332 
1333   // The check below is quadratic; make sure we're not doing too many tests.
1334   // FIXME: Even though this will always have worst-case quadratic time, we
1335   // could put effort into minimizing the average time by putting stores that
1336   // have been shown to dominate at least one load at the beginning of the
1337   // Stores array, making subsequent dominance checks more likely to succeed
1338   // early.
1339   //
1340   // The threshold here is fairly large because global->local demotion is a
1341   // very powerful optimization should it fire.
1342   const unsigned Threshold = 100;
1343   if (Loads.size() * Stores.size() > Threshold)
1344     return false;
1345 
1346   for (auto *L : Loads) {
1347     auto *LTy = L->getType();
1348     if (none_of(Stores, [&](const StoreInst *S) {
1349           auto *STy = S->getValueOperand()->getType();
1350           // The load is only dominated by the store if DomTree says so
1351           // and the number of bytes loaded in L is less than or equal to
1352           // the number of bytes stored in S.
1353           return DT.dominates(S, L) &&
1354                  DL.getTypeStoreSize(LTy).getFixedSize() <=
1355                      DL.getTypeStoreSize(STy).getFixedSize();
1356         }))
1357       return false;
1358   }
1359   // All loads have known dependences inside F, so the global can be localized.
1360   return true;
1361 }
1362 
1363 /// C may have non-instruction users. Can all of those users be turned into
1364 /// instructions?
1365 static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
1366   // We don't do this exhaustively. The most common pattern that we really need
1367   // to care about is a constant GEP or constant bitcast, so we just look
1368   // through a single ConstantExpr.
1369   //
1370   // Any constant that this function returns true for must be handled by
1371   // makeAllConstantUsesInstructions.
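  //
  // For instance (illustrative), a use such as the constant expression
  //   i32* getelementptr ([4 x i32], [4 x i32]* @g, i32 0, i32 1)
  // can later be materialized as an ordinary getelementptr instruction at
  // each of its instruction users.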
1372   for (auto *U : C->users()) {
1373     if (isa<Instruction>(U))
1374       continue;
1375     if (!isa<ConstantExpr>(U))
1376       // A non-instruction, non-ConstantExpr user; we cannot convert this.
1377       return false;
1378     for (auto *UU : U->users())
1379       if (!isa<Instruction>(UU))
1380         // A ConstantExpr used by another constant. We don't try to recurse any
1381         // further and just bail out at this point.
1382         return false;
1383   }
1384 
1385   return true;
1386 }
1387 
1388 /// C may have non-instruction users, and
1389 /// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
1390 /// non-instruction users to instructions.
1391 static void makeAllConstantUsesInstructions(Constant *C) {
1392   SmallVector<ConstantExpr*,4> Users;
1393   for (auto *U : C->users()) {
1394     if (isa<ConstantExpr>(U))
1395       Users.push_back(cast<ConstantExpr>(U));
1396     else
1397       // Any non-ConstantExpr user must be an instruction; otherwise
1398       // allNonInstructionUsersCanBeMadeInstructions should have returned false.
1399       assert(
1400           isa<Instruction>(U) &&
1401           "Can't transform non-constantexpr non-instruction to instruction!");
1402   }
1403 
1404   SmallVector<Value*,4> UUsers;
1405   for (auto *U : Users) {
1406     UUsers.clear();
1407     append_range(UUsers, U->users());
1408     for (auto *UU : UUsers) {
1409       Instruction *UI = cast<Instruction>(UU);
1410       Instruction *NewU = U->getAsInstruction(UI);
1411       UI->replaceUsesOfWith(U, NewU);
1412     }
1413     // We've replaced all the uses, so destroy the constant. (destroyConstant
1414     // will update value handles and metadata.)
1415     U->destroyConstant();
1416   }
1417 }
1418 
1419 /// Analyze the specified global variable and optimize it if possible.  If we
1420 /// make a change, return true.
1421 static bool
1422 processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
1423                       function_ref<TargetTransformInfo &(Function &)> GetTTI,
1424                       function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1425                       function_ref<DominatorTree &(Function &)> LookupDomTree) {
1426   auto &DL = GV->getParent()->getDataLayout();
1427   // If this is a first-class global that has only one accessing function and
1428   // that function is non-recursive, we replace the global with a local alloca
1429   // in that function.
1430   //
1431   // NOTE: It doesn't make sense to promote non-single-value types since we
1432   // are just replacing static memory with stack memory.
1433   //
1434   // If the global is in a different address space, don't move it to the stack.
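  //
  // Illustrative sketch (hypothetical IR): under these conditions a global
  //   @g = internal global i32 undef
  // accessed from a single non-recursive function is replaced by an
  //   %g = alloca i32
  // in that function's entry block, with the old initializer stored to it
  // first unless the initializer is undef.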
1435   if (!GS.HasMultipleAccessingFunctions &&
1436       GS.AccessingFunction &&
1437       GV->getValueType()->isSingleValueType() &&
1438       GV->getType()->getAddressSpace() == 0 &&
1439       !GV->isExternallyInitialized() &&
1440       allNonInstructionUsersCanBeMadeInstructions(GV) &&
1441       GS.AccessingFunction->doesNotRecurse() &&
1442       isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
1443                                           LookupDomTree)) {
1446     LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
1447     Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
1448                                                    ->getEntryBlock().begin());
1449     Type *ElemTy = GV->getValueType();
1450     // FIXME: Pass Global's alignment when globals have alignment
1451     AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
1452                                         GV->getName(), &FirstI);
1453     if (!isa<UndefValue>(GV->getInitializer()))
1454       new StoreInst(GV->getInitializer(), Alloca, &FirstI);
1455 
1456     makeAllConstantUsesInstructions(GV);
1457 
1458     GV->replaceAllUsesWith(Alloca);
1459     GV->eraseFromParent();
1460     ++NumLocalized;
1461     return true;
1462   }
1463 
1464   bool Changed = false;
1465 
1466   // If the global is never loaded (but may be stored to), it is dead.
1467   // Delete it now.
1468   if (!GS.IsLoaded) {
1469     LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
1470 
1471     if (isLeakCheckerRoot(GV)) {
1472       // Delete any constant stores to the global.
1473       Changed = CleanupPointerRootUsers(GV, GetTLI);
1474     } else {
1475       // Delete any stores we can find to the global.  We may not be able to
1476       // make it completely dead though.
1477       Changed = CleanupConstantGlobalUsers(GV, DL);
1478     }
1479 
1480     // If the global is dead now, delete it.
1481     if (GV->use_empty()) {
1482       GV->eraseFromParent();
1483       ++NumDeleted;
1484       Changed = true;
1485     }
1486     return Changed;
1488   }
1489   if (GS.StoredType <= GlobalStatus::InitializerStored) {
1490     LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
1491 
1492     // Don't actually mark a global constant if it's atomic because atomic loads
1493     // are implemented by a trivial cmpxchg in some edge cases, and that usually
1494     // requires write access to the variable even if it's not actually changed.
1495     if (GS.Ordering == AtomicOrdering::NotAtomic) {
1496       assert(!GV->isConstant() && "Expected a non-constant global");
1497       GV->setConstant(true);
1498       Changed = true;
1499     }
1500 
1501     // Clean up any obviously simplifiable users now.
1502     Changed |= CleanupConstantGlobalUsers(GV, DL);
1503 
1504     // If the global is dead now, just nuke it.
1505     if (GV->use_empty()) {
1506       LLVM_DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
1507                         << "all users and delete global!\n");
1508       GV->eraseFromParent();
1509       ++NumDeleted;
1510       return true;
1511     }
1512 
1513     // Fall through to the next check; see if we can optimize further.
1514     ++NumMarked;
1515   }
1516   if (!GV->getInitializer()->getType()->isSingleValueType()) {
1518     if (SRAGlobal(GV, DL))
1519       return true;
1520   }
1521   Value *StoredOnceValue = GS.getStoredOnceValue();
1522   if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
1523     // Avoid speculating constant expressions that might trap (div/rem).
1524     auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
1525     if (SOVConstant && SOVConstant->canTrap())
1526       return Changed;
1527 
1528     Function &StoreFn =
1529         const_cast<Function &>(*GS.StoredOnceStore->getFunction());
1530     bool CanHaveNonUndefGlobalInitializer =
1531         GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
1532             GV->getType()->getAddressSpace());
1533     // If the initial value for the global was an undef value, and if only
1534     // one other value was stored into it, we can just change the
1535     // initializer to be the stored value, then delete all stores to the
1536     // global.  This allows us to mark it constant.
1537     // This is restricted to address spaces that allow globals to have
1538     // initializers. NVPTX, for example, does not support initializers for
1539     // shared memory (AS 3).
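    //
    // For example (hypothetical IR), given
    //   @g = internal global i32 undef
    // and a single dominating 'store i32 42, i32* @g', the initializer is
    // rewritten to
    //   @g = internal global i32 42
    // and CleanupConstantGlobalUsers can then fold the remaining accesses.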
1540     if (SOVConstant && isa<UndefValue>(GV->getInitializer()) &&
1541         DL.getTypeAllocSize(SOVConstant->getType()) ==
1542             DL.getTypeAllocSize(GV->getValueType()) &&
1543         CanHaveNonUndefGlobalInitializer) {
1544       if (SOVConstant->getType() == GV->getValueType()) {
1545         // Change the initializer in place.
1546         GV->setInitializer(SOVConstant);
1547       } else {
1548         // Create a new global with adjusted type.
1549         auto *NGV = new GlobalVariable(
1550             *GV->getParent(), SOVConstant->getType(), GV->isConstant(),
1551             GV->getLinkage(), SOVConstant, "", GV, GV->getThreadLocalMode(),
1552             GV->getAddressSpace());
1553         NGV->takeName(GV);
1554         NGV->copyAttributesFrom(GV);
1555         GV->replaceAllUsesWith(ConstantExpr::getBitCast(NGV, GV->getType()));
1556         GV->eraseFromParent();
1557         GV = NGV;
1558       }
1559 
1560       // Clean up any obviously simplifiable users now.
1561       CleanupConstantGlobalUsers(GV, DL);
1562 
1563       if (GV->use_empty()) {
1564         LLVM_DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
1565                           << "simplify all users and delete global!\n");
1566         GV->eraseFromParent();
1567         ++NumDeleted;
1568       }
1569       ++NumSubstitute;
1570       return true;
1571     }
1572 
1573     // Try to optimize globals based on the knowledge that only one value
1574     // (besides its initializer) is ever stored to the global.
1575     if (optimizeOnceStoredGlobal(GV, StoredOnceValue, GS.Ordering, DL, GetTLI))
1576       return true;
1577 
1578     // Otherwise, if the global was not a boolean, we can shrink it to be a
1579     // boolean. Skip this for address spaces that don't allow an initializer.
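    //
    // Sketch (hypothetical IR): a global that only ever holds its initializer
    // or one other stored value, say
    //   @g = internal global i32 0
    // with a lone 'store i32 42, i32* @g', is rewritten as an i1 flag whose
    // loads select between 0 and 42.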
1580     if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
1581         (!isa<UndefValue>(GV->getInitializer()) ||
1582          CanHaveNonUndefGlobalInitializer)) {
1583       if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
1584         ++NumShrunkToBool;
1585         return true;
1586       }
1587     }
1588   }
1589 
1590   return Changed;
1591 }
1592 
1593 /// Analyze the specified global variable and optimize it if possible.  If we
1594 /// make a change, return true.
1595 static bool
1596 processGlobal(GlobalValue &GV,
1597               function_ref<TargetTransformInfo &(Function &)> GetTTI,
1598               function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1599               function_ref<DominatorTree &(Function &)> LookupDomTree) {
1600   if (GV.getName().startswith("llvm."))
1601     return false;
1602 
1603   GlobalStatus GS;
1604 
1605   if (GlobalStatus::analyzeGlobal(&GV, GS))
1606     return false;
1607 
1608   bool Changed = false;
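  // If the global's address is never compared, the address is not significant:
  // a local-linkage global can be marked unnamed_addr outright, while an
  // externally visible one can only be given local_unnamed_addr.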
1609   if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
1610     auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
1611                                                : GlobalValue::UnnamedAddr::Local;
1612     if (NewUnnamedAddr != GV.getUnnamedAddr()) {
1613       GV.setUnnamedAddr(NewUnnamedAddr);
1614       NumUnnamed++;
1615       Changed = true;
1616     }
1617   }
1618 
1619   // Do more involved optimizations if the global is internal.
1620   if (!GV.hasLocalLinkage())
1621     return Changed;
1622 
1623   auto *GVar = dyn_cast<GlobalVariable>(&GV);
1624   if (!GVar)
1625     return Changed;
1626 
1627   if (GVar->isConstant() || !GVar->hasInitializer())
1628     return Changed;
1629 
1630   return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) ||
1631          Changed;
1632 }
1633 
1634 /// Walk all of the direct calls of the specified function, changing them to
1635 /// FastCC.
1636 static void ChangeCalleesToFastCall(Function *F) {
1637   for (User *U : F->users()) {
1638     if (isa<BlockAddress>(U))
1639       continue;
1640     cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
1641   }
1642 }
1643 
1644 static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
1645                                Attribute::AttrKind A) {
1646   unsigned AttrIndex;
1647   if (Attrs.hasAttrSomewhere(A, &AttrIndex))
1648     return Attrs.removeAttributeAtIndex(C, AttrIndex, A);
1649   return Attrs;
1650 }
1651 
1652 static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
1653   F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
1654   for (User *U : F->users()) {
1655     if (isa<BlockAddress>(U))
1656       continue;
1657     CallBase *CB = cast<CallBase>(U);
1658     CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A));
1659   }
1660 }
1661 
1662 /// Return true if this is a calling convention that we'd like to change.  The
1663 /// idea here is that we don't want to mess with the convention if the user
1664 /// explicitly requested something with performance implications like coldcc,
1665 /// GHC, or anyregcc.
1666 static bool hasChangeableCC(Function *F) {
1667   CallingConv::ID CC = F->getCallingConv();
1668 
1669   // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
1670   if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
1671     return false;
1672 
1673   // FIXME: Change CC for the whole chain of musttail calls when possible.
1674   //
1675   // Can't change the CC of a function that either has musttail calls or is a
1676   // musttail callee itself.
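  //
  // (Sketch, hypothetical IR: a 'musttail call i32 @f(i32 %x)' requires the
  // caller's and callee's calling conventions to match exactly, so changing
  // @f alone would produce invalid IR.)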
1677   for (User *U : F->users()) {
1678     if (isa<BlockAddress>(U))
1679       continue;
1680     CallInst *CI = dyn_cast<CallInst>(U);
1681     if (!CI)
1682       continue;
1683 
1684     if (CI->isMustTailCall())
1685       return false;
1686   }
1687 
1688   for (BasicBlock &BB : *F)
1689     if (BB.getTerminatingMustTailCall())
1690       return false;
1691 
1692   return true;
1693 }
1694 
1695 /// Return true if the block containing the call site has a BlockFrequency of
1696 /// less than ColdCCRelFreq% of the entry block.
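/// For example, if ColdCCRelFreq were 2, a call site whose block frequency is
/// below 2% of its caller's entry-block frequency would be considered cold.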
1697 static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) {
1698   const BranchProbability ColdProb(ColdCCRelFreq, 100);
1699   auto *CallSiteBB = CB.getParent();
1700   auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
1701   auto CallerEntryFreq =
1702       CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock()));
1703   return CallSiteFreq < CallerEntryFreq * ColdProb;
1704 }
1705 
1706 // This function checks if the input function F is cold at all call sites. It
1707 // also looks at each call site's containing function, returning false if the
1708 // caller function contains other non-cold calls. The input vector AllCallsCold
1709 // contains a list of functions that only have call sites in cold blocks.
1710 static bool
1711 isValidCandidateForColdCC(Function &F,
1712                           function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1713                           const std::vector<Function *> &AllCallsCold) {
1714 
1715   if (F.user_empty())
1716     return false;
1717 
1718   for (User *U : F.users()) {
1719     if (isa<BlockAddress>(U))
1720       continue;
1721 
1722     CallBase &CB = cast<CallBase>(*U);
1723     Function *CallerFunc = CB.getParent()->getParent();
1724     BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
1725     if (!isColdCallSite(CB, CallerBFI))
1726       return false;
1727     if (!llvm::is_contained(AllCallsCold, CallerFunc))
1728       return false;
1729   }
1730   return true;
1731 }
1732 
1733 static void changeCallSitesToColdCC(Function *F) {
1734   for (User *U : F->users()) {
1735     if (isa<BlockAddress>(U))
1736       continue;
1737     cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
1738   }
1739 }
1740 
1741 // This function iterates over all the call instructions in the input Function
1742 // and checks that all call sites are in cold blocks and are allowed to use the
1743 // coldcc calling convention.
1744 static bool
1745 hasOnlyColdCalls(Function &F,
1746                  function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
1747   for (BasicBlock &BB : F) {
1748     for (Instruction &I : BB) {
1749       if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1750         // Skip over inline asm instructions since they aren't function calls.
1751         if (CI->isInlineAsm())
1752           continue;
1753         Function *CalledFn = CI->getCalledFunction();
1754         if (!CalledFn)
1755           return false;
1756         if (!CalledFn->hasLocalLinkage())
1757           return false;
1758         // Skip over intrinsics since they won't remain as function calls.
1759         if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
1760           continue;
1761         // Check if it's valid to use coldcc calling convention.
1762         if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
1763             CalledFn->hasAddressTaken())
1764           return false;
1765         BlockFrequencyInfo &CallerBFI = GetBFI(F);
1766         if (!isColdCallSite(*CI, CallerBFI))
1767           return false;
1768       }
1769     }
1770   }
1771   return true;
1772 }
1773 
1774 static bool hasMustTailCallers(Function *F) {
1775   for (User *U : F->users()) {
1776     CallBase *CB = dyn_cast<CallBase>(U);
1777     if (!CB) {
1778       assert(isa<BlockAddress>(U) &&
1779              "Expected either CallBase or BlockAddress");
1780       continue;
1781     }
1782     if (CB->isMustTailCall())
1783       return true;
1784   }
1785   return false;
1786 }
1787 
1788 static bool hasInvokeCallers(Function *F) {
1789   for (User *U : F->users())
1790     if (isa<InvokeInst>(U))
1791       return true;
1792   return false;
1793 }
1794 
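/// Strip the "preallocated" machinery from F and its direct calls: the
/// attribute and operand bundle are removed, and the setup/arg intrinsics are
/// replaced by an explicit stacksave/alloca/stackrestore sequence. Sketch of
/// the rewrite (hypothetical IR):
///   %t = call token @llvm.call.preallocated.setup(i32 1)
///   %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0)
///   call void @f(i8* preallocated(i32) %a) ["preallocated"(token %t)]
/// becomes a stacksave, a plain alloca for the argument, the call without the
/// bundle, and a stackrestore after it.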
1795 static void RemovePreallocated(Function *F) {
1796   RemoveAttribute(F, Attribute::Preallocated);
1797 
1798   auto *M = F->getParent();
1799 
1800   IRBuilder<> Builder(M->getContext());
1801 
1802   // Cannot modify users() while iterating over it, so make a copy.
1803   SmallVector<User *, 4> PreallocatedCalls(F->users());
1804   for (User *U : PreallocatedCalls) {
1805     CallBase *CB = dyn_cast<CallBase>(U);
1806     if (!CB)
1807       continue;
1808 
1809     assert(
1810         !CB->isMustTailCall() &&
1811         "Shouldn't call RemovePreallocated() on a musttail preallocated call");
1812     // Create copy of call without "preallocated" operand bundle.
1813     SmallVector<OperandBundleDef, 1> OpBundles;
1814     CB->getOperandBundlesAsDefs(OpBundles);
1815     CallBase *PreallocatedSetup = nullptr;
1816     for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
1817       if (It->getTag() == "preallocated") {
1818         PreallocatedSetup = cast<CallBase>(*It->input_begin());
1819         OpBundles.erase(It);
1820         break;
1821       }
1822     }
1823     assert(PreallocatedSetup && "Did not find preallocated bundle");
1824     uint64_t ArgCount =
1825         cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();
1826 
1827     assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
1828            "Unknown indirect call type");
1829     CallBase *NewCB = CallBase::Create(CB, OpBundles, CB);
1830     CB->replaceAllUsesWith(NewCB);
1831     NewCB->takeName(CB);
1832     CB->eraseFromParent();
1833 
1834     Builder.SetInsertPoint(PreallocatedSetup);
1835     auto *StackSave =
1836         Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave));
1837 
1838     Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction());
1839     Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
1840                        StackSave);
1841 
1842     // Replace @llvm.call.preallocated.arg() with alloca.
1843     // Cannot modify users() while iterating over it, so make a copy.
1844     // @llvm.call.preallocated.arg() can be called with the same index multiple
1845     // times. So for each @llvm.call.preallocated.arg(), we see if we have
1846     // already created a Value* for the index, and if not, create an alloca and
1847     // bitcast right after the @llvm.call.preallocated.setup() so that it
1848     // dominates all uses.
1849     SmallVector<Value *, 2> ArgAllocas(ArgCount);
1850     SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users());
1851     for (auto *User : PreallocatedArgs) {
1852       auto *UseCall = cast<CallBase>(User);
1853       assert(UseCall->getCalledFunction()->getIntrinsicID() ==
1854                  Intrinsic::call_preallocated_arg &&
1855              "preallocated token use was not a llvm.call.preallocated.arg");
1856       uint64_t AllocArgIndex =
1857           cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue();
1858       Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
1859       if (!AllocaReplacement) {
1860         auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
1861         auto *ArgType =
1862             UseCall->getFnAttr(Attribute::Preallocated).getValueAsType();
1863         auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
1864         Builder.SetInsertPoint(InsertBefore);
1865         auto *Alloca =
1866             Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg");
1867         auto *BitCast = Builder.CreateBitCast(
1868             Alloca, Type::getInt8PtrTy(M->getContext()), UseCall->getName());
1869         ArgAllocas[AllocArgIndex] = BitCast;
1870         AllocaReplacement = BitCast;
1871       }
1872 
1873       UseCall->replaceAllUsesWith(AllocaReplacement);
1874       UseCall->eraseFromParent();
1875     }
1876     // Remove @llvm.call.preallocated.setup().
1877     cast<Instruction>(PreallocatedSetup)->eraseFromParent();
1878   }
1879 }
1880 
1881 static bool
1882 OptimizeFunctions(Module &M,
1883                   function_ref<TargetLibraryInfo &(Function &)> GetTLI,
1884                   function_ref<TargetTransformInfo &(Function &)> GetTTI,
1885                   function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
1886                   function_ref<DominatorTree &(Function &)> LookupDomTree,
1887                   SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
1888 
1889   bool Changed = false;
1890 
1891   std::vector<Function *> AllCallsCold;
1892   for (Function &F : llvm::make_early_inc_range(M))
1893     if (hasOnlyColdCalls(F, GetBFI))
1894       AllCallsCold.push_back(&F);
1895 
1896   // Optimize functions.
1897   for (Function &F : llvm::make_early_inc_range(M)) {
1898     // Don't perform the global opt pass on naked functions; we don't want
1899     // fast calling conventions for naked functions.
1900     if (F.hasFnAttribute(Attribute::Naked))
1901       continue;
1902 
1903     // Functions without names cannot be referenced outside this module.
1904     if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage())
1905       F.setLinkage(GlobalValue::InternalLinkage);
1906 
1907     if (deleteIfDead(F, NotDiscardableComdats)) {
1908       Changed = true;
1909       continue;
1910     }
1911 
1912     // LLVM's definition of dominance allows instructions that are cyclic
1913     // in unreachable blocks, e.g.:
1914     //   %pat = select i1 %condition, i16* @global, i16* %pat
1915     // because any instruction dominates an instruction in a block that's
1916     // not reachable from entry.
1917     // So, remove unreachable blocks from the function, because a) there's
1918     // no point in analyzing them and b) GlobalOpt should otherwise grow
1919     // some more complicated logic to break these cycles.
1920     // Removing unreachable blocks might invalidate the dominator tree, so we
1921     // recalculate it.
1922     if (!F.isDeclaration()) {
1923       if (removeUnreachableBlocks(F)) {
1924         auto &DT = LookupDomTree(F);
1925         DT.recalculate(F);
1926         Changed = true;
1927       }
1928     }
1929 
1930     Changed |= processGlobal(F, GetTTI, GetTLI, LookupDomTree);
1931 
1932     if (!F.hasLocalLinkage())
1933       continue;
1934 
1935     // If we have an inalloca parameter that we can safely remove the
1936     // inalloca attribute from, do so. This unlocks optimizations that
1937     // wouldn't be safe in the presence of inalloca.
1938     // FIXME: We should also hoist allocas affected by this to the entry
1939     // block if possible.
1940     if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
1941         !F.hasAddressTaken() && !hasMustTailCallers(&F)) {
1942       RemoveAttribute(&F, Attribute::InAlloca);
1943       Changed = true;
1944     }
1945 
1946     // FIXME: handle invokes
1947     // FIXME: handle musttail
1948     if (F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
1949       if (!F.hasAddressTaken() && !hasMustTailCallers(&F) &&
1950           !hasInvokeCallers(&F)) {
1951         RemovePreallocated(&F);
1952         Changed = true;
1953       }
1954       continue;
1955     }
1956 
1957     if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
1958       NumInternalFunc++;
1959       TargetTransformInfo &TTI = GetTTI(F);
1960       // Change the calling convention to coldcc if either stress testing is
1961       // enabled or the target would like to use coldcc on functions which are
1962       // cold at all call sites and the callers contain no other non coldcc
1963       // cold at all call sites and the callers contain no other non-coldcc
1964       if (EnableColdCCStressTest ||
1965           (TTI.useColdCCForColdCall(F) &&
1966            isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
1967         F.setCallingConv(CallingConv::Cold);
1968         changeCallSitesToColdCC(&F);
1969         Changed = true;
1970         NumColdCC++;
1971       }
1972     }
1973 
1974     if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
1975       // If this function has a calling convention worth changing, is not a
1976       // varargs function, and is only called directly, promote it to use the
1977       // Fast calling convention.
1978       F.setCallingConv(CallingConv::Fast);
1979       ChangeCalleesToFastCall(&F);
1980       ++NumFastCallFns;
1981       Changed = true;
1982     }
1983 
1984     if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
1985         !F.hasAddressTaken()) {
1986       // The function is not used by a trampoline intrinsic, so it is safe
1987       // to remove the 'nest' attribute.
1988       RemoveAttribute(&F, Attribute::Nest);
1989       ++NumNestRemoved;
1990       Changed = true;
1991     }
1992   }
1993   return Changed;
1994 }
1995 
1996 static bool
1997 OptimizeGlobalVars(Module &M,
1998                    function_ref<TargetTransformInfo &(Function &)> GetTTI,
1999                    function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2000                    function_ref<DominatorTree &(Function &)> LookupDomTree,
2001                    SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2002   bool Changed = false;
2003 
2004   for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
2005     // Global variables without names cannot be referenced outside this module.
2006     if (!GV.hasName() && !GV.isDeclaration() && !GV.hasLocalLinkage())
2007       GV.setLinkage(GlobalValue::InternalLinkage);
2008     // Simplify the initializer.
2009     if (GV.hasInitializer())
2010       if (auto *C = dyn_cast<Constant>(GV.getInitializer())) {
2011         auto &DL = M.getDataLayout();
2012         // TLI is not used in the case of a Constant, so use default nullptr
2013         // for that optional parameter, since we don't have a Function to
2014         // provide GetTLI anyway.
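        // (For instance, an initializer such as 'i32 add (i32 2, i32 3)'
        // folds to 'i32 5'.)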
2015         Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
2016         if (New != C)
2017           GV.setInitializer(New);
2018       }
2019 
2020     if (deleteIfDead(GV, NotDiscardableComdats)) {
2021       Changed = true;
2022       continue;
2023     }
2024 
2025     Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
2026   }
2027   return Changed;
2028 }
2029 
2030 /// Evaluate static constructors in the function, if we can.  Return true if we
2031 /// can, false otherwise.
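/// For example (hypothetical IR), a constructor whose body is just
///   store i32 42, i32* @g
///   ret void
/// evaluates successfully: @g's initializer is rewritten to 'i32 42', and the
/// caller can then drop the constructor from llvm.global_ctors.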
2032 static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
2033                                       TargetLibraryInfo *TLI) {
2034   // Call the function.
2035   Evaluator Eval(DL, TLI);
2036   Constant *RetValDummy;
2037   bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
2038                                            SmallVector<Constant*, 0>());
2039 
2040   if (EvalSuccess) {
2041     ++NumCtorsEvaluated;
2042 
2043     // We succeeded at evaluation: commit the result.
2044     auto NewInitializers = Eval.getMutatedInitializers();
2045     LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
2046                       << F->getName() << "' to " << NewInitializers.size()
2047                       << " stores.\n");
2048     for (const auto &Pair : NewInitializers)
2049       Pair.first->setInitializer(Pair.second);
2050     for (GlobalVariable *GV : Eval.getInvariants())
2051       GV->setConstant(true);
2052   }
2053 
2054   return EvalSuccess;
2055 }
2056 
2057 static int compareNames(Constant *const *A, Constant *const *B) {
2058   Value *AStripped = (*A)->stripPointerCasts();
2059   Value *BStripped = (*B)->stripPointerCasts();
2060   return AStripped->getName().compare(BStripped->getName());
2061 }
2062 
2063 static void setUsedInitializer(GlobalVariable &V,
2064                                const SmallPtrSetImpl<GlobalValue *> &Init) {
2065   if (Init.empty()) {
2066     V.eraseFromParent();
2067     return;
2068   }
2069 
2070   // Type of pointer to the array of pointers.
2071   PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
2072 
2073   SmallVector<Constant *, 8> UsedArray;
2074   for (GlobalValue *GV : Init) {
2075     Constant *Cast =
2076         ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
2077     UsedArray.push_back(Cast);
2078   }
2079   // Sort to get deterministic order.
2080   array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
2081   ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
2082 
2083   Module *M = V.getParent();
2084   V.removeFromParent();
2085   GlobalVariable *NV =
2086       new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
2087                          ConstantArray::get(ATy, UsedArray), "");
2088   NV->takeName(&V);
2089   NV->setSection("llvm.metadata");
2090   delete &V;
2091 }
2092 
2093 namespace {
2094 
2095 /// An easy-to-access representation of llvm.used and llvm.compiler.used.
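/// In IR these appear as appending-linkage arrays of i8* in the
/// "llvm.metadata" section, e.g. (illustrative):
///   @llvm.used = appending global [1 x i8*]
///       [i8* bitcast (void ()* @f to i8*)], section "llvm.metadata"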
2096 class LLVMUsed {
2097   SmallPtrSet<GlobalValue *, 4> Used;
2098   SmallPtrSet<GlobalValue *, 4> CompilerUsed;
2099   GlobalVariable *UsedV;
2100   GlobalVariable *CompilerUsedV;
2101 
2102 public:
2103   LLVMUsed(Module &M) {
2104     SmallVector<GlobalValue *, 4> Vec;
2105     UsedV = collectUsedGlobalVariables(M, Vec, false);
2106     Used = {Vec.begin(), Vec.end()};
2107     Vec.clear();
2108     CompilerUsedV = collectUsedGlobalVariables(M, Vec, true);
2109     CompilerUsed = {Vec.begin(), Vec.end()};
2110   }
2111 
2112   using iterator = SmallPtrSet<GlobalValue *, 4>::iterator;
2113   using used_iterator_range = iterator_range<iterator>;
2114 
2115   iterator usedBegin() { return Used.begin(); }
2116   iterator usedEnd() { return Used.end(); }
2117 
2118   used_iterator_range used() {
2119     return used_iterator_range(usedBegin(), usedEnd());
2120   }
2121 
2122   iterator compilerUsedBegin() { return CompilerUsed.begin(); }
2123   iterator compilerUsedEnd() { return CompilerUsed.end(); }
2124 
2125   used_iterator_range compilerUsed() {
2126     return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
2127   }
2128 
2129   bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
2130 
2131   bool compilerUsedCount(GlobalValue *GV) const {
2132     return CompilerUsed.count(GV);
2133   }
2134 
2135   bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
2136   bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
2137   bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
2138 
2139   bool compilerUsedInsert(GlobalValue *GV) {
2140     return CompilerUsed.insert(GV).second;
2141   }
2142 
2143   void syncVariablesAndSets() {
2144     if (UsedV)
2145       setUsedInitializer(*UsedV, Used);
2146     if (CompilerUsedV)
2147       setUsedInitializer(*CompilerUsedV, CompilerUsed);
2148   }
2149 };
2150 
2151 } // end anonymous namespace
2152 
2153 static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
2154   if (GA.use_empty()) // No use at all.
2155     return false;
2156 
2157   assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
2158          "We should have removed the duplicated "
2159          "element from llvm.compiler.used");
2160   if (!GA.hasOneUse())
2161     // Strictly more than one use. So at least one use is in neither llvm.used
2162     // nor llvm.compiler.used.
2163     return true;
2164 
2165   // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
2166   return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
2167 }
2168 
2169 static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
2170                                                const LLVMUsed &U) {
2171   unsigned N = 2;
2172   assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
2173          "We should have removed the duplicated "
2174          "element from llvm.compiler.used");
2175   if (U.usedCount(&V) || U.compilerUsedCount(&V))
2176     ++N;
2177   return V.hasNUsesOrMore(N);
2178 }
2179 
2180 static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
2181   if (!GA.hasLocalLinkage())
2182     return true;
2183 
2184   return U.usedCount(&GA) || U.compilerUsedCount(&GA);
2185 }
2186 
2187 static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
2188                              bool &RenameTarget) {
2189   RenameTarget = false;
2190   bool Ret = false;
2191   if (hasUseOtherThanLLVMUsed(GA, U))
2192     Ret = true;
2193 
2194   // If the alias is externally visible, we may still be able to simplify it.
2195   if (!mayHaveOtherReferences(GA, U))
2196     return Ret;
2197 
2198   // If the aliasee has internal linkage, give it the name and linkage
2199   // of the alias, and delete the alias.  This turns:
2200   //   define internal ... @f(...)
2201   //   @a = alias ... @f
2202   // into:
2203   //   define ... @a(...)
2204   Constant *Aliasee = GA.getAliasee();
2205   GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
2206   if (!Target->hasLocalLinkage())
2207     return Ret;
2208 
2209   // Do not perform the transform if multiple aliases potentially target the
2210   // aliasee. This check also ensures that it is safe to replace the section
2211   // and other attributes of the aliasee with those of the alias.
2212   if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
2213     return Ret;
2214 
2215   RenameTarget = true;
2216   return true;
2217 }
2218 
2219 static bool
2220 OptimizeGlobalAliases(Module &M,
2221                       SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
2222   bool Changed = false;
2223   LLVMUsed Used(M);
2224 
2225   for (GlobalValue *GV : Used.used())
2226     Used.compilerUsedErase(GV);
2227 
2228   // Return whether GV is explicitly or implicitly dso_local and not replaceable
2229   // by another definition in the current linkage unit.
2230   auto IsModuleLocal = [](GlobalValue &GV) {
2231     return !GlobalValue::isInterposableLinkage(GV.getLinkage()) &&
2232            (GV.isDSOLocal() || GV.isImplicitDSOLocal());
2233   };
2234 
2235   for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
2236     // Aliases without names cannot be referenced outside this module.
2237     if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
2238       J.setLinkage(GlobalValue::InternalLinkage);
2239 
2240     if (deleteIfDead(J, NotDiscardableComdats)) {
2241       Changed = true;
2242       continue;
2243     }
2244 
2245     // If the alias can change at link time, nothing can be done - bail out.
2246     if (!IsModuleLocal(J))
2247       continue;
2248 
2249     Constant *Aliasee = J.getAliasee();
2250     GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
2251     // We can't trivially replace the alias with the aliasee if the aliasee is
2252     // non-trivial in some way. We also can't replace the alias with the aliasee
2253     // if the aliasee may be preemptible at runtime. On ELF, a non-preemptible
2254     // alias can be used to access the definition as if preemption did not
2255     // happen.
2256     // TODO: Try to handle non-zero GEPs of local aliasees.
2257     if (!Target || !IsModuleLocal(*Target))
2258       continue;
2259 
2260     Target->removeDeadConstantUsers();
2261 
2262     // Make all users of the alias use the aliasee instead.
2263     bool RenameTarget;
2264     if (!hasUsesToReplace(J, Used, RenameTarget))
2265       continue;
2266 
2267     J.replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J.getType()));
2268     ++NumAliasesResolved;
2269     Changed = true;
2270 
2271     if (RenameTarget) {
2272       // Give the aliasee the name, linkage and other attributes of the alias.
2273       Target->takeName(&J);
2274       Target->setLinkage(J.getLinkage());
2275       Target->setDSOLocal(J.isDSOLocal());
2276       Target->setVisibility(J.getVisibility());
2277       Target->setDLLStorageClass(J.getDLLStorageClass());
2278 
2279       if (Used.usedErase(&J))
2280         Used.usedInsert(Target);
2281 
2282       if (Used.compilerUsedErase(&J))
2283         Used.compilerUsedInsert(Target);
2284     } else if (mayHaveOtherReferences(J, Used))
2285       continue;
2286 
2287     // Delete the alias.
2288     M.getAliasList().erase(&J);
2289     ++NumAliasesRemoved;
2290     Changed = true;
2291   }
2292 
2293   Used.syncVariablesAndSets();
2294 
2295   return Changed;
2296 }
2297 
2298 static Function *
2299 FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
2300   // Hack to get a default TLI before we have an actual Function.
2301   auto FuncIter = M.begin();
2302   if (FuncIter == M.end())
2303     return nullptr;
2304   auto *TLI = &GetTLI(*FuncIter);
2305 
2306   LibFunc F = LibFunc_cxa_atexit;
2307   if (!TLI->has(F))
2308     return nullptr;
2309 
2310   Function *Fn = M.getFunction(TLI->getName(F));
2311   if (!Fn)
2312     return nullptr;
2313 
2314   // Now get the actual TLI for Fn.
2315   TLI = &GetTLI(*Fn);
2316 
2317   // Make sure that the function has the correct prototype.
2318   if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
2319     return nullptr;
2320 
2321   return Fn;
2322 }
2323 
2324 /// Returns whether the given function is an empty C++ destructor and can
2325 /// therefore be eliminated.
2326 /// Note that we assume that other optimization passes have already simplified
2327 /// the code so we simply check for 'ret'.
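/// For example (illustrative IR), a destructor reduced to
///   define internal void @dtor(i8* %p) { ret void }
/// is considered empty, so its __cxa_atexit registration can be dropped.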
2328 static bool cxxDtorIsEmpty(const Function &Fn) {
2329   // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
2330   // nounwind, but that doesn't seem worth doing.
2331   if (Fn.isDeclaration())
2332     return false;
2333 
2334   for (auto &I : Fn.getEntryBlock()) {
2335     if (I.isDebugOrPseudoInst())
2336       continue;
2337     if (isa<ReturnInst>(I))
2338       return true;
2339     break;
2340   }
2341   return false;
2342 }
2343 
2344 static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
2345   /// Itanium C++ ABI p3.3.5:
2346   ///
2347   ///   After constructing a global (or local static) object, that will require
2348   ///   destruction on exit, a termination function is registered as follows:
2349   ///
2350   ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
2351   ///
2352   ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
2353   ///   call f(p) when DSO d is unloaded, before all such termination calls
2354   ///   registered before this one. It returns zero if registration is
2355   ///   successful, nonzero on failure.
2356 
2357   // This pass looks for calls to __cxa_atexit that register a trivial
2358   // destructor and removes them.
2359   bool Changed = false;
2360 
2361   for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) {
2362     // We're only interested in calls. Theoretically, we could handle invoke
2363     // instructions as well, but neither llvm-gcc nor clang generates invokes
2364     // to __cxa_atexit.
2365     CallInst *CI = dyn_cast<CallInst>(U);
2366     if (!CI)
2367       continue;
2368 
2369     Function *DtorFn =
2370       dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
2371     if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
2372       continue;
2373 
2374     // Just remove the call.
2375     CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
2376     CI->eraseFromParent();
2377 
2378     ++NumCXXDtorsRemoved;
2379 
2380     Changed = true;
2381   }
2382 
2383   return Changed;
2384 }
2385 
2386 static bool optimizeGlobalsInModule(
2387     Module &M, const DataLayout &DL,
2388     function_ref<TargetLibraryInfo &(Function &)> GetTLI,
2389     function_ref<TargetTransformInfo &(Function &)> GetTTI,
2390     function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2391     function_ref<DominatorTree &(Function &)> LookupDomTree) {
2392   SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
2393   bool Changed = false;
2394   bool LocalChange = true;
2395   while (LocalChange) {
2396     LocalChange = false;
2397 
2398     NotDiscardableComdats.clear();
2399     for (const GlobalVariable &GV : M.globals())
2400       if (const Comdat *C = GV.getComdat())
2401         if (!GV.isDiscardableIfUnused() || !GV.use_empty())
2402           NotDiscardableComdats.insert(C);
2403     for (Function &F : M)
2404       if (const Comdat *C = F.getComdat())
2405         if (!F.isDefTriviallyDead())
2406           NotDiscardableComdats.insert(C);
2407     for (GlobalAlias &GA : M.aliases())
2408       if (const Comdat *C = GA.getComdat())
2409         if (!GA.isDiscardableIfUnused() || !GA.use_empty())
2410           NotDiscardableComdats.insert(C);
2411 
2412     // Delete functions that are trivially dead, ccc -> fastcc
2413     LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
2414                                      NotDiscardableComdats);
2415 
2416     // Optimize global_ctors list.
2417     LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
2418       return EvaluateStaticConstructor(F, DL, &GetTLI(*F));
2419     });
2420 
2421     // Optimize non-address-taken globals.
2422     LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
2423                                       NotDiscardableComdats);
2424 
2425     // Resolve aliases, when possible.
2426     LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
2427 
2428     // Try to remove trivial global destructors if they are not removed
2429     // already.
2430     Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI);
2431     if (CXAAtExitFn)
2432       LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
2433 
2434     Changed |= LocalChange;
2435   }
2436 
2437   // TODO: Move all global ctors functions to the end of the module for code
2438   // layout.
2439 
2440   return Changed;
2441 }
2442 
2443 PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
2444   auto &DL = M.getDataLayout();
2445   auto &FAM =
2446       AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2447   auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
2448     return FAM.getResult<DominatorTreeAnalysis>(F);
2449   };
2450   auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2451     return FAM.getResult<TargetLibraryAnalysis>(F);
2452   };
2453   auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
2454     return FAM.getResult<TargetIRAnalysis>(F);
2455   };
2456 
2457   auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
2458     return FAM.getResult<BlockFrequencyAnalysis>(F);
2459   };
2460 
2461   if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree))
2462     return PreservedAnalyses::all();
2463   return PreservedAnalyses::none();
2464 }
2465 
2466 namespace {
2467 
2468 struct GlobalOptLegacyPass : public ModulePass {
2469   static char ID; // Pass identification, replacement for typeid
2470 
2471   GlobalOptLegacyPass() : ModulePass(ID) {
2472     initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
2473   }
2474 
2475   bool runOnModule(Module &M) override {
2476     if (skipModule(M))
2477       return false;
2478 
2479     auto &DL = M.getDataLayout();
2480     auto LookupDomTree = [this](Function &F) -> DominatorTree & {
2481       return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
2482     };
2483     auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
2484       return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2485     };
2486     auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
2487       return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2488     };
2489 
2490     auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
2491       return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
2492     };
2493 
2494     return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI,
2495                                    LookupDomTree);
2496   }
2497 
2498   void getAnalysisUsage(AnalysisUsage &AU) const override {
2499     AU.addRequired<TargetLibraryInfoWrapperPass>();
2500     AU.addRequired<TargetTransformInfoWrapperPass>();
2501     AU.addRequired<DominatorTreeWrapperPass>();
2502     AU.addRequired<BlockFrequencyInfoWrapperPass>();
2503   }
2504 };
2505 
2506 } // end anonymous namespace
2507 
2508 char GlobalOptLegacyPass::ID = 0;
2509 
2510 INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
2511                       "Global Variable Optimizer", false, false)
2512 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2513 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
2514 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
2515 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2516 INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
2517                     "Global Variable Optimizer", false, false)
2518 
2519 ModulePass *llvm::createGlobalOptimizerPass() {
2520   return new GlobalOptLegacyPass();
2521 }
2522