//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumDeadStore, "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

static cl::opt<unsigned> MaxCopiedFromConstantUsers(
    "instcombine-max-copied-from-constant-users", cl::init(300),
    cl::desc("Maximum users to visit in copy from constant transform"),
    cl::Hidden);

namespace llvm {
cl::opt<bool> EnableInferAlignmentPass(
    "enable-infer-alignment-pass", cl::init(true), cl::Hidden, cl::ZeroOrMore,
    cl::desc("Enable the InferAlignment pass, disabling alignment inference in "
             "InstCombine"));
}

/// isOnlyCopiedFromConstantMemory - Recursively walk the uses of a (derived)
/// pointer to an alloca. Ignore any reads of the pointer and return false if
/// we see any stores or other unknown uses. If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses. If we see a memcpy/memmove that targets an unoffset pointer to
/// the alloca, and if the source pointer points to constant memory, we can
/// optimize this.
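///
/// For example, a typical candidate looks like
///   %buf = alloca [8 x i32]
///   call void @llvm.memcpy.p0.p0.i64(ptr %buf, ptr @cst, i64 32, i1 false)
///   ... only reads of %buf ...
/// where @cst is a constant global; the caller may then replace %buf with
/// @cst and drop the memcpy.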
static bool
isOnlyCopiedFromConstantMemory(AAResults *AA, AllocaInst *V,
                               MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  // We track lifetime intrinsics as we encounter them. If we decide to go
  // ahead and replace the value with the memory location, this lets the caller
  // quickly eliminate the markers.

  using ValueAndIsOffset = PointerIntPair<Value *, 1, bool>;
  SmallVector<ValueAndIsOffset, 32> Worklist;
  SmallPtrSet<ValueAndIsOffset, 32> Visited;
  Worklist.emplace_back(V, false);
  while (!Worklist.empty()) {
    ValueAndIsOffset Elem = Worklist.pop_back_val();
    if (!Visited.insert(Elem).second)
      continue;
    if (Visited.size() > MaxCopiedFromConstantUsers)
      return false;

    const auto [Value, IsOffset] = Elem;
    for (auto &U : Value->uses()) {
      auto *I = cast<Instruction>(U.getUser());

      if (auto *LI = dyn_cast<LoadInst>(I)) {
        // Ignore simple (non-volatile, non-atomic) loads; they are always OK.
        if (!LI->isSimple()) return false;
        continue;
      }

      if (isa<PHINode, SelectInst>(I)) {
        // We set IsOffset=true to forbid the memcpy from occurring after the
        // phi: if one of the phi operands is not based on the alloca, we
        // would incorrectly omit a write.
        Worklist.emplace_back(I, true);
        continue;
      }
      if (isa<BitCastInst, AddrSpaceCastInst>(I)) {
        // If uses of the bitcast are ok, we are ok.
        Worklist.emplace_back(I, IsOffset);
        continue;
      }
      if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
        // A GEP with all-zero indices does not offset the pointer; any other
        // GEP does.
        Worklist.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
        continue;
      }

      if (auto *Call = dyn_cast<CallBase>(I)) {
        // If this is the function being called then we treat it like a load and
        // ignore it.
        if (Call->isCallee(&U))
          continue;

        unsigned DataOpNo = Call->getDataOperandNo(&U);
        bool IsArgOperand = Call->isArgOperand(&U);

        // Inalloca arguments are clobbered by the call.
        if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
          return false;

        // If this call site doesn't modify the memory, then we know it is just
        // a load (but one that potentially returns the value itself), so we can
        // ignore it if we know that the value isn't captured.
        bool NoCapture = Call->doesNotCapture(DataOpNo);
        if ((Call->onlyReadsMemory() && (Call->use_empty() || NoCapture)) ||
            (Call->onlyReadsMemory(DataOpNo) && NoCapture))
          continue;

        // If this is being passed as a byval argument, the caller is making a
        // copy, so it is only a read of the alloca.
        if (IsArgOperand && Call->isByValArgument(DataOpNo))
          continue;
      }

      // Lifetime intrinsics can be handled by the caller.
      if (I->isLifetimeStartOrEnd()) {
        assert(I->use_empty() && "Lifetime markers have no result to use!");
        ToDelete.push_back(I);
        continue;
      }

      // If this isn't our memcpy/memmove, reject it as something we can't
      // handle.
      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
      if (!MI)
        return false;

      // If the transfer is volatile, reject it.
      if (MI->isVolatile())
        return false;

      // If the transfer is using the alloca as a source of the transfer, then
      // ignore it; it is only a read (the volatile case was rejected above).
      if (U.getOperandNo() == 1)
        continue;

      // If we have already seen a copy, reject the second one.
      if (TheCopy) return false;

      // If the pointer has been offset from the start of the alloca, we can't
      // safely handle this.
      if (IsOffset) return false;

      // If the memintrinsic isn't using the alloca as the dest, reject it.
      if (U.getOperandNo() != 0) return false;

      // If the source of the memcpy/move is not constant, reject it.
      if (isModSet(AA->getModRefInfoMask(MI->getSource())))
        return false;

      // Otherwise, the transform is safe. Remember the copy instruction.
      TheCopy = MI;
    }
  }
  return true;
}

/// isOnlyCopiedFromConstantMemory - Return the single memcpy/memmove that
/// initializes the specified alloca, if the alloca is only modified by such a
/// copy from a constant memory location, and null otherwise. If we can prove
/// this, we can replace any uses of the alloca with uses of the memory
/// location directly.
static MemTransferInst *
isOnlyCopiedFromConstantMemory(AAResults *AA,
                               AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = nullptr;
  if (isOnlyCopiedFromConstantMemory(AA, AI, TheCopy, ToDelete))
    return TheCopy;
  return nullptr;
}

/// Returns true if V is dereferenceable for the size of the alloca.
static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
                                           const DataLayout &DL) {
  if (AI->isArrayAllocation())
    return false;
  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
  if (!AllocaSize)
    return false;
  return isDereferenceableAndAlignedPointer(V, AI->getAlign(),
                                            APInt(64, AllocaSize), DL);
}

static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
                                            AllocaInst &AI, DominatorTree &DT) {
  // Check for array size of 1 (scalar allocation).
  if (!AI.isArrayAllocation()) {
    // i32 1 is the canonical array size for scalar allocations.
    if (AI.getArraySize()->getType()->isIntegerTy(32))
      return nullptr;

    // Canonicalize it.
    return IC.replaceOperand(AI, 0, IC.Builder.getInt32(1));
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
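  // For example:
  //   %a = alloca i32, i32 4   -->   %a = alloca [4 x i32]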
  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
    if (C->getValue().getActiveBits() <= 64) {
      Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
                                                nullptr, AI.getName());
      New->setAlignment(AI.getAlign());
      New->setUsedWithInAlloca(AI.isUsedWithInAlloca());

      replaceAllDbgUsesWith(AI, *New, *New, DT);
      return IC.replaceInstUsesWith(AI, New);
    }
  }

  if (isa<UndefValue>(AI.getArraySize()))
    return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));

  // Ensure that the alloca array size argument has type equal to the offset
  // size of the alloca() pointer, which, in the typical case, is intptr_t,
  // so that any casting is exposed early.
  Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType());
  if (AI.getArraySize()->getType() != PtrIdxTy) {
    Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false);
    return IC.replaceOperand(AI, 0, V);
  }

  return nullptr;
}

namespace {
// If I and V are pointers in different address spaces, it is not allowed to
// use replaceAllUsesWith since I and V have different types. A
// non-target-specific transformation should not use addrspacecast on V since
// the two address spaces may be disjoint depending on the target.
//
// This class chases down uses of the old pointer until reaching the load
// instructions, then replaces the old pointer in the load instructions with
// the new pointer. If during the chasing it sees a bitcast or GEP, it will
// create a new bitcast or GEP with the new pointer and use them in the load
// instruction.
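//
// A typical use is an alloca that is only initialized by a memcpy from a
// constant global in another address space: the loads reached from the alloca
// are rewritten to equivalent instructions rooted at the global, after which
// the alloca and the memcpy become dead.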
class PointerReplacer {
public:
  PointerReplacer(InstCombinerImpl &IC, Instruction &Root, unsigned SrcAS)
      : IC(IC), Root(Root), FromAS(SrcAS) {}

  bool collectUsers();
  void replacePointer(Value *V);

private:
  bool collectUsersRecursive(Instruction &I);
  void replace(Instruction *I);
  Value *getReplacement(Value *I);
  bool isAvailable(Instruction *I) const {
    return I == &Root || Worklist.contains(I);
  }

  bool isEqualOrValidAddrSpaceCast(const Instruction *I,
                                   unsigned FromAS) const {
    const auto *ASC = dyn_cast<AddrSpaceCastInst>(I);
    if (!ASC)
      return false;
    unsigned ToAS = ASC->getDestAddressSpace();
    return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
  }

  SmallPtrSet<Instruction *, 32> ValuesToRevisit;
  SmallSetVector<Instruction *, 4> Worklist;
  MapVector<Value *, Value *> WorkMap;
  InstCombinerImpl &IC;
  Instruction &Root;
  unsigned FromAS;
};
} // end anonymous namespace

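// Determine whether every (transitive) user of Root can be rewritten in terms
// of the replacement pointer; return false if an unsupported user is found.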
bool PointerReplacer::collectUsers() {
  if (!collectUsersRecursive(Root))
    return false;

  // Ensure that all outstanding (indirect) users of Root
  // have been inserted into the Worklist; return false
  // otherwise.
  for (auto *Inst : ValuesToRevisit)
    if (!Worklist.contains(Inst))
      return false;
  return true;
}

bool PointerReplacer::collectUsersRecursive(Instruction &I) {
  for (auto *U : I.users()) {
    auto *Inst = cast<Instruction>(&*U);
    if (auto *Load = dyn_cast<LoadInst>(Inst)) {
      if (Load->isVolatile())
        return false;
      Worklist.insert(Load);
    } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
      // All incoming values must be instructions for replaceability.
      if (any_of(PHI->incoming_values(),
                 [](Value *V) { return !isa<Instruction>(V); }))
        return false;

      // If at least one incoming value of the PHI is not in Worklist,
      // store the PHI for revisiting and skip this iteration of the
      // loop.
      if (any_of(PHI->incoming_values(), [this](Value *V) {
            return !isAvailable(cast<Instruction>(V));
          })) {
        ValuesToRevisit.insert(Inst);
        continue;
      }

      Worklist.insert(PHI);
      if (!collectUsersRecursive(*PHI))
        return false;
    } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
      if (!isa<Instruction>(SI->getTrueValue()) ||
          !isa<Instruction>(SI->getFalseValue()))
        return false;

      if (!isAvailable(cast<Instruction>(SI->getTrueValue())) ||
          !isAvailable(cast<Instruction>(SI->getFalseValue()))) {
        ValuesToRevisit.insert(Inst);
        continue;
      }
      Worklist.insert(SI);
      if (!collectUsersRecursive(*SI))
        return false;
    } else if (isa<GetElementPtrInst>(Inst)) {
      Worklist.insert(Inst);
      if (!collectUsersRecursive(*Inst))
        return false;
    } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
      if (MI->isVolatile())
        return false;
      Worklist.insert(Inst);
    } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
      Worklist.insert(Inst);
      if (!collectUsersRecursive(*Inst))
        return false;
    } else if (Inst->isLifetimeStartOrEnd()) {
      continue;
    } else {
      // TODO: For arbitrary uses with address space mismatches, should we check
      // if we can introduce a valid addrspacecast?
      LLVM_DEBUG(dbgs() << "Cannot handle pointer user: " << *U << '\n');
      return false;
    }
  }

  return true;
}

Value *PointerReplacer::getReplacement(Value *V) { return WorkMap.lookup(V); }

void PointerReplacer::replace(Instruction *I) {
  if (getReplacement(I))
    return;

  if (auto *LT = dyn_cast<LoadInst>(I)) {
    auto *V = getReplacement(LT->getPointerOperand());
    assert(V && "Operand not replaced");
    auto *NewI = new LoadInst(LT->getType(), V, "", LT->isVolatile(),
                              LT->getAlign(), LT->getOrdering(),
                              LT->getSyncScopeID());
    NewI->takeName(LT);
    copyMetadataForLoad(*NewI, *LT);

    IC.InsertNewInstWith(NewI, LT->getIterator());
    IC.replaceInstUsesWith(*LT, NewI);
    WorkMap[LT] = NewI;
  } else if (auto *PHI = dyn_cast<PHINode>(I)) {
    Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
    auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
                                   PHI->getName(), PHI->getIterator());
    for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I)
      NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)),
                          PHI->getIncomingBlock(I));
    WorkMap[PHI] = NewPHI;
  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
    auto *V = getReplacement(GEP->getPointerOperand());
    assert(V && "Operand not replaced");
    SmallVector<Value *, 8> Indices(GEP->indices());
    auto *NewI =
        GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
    IC.InsertNewInstWith(NewI, GEP->getIterator());
    NewI->takeName(GEP);
    NewI->setNoWrapFlags(GEP->getNoWrapFlags());
    WorkMap[GEP] = NewI;
  } else if (auto *SI = dyn_cast<SelectInst>(I)) {
    Value *TrueValue = SI->getTrueValue();
    Value *FalseValue = SI->getFalseValue();
    if (Value *Replacement = getReplacement(TrueValue))
      TrueValue = Replacement;
    if (Value *Replacement = getReplacement(FalseValue))
      FalseValue = Replacement;
    auto *NewSI = SelectInst::Create(SI->getCondition(), TrueValue, FalseValue,
                                     SI->getName(), nullptr, SI);
    IC.InsertNewInstWith(NewSI, SI->getIterator());
    NewSI->takeName(SI);
    WorkMap[SI] = NewSI;
  } else if (auto *MemCpy = dyn_cast<MemTransferInst>(I)) {
    auto *DestV = MemCpy->getRawDest();
    auto *SrcV = MemCpy->getRawSource();

    if (auto *DestReplace = getReplacement(DestV))
      DestV = DestReplace;
    if (auto *SrcReplace = getReplacement(SrcV))
      SrcV = SrcReplace;

    IC.Builder.SetInsertPoint(MemCpy);
    auto *NewI = IC.Builder.CreateMemTransferInst(
        MemCpy->getIntrinsicID(), DestV, MemCpy->getDestAlign(), SrcV,
        MemCpy->getSourceAlign(), MemCpy->getLength(), MemCpy->isVolatile());
    AAMDNodes AAMD = MemCpy->getAAMetadata();
    if (AAMD)
      NewI->setAAMetadata(AAMD);

    IC.eraseInstFromFunction(*MemCpy);
    WorkMap[MemCpy] = NewI;
  } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(I)) {
    auto *V = getReplacement(ASC->getPointerOperand());
    assert(V && "Operand not replaced");
    assert(isEqualOrValidAddrSpaceCast(
               ASC, V->getType()->getPointerAddressSpace()) &&
           "Invalid address space cast!");

    if (V->getType()->getPointerAddressSpace() !=
        ASC->getType()->getPointerAddressSpace()) {
      auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
      NewI->takeName(ASC);
      IC.InsertNewInstWith(NewI, ASC->getIterator());
      WorkMap[ASC] = NewI;
    } else {
      WorkMap[ASC] = V;
    }

  } else {
    llvm_unreachable("should never reach here");
  }
}

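// Map Root to its replacement V, then rewrite every collected user in
// worklist order.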
void PointerReplacer::replacePointer(Value *V) {
#ifndef NDEBUG
  auto *PT = cast<PointerType>(Root.getType());
  auto *NT = cast<PointerType>(V->getType());
  assert(PT != NT && "Invalid usage");
#endif
  WorkMap[&Root] = V;

  for (Instruction *Workitem : Worklist)
    replace(Workitem);
}

Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
  if (auto *I = simplifyAllocaArraySize(*this, AI, DT))
    return I;

  if (AI.getAllocatedType()->isSized()) {
    // Move all allocas of zero-byte objects to the entry block and merge them
    // together. Note that we only do this for allocas, because malloc should
    // allocate and return a unique pointer, even for a zero-byte allocation.
    if (DL.getTypeAllocSize(AI.getAllocatedType()).getKnownMinValue() == 0) {
      // For a zero-sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation())
        return replaceOperand(AI, 0,
                              ConstantInt::get(AI.getArraySize()->getType(), 1));

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block. There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL.getTypeAllocSize(EntryAI->getAllocatedType())
                    .getKnownMinValue() != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
        EntryAI->setAlignment(MaxAlign);
        return replaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  // Check to see if this allocation is only modified by a memcpy/memmove from
  // a constant memory location whose alignment is equal to or exceeds that of
  // the allocation. If this is the case, we can change all users to use the
  // constant memory location instead. This is commonly produced by the CFE by
  // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
  // is only subsequently read.
  SmallVector<Instruction *, 4> ToDelete;
  if (MemTransferInst *Copy = isOnlyCopiedFromConstantMemory(AA, &AI, ToDelete)) {
    Value *TheSrc = Copy->getSource();
    Align AllocaAlign = AI.getAlign();
    Align SourceAlign = getOrEnforceKnownAlignment(
        TheSrc, AllocaAlign, DL, &AI, &AC, &DT);
    if (AllocaAlign <= SourceAlign &&
        isDereferenceableForAllocaSize(TheSrc, &AI, DL) &&
        !isa<Instruction>(TheSrc)) {
      // FIXME: Can we sink instructions without violating dominance when TheSrc
      // is an instruction instead of a constant or argument?
      LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
      LLVM_DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
      unsigned SrcAddrSpace = TheSrc->getType()->getPointerAddressSpace();
      if (AI.getAddressSpace() == SrcAddrSpace) {
        for (Instruction *Delete : ToDelete)
          eraseInstFromFunction(*Delete);

        Instruction *NewI = replaceInstUsesWith(AI, TheSrc);
        eraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }

      PointerReplacer PtrReplacer(*this, AI, SrcAddrSpace);
      if (PtrReplacer.collectUsers()) {
        for (Instruction *Delete : ToDelete)
          eraseInstFromFunction(*Delete);

        PtrReplacer.replacePointer(TheSrc);
        ++NumGlobalCopies;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}

// Are we allowed to form an atomic load or store of this type?
static bool isSupportedAtomicType(Type *Ty) {
  return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
}

/// Helper to combine a load to a new type.
///
/// This just does the work of combining a load to a new type. It handles
/// metadata, etc., and returns the new instruction. The \c NewTy should be the
/// loaded *value* type. The load is rebuilt against the original pointer
/// operand so that it produces a value of that type.
///
/// Note that this will create all of the instructions with whatever insert
/// point the \c InstCombinerImpl currently is using.
LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
                                                 const Twine &Suffix) {
  assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
         "can't fold an atomic load to requested type");

  LoadInst *NewLoad =
      Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(),
                                LI.isVolatile(), LI.getName() + Suffix);
  NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
  copyMetadataForLoad(*NewLoad, LI);
  return NewLoad;
}

/// Combine a store to a new type.
///
/// Returns the newly created store instruction.
static StoreInst *combineStoreToNewValue(InstCombinerImpl &IC, StoreInst &SI,
                                         Value *V) {
  assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
         "can't fold an atomic store of requested type");

  Value *Ptr = SI.getPointerOperand();
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  SI.getAllMetadata(MD);

  StoreInst *NewStore =
      IC.Builder.CreateAlignedStore(V, Ptr, SI.getAlign(), SI.isVolatile());
  NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
  for (const auto &MDPair : MD) {
    unsigned ID = MDPair.first;
    MDNode *N = MDPair.second;
    // Note, essentially every kind of metadata should be preserved here! This
    // routine is supposed to clone a store instruction changing *only its
    // type*. The only metadata it makes sense to drop is metadata which is
    // invalidated when the pointer type changes. This should essentially
    // never be the case in LLVM, but we explicitly switch over only known
    // metadata to be conservatively correct. If you are adding metadata to
    // LLVM which pertains to stores, you almost certainly want to add it
    // here.
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_DIAssignID:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_prof:
    case LLVMContext::MD_fpmath:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_nontemporal:
    case LLVMContext::MD_mem_parallel_loop_access:
    case LLVMContext::MD_access_group:
      // All of these directly apply.
      NewStore->setMetadata(ID, N);
      break;
    case LLVMContext::MD_invariant_load:
    case LLVMContext::MD_nonnull:
    case LLVMContext::MD_noundef:
    case LLVMContext::MD_range:
    case LLVMContext::MD_align:
    case LLVMContext::MD_dereferenceable:
    case LLVMContext::MD_dereferenceable_or_null:
      // These don't apply for stores.
      break;
    }
  }

  return NewStore;
}

/// Combine loads to match the type of their uses' value after looking
/// through intervening bitcasts.
///
/// The core idea here is that if the result of a load is used in an operation,
/// we should load the type most conducive to that operation. For example, when
/// loading an integer and immediately bitcasting it to a float, we should
/// instead directly load a float.
///
/// However, this routine must never change the width of a load or the number of
/// loads as that would introduce a semantic change. This combine is expected to
/// be a semantic no-op which just allows loads to more closely model the types
/// of their consuming operations.
///
/// Currently, we also refuse to change the precise type used for an atomic load
/// or a volatile load. This is debatable, and might be reasonable to change
/// later. However, it is risky in case some backend or other part of LLVM is
/// relying on the exact type loaded to select appropriate atomic operations.
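///
/// For example, given
///   %x = load i32, ptr %p
///   %f = bitcast i32 %x to float
/// the bitcast is removed and its uses are served directly by a new
///   load float, ptr %p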
static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
                                               LoadInst &Load) {
  // FIXME: We could probably with some care handle both volatile and ordered
  // atomic loads here but it isn't clear that this is important.
  if (!Load.isUnordered())
    return nullptr;

  if (Load.use_empty())
    return nullptr;

  // swifterror values can't be bitcasted.
  if (Load.getPointerOperand()->isSwiftError())
    return nullptr;

  // Fold away bit casts of the loaded value by loading the desired type.
  // Note that we should not do this for pointer<->integer casts,
  // because that would result in type punning.
  if (Load.hasOneUse()) {
    // Don't transform when the type is x86_amx; skipping it keeps the pass
    // that lowers the x86_amx type happy.
    Type *LoadTy = Load.getType();
    if (auto *BC = dyn_cast<BitCastInst>(Load.user_back())) {
      assert(!LoadTy->isX86_AMXTy() && "Load from x86_amx* should not happen!");
      if (BC->getType()->isX86_AMXTy())
        return nullptr;
    }

    if (auto *CastUser = dyn_cast<CastInst>(Load.user_back())) {
      Type *DestTy = CastUser->getDestTy();
      if (CastUser->isNoopCast(IC.getDataLayout()) &&
          LoadTy->isPtrOrPtrVectorTy() == DestTy->isPtrOrPtrVectorTy() &&
          (!Load.isAtomic() || isSupportedAtomicType(DestTy))) {
        LoadInst *NewLoad = IC.combineLoadToNewType(Load, DestTy);
        CastUser->replaceAllUsesWith(NewLoad);
        IC.eraseInstFromFunction(*CastUser);
        return &Load;
      }
    }
  }

  // FIXME: We should also canonicalize loads of vectors when their elements are
  // cast to other types.
  return nullptr;
}

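/// Unpack a load of an aggregate type into per-element loads whose results are
/// recombined with insertvalue. For example, a load of a padding-free struct
/// {i32, i32} becomes two i32 element loads inserted into a poison aggregate.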
static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
  // FIXME: We could probably with some care handle both volatile and atomic
  // loads here but it isn't clear that this is important.
  if (!LI.isSimple())
    return nullptr;

  Type *T = LI.getType();
  if (!T->isAggregateType())
    return nullptr;

  StringRef Name = LI.getName();

  if (auto *ST = dyn_cast<StructType>(T)) {
    // If the struct has only one element, we unpack.
    auto NumElements = ST->getNumElements();
    if (NumElements == 1) {
      LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
                                                  ".unpack");
      NewLoad->setAAMetadata(LI.getAAMetadata());
      return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
                                            PoisonValue::get(T), NewLoad, 0, Name));
    }

    // We don't want to break loads with padding here as we'd lose
    // the knowledge that padding exists for the rest of the pipeline.
    const DataLayout &DL = IC.getDataLayout();
    auto *SL = DL.getStructLayout(ST);

    // Don't unpack a structure that contains a scalable vector.
    if (SL->getSizeInBits().isScalable())
      return nullptr;

    if (SL->hasPadding())
      return nullptr;

    const auto Align = LI.getAlign();
    auto *Addr = LI.getPointerOperand();
    auto *IdxType = Type::getInt32Ty(T->getContext());
    auto *Zero = ConstantInt::get(IdxType, 0);

    Value *V = PoisonValue::get(T);
    for (unsigned i = 0; i < NumElements; i++) {
      Value *Indices[2] = {
        Zero,
        ConstantInt::get(IdxType, i),
      };
      auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices),
                                               Name + ".elt");
      auto *L = IC.Builder.CreateAlignedLoad(
          ST->getElementType(i), Ptr,
          commonAlignment(Align, SL->getElementOffset(i)), Name + ".unpack");
      // Propagate AA metadata. It'll still be valid on the narrowed load.
      L->setAAMetadata(LI.getAAMetadata());
      V = IC.Builder.CreateInsertValue(V, L, i);
    }

    V->setName(Name);
    return IC.replaceInstUsesWith(LI, V);
  }

  if (auto *AT = dyn_cast<ArrayType>(T)) {
    auto *ET = AT->getElementType();
    auto NumElements = AT->getNumElements();
    if (NumElements == 1) {
      LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
      NewLoad->setAAMetadata(LI.getAAMetadata());
      return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
                                            PoisonValue::get(T), NewLoad, 0, Name));
    }

    // Bail out if the array is too large. Ideally we would like to optimize
    // arrays of arbitrary size but this has a terrible impact on compile time.
    // The threshold here is chosen arbitrarily and may need a little tuning.
    if (NumElements > IC.MaxArraySizeForCombine)
      return nullptr;

    const DataLayout &DL = IC.getDataLayout();
    TypeSize EltSize = DL.getTypeAllocSize(ET);
    const auto Align = LI.getAlign();

    auto *Addr = LI.getPointerOperand();
    auto *IdxType = Type::getInt64Ty(T->getContext());
    auto *Zero = ConstantInt::get(IdxType, 0);

    Value *V = PoisonValue::get(T);
    TypeSize Offset = TypeSize::getZero();
    for (uint64_t i = 0; i < NumElements; i++) {
      Value *Indices[2] = {
        Zero,
        ConstantInt::get(IdxType, i),
      };
      auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
                                               Name + ".elt");
      auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
      auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
                                             EltAlign, Name + ".unpack");
      L->setAAMetadata(LI.getAAMetadata());
      V = IC.Builder.CreateInsertValue(V, L, i);
      Offset += EltSize;
    }

    V->setName(Name);
    return IC.replaceInstUsesWith(LI, V);
  }

  return nullptr;
}

// If we can determine that all possible objects pointed to by the provided
// pointer value are not only dereferenceable but also definitively less than
// or equal to the provided maximum size, then return true; otherwise return
// false. Constant global values and allocas are the kinds of objects this can
// currently analyze.
//
// FIXME: This should probably live in ValueTracking (or similar).
static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
                                     const DataLayout &DL) {
  SmallPtrSet<Value *, 4> Visited;
  SmallVector<Value *, 4> Worklist(1, V);

  do {
    Value *P = Worklist.pop_back_val();
    P = P->stripPointerCasts();

    if (!Visited.insert(P).second)
      continue;

    if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
      Worklist.push_back(SI->getTrueValue());
      Worklist.push_back(SI->getFalseValue());
      continue;
    }

    if (PHINode *PN = dyn_cast<PHINode>(P)) {
      append_range(Worklist, PN->incoming_values());
      continue;
    }

    if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
      if (GA->isInterposable())
        return false;
      Worklist.push_back(GA->getAliasee());
      continue;
    }

    // If we know how big this object is, and it is less than MaxSize, continue
    // searching. Otherwise, return false.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
      if (!AI->getAllocatedType()->isSized())
        return false;

      ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
      if (!CS)
        return false;

      TypeSize TS = DL.getTypeAllocSize(AI->getAllocatedType());
      if (TS.isScalable())
        return false;
      // Make sure that, even if the multiplication below would wrap as a
      // uint64_t, we still do the right thing.
      if ((CS->getValue().zext(128) * APInt(128, TS.getFixedValue()))
              .ugt(MaxSize))
        return false;
      continue;
    }

    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
      if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
        return false;

      uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
      if (InitSize > MaxSize)
        return false;
      continue;
    }

    return false;
  } while (!Worklist.empty());

  return true;
}

// If we're indexing into an object of a known size, and the outer index is
// not a constant, but having any value but zero would lead to undefined
// behavior, replace it with zero.
//
// For example, if we have:
// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
// ...
// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
// ... = load i32* %arrayidx, align 4
// Then we know that we can replace %x in the GEP with i64 0.
//
// FIXME: We could fold any GEP index to zero that would cause UB if it were
// not zero. Currently, we only handle the first such index. Also, we could
// search through non-zero constant indices if we kept track of the offsets
// those indices implied.
canReplaceGEPIdxWithZero(InstCombinerImpl & IC,GetElementPtrInst * GEPI,Instruction * MemI,unsigned & Idx)890e8d8bef9SDimitry Andric static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
891e8d8bef9SDimitry Andric GetElementPtrInst *GEPI, Instruction *MemI,
892e8d8bef9SDimitry Andric unsigned &Idx) {
8930b57cec5SDimitry Andric if (GEPI->getNumOperands() < 2)
8940b57cec5SDimitry Andric return false;
8950b57cec5SDimitry Andric
8960b57cec5SDimitry Andric // Find the first non-zero index of a GEP. If all indices are zero, return
8970b57cec5SDimitry Andric // one past the last index.
8980b57cec5SDimitry Andric auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
8990b57cec5SDimitry Andric unsigned I = 1;
9000b57cec5SDimitry Andric for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
9010b57cec5SDimitry Andric Value *V = GEPI->getOperand(I);
9020b57cec5SDimitry Andric if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
9030b57cec5SDimitry Andric if (CI->isZero())
9040b57cec5SDimitry Andric continue;
9050b57cec5SDimitry Andric
9060b57cec5SDimitry Andric break;
9070b57cec5SDimitry Andric }
9080b57cec5SDimitry Andric
9090b57cec5SDimitry Andric return I;
9100b57cec5SDimitry Andric };
9110b57cec5SDimitry Andric
9120b57cec5SDimitry Andric // Skip through initial 'zero' indices, and find the corresponding pointer
9130b57cec5SDimitry Andric // type. See if the next index is not a constant.
9140b57cec5SDimitry Andric Idx = FirstNZIdx(GEPI);
9150b57cec5SDimitry Andric if (Idx == GEPI->getNumOperands())
9160b57cec5SDimitry Andric return false;
9170b57cec5SDimitry Andric if (isa<Constant>(GEPI->getOperand(Idx)))
9180b57cec5SDimitry Andric return false;
9190b57cec5SDimitry Andric
9200b57cec5SDimitry Andric SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
921e8d8bef9SDimitry Andric Type *SourceElementType = GEPI->getSourceElementType();
922e8d8bef9SDimitry Andric // Size information about scalable vectors is not available, so we cannot
923e8d8bef9SDimitry Andric // deduce whether indexing at n is undefined behavior or not. Bail out.
9245f757f3fSDimitry Andric if (SourceElementType->isScalableTy())
925e8d8bef9SDimitry Andric return false;
926e8d8bef9SDimitry Andric
927e8d8bef9SDimitry Andric Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
9280b57cec5SDimitry Andric if (!AllocTy || !AllocTy->isSized())
9290b57cec5SDimitry Andric return false;
9300b57cec5SDimitry Andric const DataLayout &DL = IC.getDataLayout();
931bdd1243dSDimitry Andric uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy).getFixedValue();
9320b57cec5SDimitry Andric
9330b57cec5SDimitry Andric // If there are more indices after the one we might replace with a zero, make
9340b57cec5SDimitry Andric // sure they're all non-negative. If any of them are negative, the overall
9350b57cec5SDimitry Andric // address being computed might be before the base address determined by the
9360b57cec5SDimitry Andric // first non-zero index.
9370b57cec5SDimitry Andric auto IsAllNonNegative = [&]() {
9380b57cec5SDimitry Andric for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
9390b57cec5SDimitry Andric KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
9400b57cec5SDimitry Andric if (Known.isNonNegative())
9410b57cec5SDimitry Andric continue;
9420b57cec5SDimitry Andric return false;
9430b57cec5SDimitry Andric }
9440b57cec5SDimitry Andric
9450b57cec5SDimitry Andric return true;
9460b57cec5SDimitry Andric };
9470b57cec5SDimitry Andric
9480b57cec5SDimitry Andric // FIXME: If the GEP is not inbounds, and there are extra indices after the
9490b57cec5SDimitry Andric // one we'll replace, those could cause the address computation to wrap
9500b57cec5SDimitry Andric // (rendering the IsAllNonNegative() check below insufficient). We can do
9510b57cec5SDimitry Andric // better, ignoring zero indices (and other indices we can prove small
9520b57cec5SDimitry Andric // enough not to wrap).
9530b57cec5SDimitry Andric if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
9540b57cec5SDimitry Andric return false;
9550b57cec5SDimitry Andric
9560b57cec5SDimitry Andric // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
9570b57cec5SDimitry Andric // also known to be dereferenceable.
9580b57cec5SDimitry Andric return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
9590b57cec5SDimitry Andric IsAllNonNegative();
9600b57cec5SDimitry Andric }
9610b57cec5SDimitry Andric
9620b57cec5SDimitry Andric // If we're indexing into an object with a variable index for the memory
9630b57cec5SDimitry Andric // access, but the object has only one element, we can assume that the index
9640b57cec5SDimitry Andric // will always be zero. If we replace the GEP, return the new GEP.
965e8d8bef9SDimitry Andric static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
96606c3fb27SDimitry Andric Instruction &MemI) {
9670b57cec5SDimitry Andric if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
9680b57cec5SDimitry Andric unsigned Idx;
9690b57cec5SDimitry Andric if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
9700b57cec5SDimitry Andric Instruction *NewGEPI = GEPI->clone();
9710b57cec5SDimitry Andric NewGEPI->setOperand(Idx,
9720b57cec5SDimitry Andric ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
9735f757f3fSDimitry Andric IC.InsertNewInstBefore(NewGEPI, GEPI->getIterator());
9740b57cec5SDimitry Andric return NewGEPI;
9750b57cec5SDimitry Andric }
9760b57cec5SDimitry Andric }
9770b57cec5SDimitry Andric
9780b57cec5SDimitry Andric return nullptr;
9790b57cec5SDimitry Andric }
9800b57cec5SDimitry Andric
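/// Returns true if this store writes through a null pointer (possibly via a
/// GEP of null) in an address space where null is not a defined address, so
/// the caller can treat the store as one that will become unreachable.
/// For example (illustrative):
///   store i32 0, ptr null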
9810b57cec5SDimitry Andric static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
9820b57cec5SDimitry Andric if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
9830b57cec5SDimitry Andric return false;
9840b57cec5SDimitry Andric
9850b57cec5SDimitry Andric auto *Ptr = SI.getPointerOperand();
9860b57cec5SDimitry Andric if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
9870b57cec5SDimitry Andric Ptr = GEPI->getOperand(0);
9880b57cec5SDimitry Andric return (isa<ConstantPointerNull>(Ptr) &&
9890b57cec5SDimitry Andric !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
9900b57cec5SDimitry Andric }
9910b57cec5SDimitry Andric
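/// Returns true if this load reads from undef, or from null (possibly via a
/// GEP of null) in an address space where null is not a defined address, so
/// the caller can replace the load with an unreachable marker.
/// For example (illustrative):
///   %v = load i32, ptr null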
9920b57cec5SDimitry Andric static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
9930b57cec5SDimitry Andric if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
9940b57cec5SDimitry Andric const Value *GEPI0 = GEPI->getOperand(0);
9950b57cec5SDimitry Andric if (isa<ConstantPointerNull>(GEPI0) &&
9960b57cec5SDimitry Andric !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
9970b57cec5SDimitry Andric return true;
9980b57cec5SDimitry Andric }
9990b57cec5SDimitry Andric if (isa<UndefValue>(Op) ||
10000b57cec5SDimitry Andric (isa<ConstantPointerNull>(Op) &&
10010b57cec5SDimitry Andric !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
10020b57cec5SDimitry Andric return true;
10030b57cec5SDimitry Andric return false;
10040b57cec5SDimitry Andric }
10050b57cec5SDimitry Andric
1006e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
10070b57cec5SDimitry Andric Value *Op = LI.getOperand(0);
100806c3fb27SDimitry Andric if (Value *Res = simplifyLoadInst(&LI, Op, SQ.getWithInstruction(&LI)))
100906c3fb27SDimitry Andric return replaceInstUsesWith(LI, Res);
10100b57cec5SDimitry Andric
10110b57cec5SDimitry Andric // Try to canonicalize the loaded type.
10120b57cec5SDimitry Andric if (Instruction *Res = combineLoadToOperationType(*this, LI))
10130b57cec5SDimitry Andric return Res;
10140b57cec5SDimitry Andric
10155f757f3fSDimitry Andric if (!EnableInferAlignmentPass) {
10160b57cec5SDimitry Andric // Attempt to improve the alignment.
10175ffd83dbSDimitry Andric Align KnownAlign = getOrEnforceKnownAlignment(
10185ffd83dbSDimitry Andric Op, DL.getPrefTypeAlign(LI.getType()), DL, &LI, &AC, &DT);
10195ffd83dbSDimitry Andric if (KnownAlign > LI.getAlign())
10205ffd83dbSDimitry Andric LI.setAlignment(KnownAlign);
10215f757f3fSDimitry Andric }
10220b57cec5SDimitry Andric
10230b57cec5SDimitry Andric // Replace GEP indices if possible.
102406c3fb27SDimitry Andric if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI))
102506c3fb27SDimitry Andric return replaceOperand(LI, 0, NewGEPI);
10260b57cec5SDimitry Andric
10270b57cec5SDimitry Andric if (Instruction *Res = unpackLoadToAggregate(*this, LI))
10280b57cec5SDimitry Andric return Res;
10290b57cec5SDimitry Andric
10300b57cec5SDimitry Andric // Do really simple store-to-load forwarding and load CSE, to catch cases
10310b57cec5SDimitry Andric // where there are several consecutive memory accesses to the same location,
10320b57cec5SDimitry Andric // separated by a few arithmetic operations.
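  // For example (an illustrative sketch; the value names are made up):
  //   store i32 %x, ptr %p
  //   %a = add i32 %x, 1        ; arithmetic only, no clobber of %p
  //   %y = load i32, ptr %p     ; %y can be replaced by %x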
10330b57cec5SDimitry Andric bool IsLoadCSE = false;
1034b3edf446SDimitry Andric BatchAAResults BatchAA(*AA);
1035b3edf446SDimitry Andric if (Value *AvailableVal = FindAvailableLoadedValue(&LI, BatchAA, &IsLoadCSE)) {
10360b57cec5SDimitry Andric if (IsLoadCSE)
10370b57cec5SDimitry Andric combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
10380b57cec5SDimitry Andric
10390b57cec5SDimitry Andric return replaceInstUsesWith(
10400b57cec5SDimitry Andric LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
10410b57cec5SDimitry Andric LI.getName() + ".cast"));
10420b57cec5SDimitry Andric }
10430b57cec5SDimitry Andric
10440b57cec5SDimitry Andric // None of the following transforms are legal for volatile/ordered atomic
10450b57cec5SDimitry Andric // loads. Most of them do apply for unordered atomics.
10460b57cec5SDimitry Andric if (!LI.isUnordered()) return nullptr;
10470b57cec5SDimitry Andric
10480b57cec5SDimitry Andric // load(gep null, ...) -> unreachable
10490b57cec5SDimitry Andric // load null/undef -> unreachable
10500b57cec5SDimitry Andric // TODO: Consider a target hook for valid address spaces for these transforms.
10510b57cec5SDimitry Andric if (canSimplifyNullLoadOrGEP(LI, Op)) {
105206c3fb27SDimitry Andric CreateNonTerminatorUnreachable(&LI);
1053fe6060f1SDimitry Andric return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
10540b57cec5SDimitry Andric }
10550b57cec5SDimitry Andric
10560b57cec5SDimitry Andric if (Op->hasOneUse()) {
10570b57cec5SDimitry Andric // Change select and PHI nodes to select values instead of addresses: this
10580b57cec5SDimitry Andric // helps alias analysis out a lot, allows many other simplifications, and
10590b57cec5SDimitry Andric // exposes redundancy in the code.
10600b57cec5SDimitry Andric //
10610b57cec5SDimitry Andric // Note that we cannot do the transformation unless we know that the
10620b57cec5SDimitry Andric // introduced loads cannot trap! Something like this is valid as long as
10630b57cec5SDimitry Andric // the condition is always false: load (select bool %C, int* null, int* %G),
10640b57cec5SDimitry Andric // but it would not be valid if we transformed it to load from null
10650b57cec5SDimitry Andric // unconditionally.
10660b57cec5SDimitry Andric //
10670b57cec5SDimitry Andric if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
10680b57cec5SDimitry Andric // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
10695ffd83dbSDimitry Andric Align Alignment = LI.getAlign();
10708bcb0991SDimitry Andric if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(),
10718bcb0991SDimitry Andric Alignment, DL, SI) &&
10728bcb0991SDimitry Andric isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(),
10738bcb0991SDimitry Andric Alignment, DL, SI)) {
10740b57cec5SDimitry Andric LoadInst *V1 =
10750b57cec5SDimitry Andric Builder.CreateLoad(LI.getType(), SI->getOperand(1),
10760b57cec5SDimitry Andric SI->getOperand(1)->getName() + ".val");
10770b57cec5SDimitry Andric LoadInst *V2 =
10780b57cec5SDimitry Andric Builder.CreateLoad(LI.getType(), SI->getOperand(2),
10790b57cec5SDimitry Andric SI->getOperand(2)->getName() + ".val");
10800b57cec5SDimitry Andric assert(LI.isUnordered() && "implied by above");
10818bcb0991SDimitry Andric V1->setAlignment(Alignment);
10820b57cec5SDimitry Andric V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
10838bcb0991SDimitry Andric V2->setAlignment(Alignment);
10840b57cec5SDimitry Andric V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
10850b57cec5SDimitry Andric return SelectInst::Create(SI->getCondition(), V1, V2);
10860b57cec5SDimitry Andric }
10870b57cec5SDimitry Andric
10880b57cec5SDimitry Andric // load (select (cond, null, P)) -> load P
10890b57cec5SDimitry Andric if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
10900b57cec5SDimitry Andric !NullPointerIsDefined(SI->getFunction(),
10915ffd83dbSDimitry Andric LI.getPointerAddressSpace()))
10925ffd83dbSDimitry Andric return replaceOperand(LI, 0, SI->getOperand(2));
10930b57cec5SDimitry Andric
10940b57cec5SDimitry Andric // load (select (cond, P, null)) -> load P
10950b57cec5SDimitry Andric if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
10960b57cec5SDimitry Andric !NullPointerIsDefined(SI->getFunction(),
10975ffd83dbSDimitry Andric LI.getPointerAddressSpace()))
10985ffd83dbSDimitry Andric return replaceOperand(LI, 0, SI->getOperand(1));
10990b57cec5SDimitry Andric }
11000b57cec5SDimitry Andric }
11010b57cec5SDimitry Andric return nullptr;
11020b57cec5SDimitry Andric }
11030b57cec5SDimitry Andric
11040b57cec5SDimitry Andric /// Look for extractelement/insertvalue sequence that acts like a bitcast.
11050b57cec5SDimitry Andric ///
11060b57cec5SDimitry Andric /// \returns underlying value that was "cast", or nullptr otherwise.
11070b57cec5SDimitry Andric ///
11080b57cec5SDimitry Andric /// For example, if we have:
11090b57cec5SDimitry Andric ///
11100b57cec5SDimitry Andric /// %E0 = extractelement <2 x double> %U, i32 0
11110b57cec5SDimitry Andric /// %V0 = insertvalue [2 x double] undef, double %E0, 0
11120b57cec5SDimitry Andric /// %E1 = extractelement <2 x double> %U, i32 1
11130b57cec5SDimitry Andric /// %V1 = insertvalue [2 x double] %V0, double %E1, 1
11140b57cec5SDimitry Andric ///
11150b57cec5SDimitry Andric /// and the layout of a <2 x double> is isomorphic to a [2 x double],
11160b57cec5SDimitry Andric /// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
11170b57cec5SDimitry Andric /// Note that %U may contain non-undef values where %V1 has undef.
1118e8d8bef9SDimitry Andric static Value *likeBitCastFromVector(InstCombinerImpl &IC, Value *V) {
11190b57cec5SDimitry Andric Value *U = nullptr;
11200b57cec5SDimitry Andric while (auto *IV = dyn_cast<InsertValueInst>(V)) {
11210b57cec5SDimitry Andric auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
11220b57cec5SDimitry Andric if (!E)
11230b57cec5SDimitry Andric return nullptr;
11240b57cec5SDimitry Andric auto *W = E->getVectorOperand();
11250b57cec5SDimitry Andric if (!U)
11260b57cec5SDimitry Andric U = W;
11270b57cec5SDimitry Andric else if (U != W)
11280b57cec5SDimitry Andric return nullptr;
11290b57cec5SDimitry Andric auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
11300b57cec5SDimitry Andric if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
11310b57cec5SDimitry Andric return nullptr;
11320b57cec5SDimitry Andric V = IV->getAggregateOperand();
11330b57cec5SDimitry Andric }
1134fe6060f1SDimitry Andric if (!match(V, m_Undef()) || !U)
11350b57cec5SDimitry Andric return nullptr;
11360b57cec5SDimitry Andric
11370b57cec5SDimitry Andric auto *UT = cast<VectorType>(U->getType());
11380b57cec5SDimitry Andric auto *VT = V->getType();
11390b57cec5SDimitry Andric // Check that types UT and VT are bitwise isomorphic.
11400b57cec5SDimitry Andric const auto &DL = IC.getDataLayout();
11410b57cec5SDimitry Andric if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
11420b57cec5SDimitry Andric return nullptr;
11430b57cec5SDimitry Andric }
11440b57cec5SDimitry Andric if (auto *AT = dyn_cast<ArrayType>(VT)) {
1145e8d8bef9SDimitry Andric if (AT->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
11460b57cec5SDimitry Andric return nullptr;
11470b57cec5SDimitry Andric } else {
11480b57cec5SDimitry Andric auto *ST = cast<StructType>(VT);
1149e8d8bef9SDimitry Andric if (ST->getNumElements() != cast<FixedVectorType>(UT)->getNumElements())
11500b57cec5SDimitry Andric return nullptr;
11510b57cec5SDimitry Andric for (const auto *EltT : ST->elements()) {
11520b57cec5SDimitry Andric if (EltT != UT->getElementType())
11530b57cec5SDimitry Andric return nullptr;
11540b57cec5SDimitry Andric }
11550b57cec5SDimitry Andric }
11560b57cec5SDimitry Andric return U;
11570b57cec5SDimitry Andric }
11580b57cec5SDimitry Andric
11590b57cec5SDimitry Andric /// Combine stores to match the type of value being stored.
11600b57cec5SDimitry Andric ///
11610b57cec5SDimitry Andric /// The core idea here is that the memory does not have any intrinsic type and
11620b57cec5SDimitry Andric /// where we can we should match the type of a store to the type of value being
11630b57cec5SDimitry Andric /// stored.
11640b57cec5SDimitry Andric ///
11650b57cec5SDimitry Andric /// However, this routine must never change the width of a store or the number of
11660b57cec5SDimitry Andric /// stores as that would introduce a semantic change. This combine is expected to
11670b57cec5SDimitry Andric /// be a semantic no-op which just allows stores to more closely model the types
11680b57cec5SDimitry Andric /// of their incoming values.
11690b57cec5SDimitry Andric ///
11700b57cec5SDimitry Andric /// Currently, we also refuse to change the precise type used for an atomic or
11710b57cec5SDimitry Andric /// volatile store. This is debatable, and might be reasonable to change later.
11720b57cec5SDimitry Andric /// However, it is risky in case some backend or other part of LLVM is relying
11730b57cec5SDimitry Andric /// on the exact type stored to select appropriate atomic operations.
11740b57cec5SDimitry Andric ///
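/// For example (an illustrative sketch), a store of a value that was just
/// bitcast from i32 to float is rewritten to store the original i32:
///   %f = bitcast i32 %x to float
///   store float %f, ptr %p
/// becomes
///   store i32 %x, ptr %p
///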
11750b57cec5SDimitry Andric /// \returns true if the store was successfully combined away. This indicates
11760b57cec5SDimitry Andric /// the caller must erase the store instruction. We have to let the caller erase
11770b57cec5SDimitry Andric /// the store instruction as otherwise there is no way to signal whether it was
11780b57cec5SDimitry Andric /// combined or not: IC.EraseInstFromFunction returns a null pointer.
1179e8d8bef9SDimitry Andric static bool combineStoreToValueType(InstCombinerImpl &IC, StoreInst &SI) {
11800b57cec5SDimitry Andric // FIXME: We could probably with some care handle both volatile and ordered
11810b57cec5SDimitry Andric // atomic stores here but it isn't clear that this is important.
11820b57cec5SDimitry Andric if (!SI.isUnordered())
11830b57cec5SDimitry Andric return false;
11840b57cec5SDimitry Andric
11850b57cec5SDimitry Andric // swifterror values can't be bitcasted.
11860b57cec5SDimitry Andric if (SI.getPointerOperand()->isSwiftError())
11870b57cec5SDimitry Andric return false;
11880b57cec5SDimitry Andric
11890b57cec5SDimitry Andric Value *V = SI.getValueOperand();
11900b57cec5SDimitry Andric
11910b57cec5SDimitry Andric // Fold away bit casts of the stored value by storing the original type.
11920b57cec5SDimitry Andric if (auto *BC = dyn_cast<BitCastInst>(V)) {
1193e8d8bef9SDimitry Andric assert(!BC->getType()->isX86_AMXTy() &&
1194e8d8bef9SDimitry Andric "store to x86_amx* should not happen!");
11950b57cec5SDimitry Andric V = BC->getOperand(0);
1196e8d8bef9SDimitry Andric // Don't transform when the type is x86_amx; leaving it alone keeps the pass
1197e8d8bef9SDimitry Andric // that lowers the x86_amx type happy.
1198e8d8bef9SDimitry Andric if (V->getType()->isX86_AMXTy())
1199e8d8bef9SDimitry Andric return false;
12000b57cec5SDimitry Andric if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
12010b57cec5SDimitry Andric combineStoreToNewValue(IC, SI, V);
12020b57cec5SDimitry Andric return true;
12030b57cec5SDimitry Andric }
12040b57cec5SDimitry Andric }
12050b57cec5SDimitry Andric
12060b57cec5SDimitry Andric if (Value *U = likeBitCastFromVector(IC, V))
12070b57cec5SDimitry Andric if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
12080b57cec5SDimitry Andric combineStoreToNewValue(IC, SI, U);
12090b57cec5SDimitry Andric return true;
12100b57cec5SDimitry Andric }
12110b57cec5SDimitry Andric
12120b57cec5SDimitry Andric // FIXME: We should also canonicalize stores of vectors when their elements
12130b57cec5SDimitry Andric // are cast to other types.
12140b57cec5SDimitry Andric return false;
12150b57cec5SDimitry Andric }
12160b57cec5SDimitry Andric
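/// Break a store of a small aggregate up into stores of its elements.
///
/// For example (an illustrative sketch; assumes a struct with no padding):
///   store { i32, i32 } %agg, ptr %p
/// becomes, roughly:
///   %e0 = extractvalue { i32, i32 } %agg, 0
///   %p0 = getelementptr inbounds { i32, i32 }, ptr %p, i32 0, i32 0
///   store i32 %e0, ptr %p0
/// and likewise for element 1.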
1217e8d8bef9SDimitry Andric static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
12180b57cec5SDimitry Andric // FIXME: We could probably with some care handle both volatile and atomic
12190b57cec5SDimitry Andric // stores here but it isn't clear that this is important.
12200b57cec5SDimitry Andric if (!SI.isSimple())
12210b57cec5SDimitry Andric return false;
12220b57cec5SDimitry Andric
12230b57cec5SDimitry Andric Value *V = SI.getValueOperand();
12240b57cec5SDimitry Andric Type *T = V->getType();
12250b57cec5SDimitry Andric
12260b57cec5SDimitry Andric if (!T->isAggregateType())
12270b57cec5SDimitry Andric return false;
12280b57cec5SDimitry Andric
12290b57cec5SDimitry Andric if (auto *ST = dyn_cast<StructType>(T)) {
12300b57cec5SDimitry Andric // If the struct has only one element, unpack it.
12310b57cec5SDimitry Andric unsigned Count = ST->getNumElements();
12320b57cec5SDimitry Andric if (Count == 1) {
12330b57cec5SDimitry Andric V = IC.Builder.CreateExtractValue(V, 0);
12340b57cec5SDimitry Andric combineStoreToNewValue(IC, SI, V);
12350b57cec5SDimitry Andric return true;
12360b57cec5SDimitry Andric }
12370b57cec5SDimitry Andric
12380b57cec5SDimitry Andric // We don't want to break stores with padding here, as we'd lose
12390b57cec5SDimitry Andric // the knowledge that padding exists for the rest of the pipeline.
12400b57cec5SDimitry Andric const DataLayout &DL = IC.getDataLayout();
12410b57cec5SDimitry Andric auto *SL = DL.getStructLayout(ST);
124206c3fb27SDimitry Andric
124306c3fb27SDimitry Andric // Don't unpack a struct whose size is scalable (it contains scalable vectors).
124406c3fb27SDimitry Andric if (SL->getSizeInBits().isScalable())
124506c3fb27SDimitry Andric return false;
124606c3fb27SDimitry Andric
12470b57cec5SDimitry Andric if (SL->hasPadding())
12480b57cec5SDimitry Andric return false;
12490b57cec5SDimitry Andric
12505ffd83dbSDimitry Andric const auto Align = SI.getAlign();
12510b57cec5SDimitry Andric
12520b57cec5SDimitry Andric SmallString<16> EltName = V->getName();
12530b57cec5SDimitry Andric EltName += ".elt";
12540b57cec5SDimitry Andric auto *Addr = SI.getPointerOperand();
12550b57cec5SDimitry Andric SmallString<16> AddrName = Addr->getName();
12560b57cec5SDimitry Andric AddrName += ".repack";
12570b57cec5SDimitry Andric
12580b57cec5SDimitry Andric auto *IdxType = Type::getInt32Ty(ST->getContext());
12590b57cec5SDimitry Andric auto *Zero = ConstantInt::get(IdxType, 0);
12600b57cec5SDimitry Andric for (unsigned i = 0; i < Count; i++) {
12610b57cec5SDimitry Andric Value *Indices[2] = {
12620b57cec5SDimitry Andric Zero,
12630b57cec5SDimitry Andric ConstantInt::get(IdxType, i),
12640b57cec5SDimitry Andric };
1265bdd1243dSDimitry Andric auto *Ptr =
1266bdd1243dSDimitry Andric IC.Builder.CreateInBoundsGEP(ST, Addr, ArrayRef(Indices), AddrName);
12670b57cec5SDimitry Andric auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
12685ffd83dbSDimitry Andric auto EltAlign = commonAlignment(Align, SL->getElementOffset(i));
12690b57cec5SDimitry Andric llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1270349cc55cSDimitry Andric NS->setAAMetadata(SI.getAAMetadata());
12710b57cec5SDimitry Andric }
12720b57cec5SDimitry Andric
12730b57cec5SDimitry Andric return true;
12740b57cec5SDimitry Andric }
12750b57cec5SDimitry Andric
12760b57cec5SDimitry Andric if (auto *AT = dyn_cast<ArrayType>(T)) {
12770b57cec5SDimitry Andric // If the array has only one element, unpack it.
12780b57cec5SDimitry Andric auto NumElements = AT->getNumElements();
12790b57cec5SDimitry Andric if (NumElements == 1) {
12800b57cec5SDimitry Andric V = IC.Builder.CreateExtractValue(V, 0);
12810b57cec5SDimitry Andric combineStoreToNewValue(IC, SI, V);
12820b57cec5SDimitry Andric return true;
12830b57cec5SDimitry Andric }
12840b57cec5SDimitry Andric
12850b57cec5SDimitry Andric // Bail out if the array is too large. Ideally we would like to optimize
12860b57cec5SDimitry Andric // arrays of arbitrary size but this has a terrible impact on compile time.
12870b57cec5SDimitry Andric // The threshold here is chosen arbitrarily and may need a little bit of
12880b57cec5SDimitry Andric // tuning.
12890b57cec5SDimitry Andric if (NumElements > IC.MaxArraySizeForCombine)
12900b57cec5SDimitry Andric return false;
12910b57cec5SDimitry Andric
12920b57cec5SDimitry Andric const DataLayout &DL = IC.getDataLayout();
12935f757f3fSDimitry Andric TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
12945ffd83dbSDimitry Andric const auto Align = SI.getAlign();
12950b57cec5SDimitry Andric
12960b57cec5SDimitry Andric SmallString<16> EltName = V->getName();
12970b57cec5SDimitry Andric EltName += ".elt";
12980b57cec5SDimitry Andric auto *Addr = SI.getPointerOperand();
12990b57cec5SDimitry Andric SmallString<16> AddrName = Addr->getName();
13000b57cec5SDimitry Andric AddrName += ".repack";
13010b57cec5SDimitry Andric
13020b57cec5SDimitry Andric auto *IdxType = Type::getInt64Ty(T->getContext());
13030b57cec5SDimitry Andric auto *Zero = ConstantInt::get(IdxType, 0);
13040b57cec5SDimitry Andric
1305*0fca6ea1SDimitry Andric TypeSize Offset = TypeSize::getZero();
13060b57cec5SDimitry Andric for (uint64_t i = 0; i < NumElements; i++) {
13070b57cec5SDimitry Andric Value *Indices[2] = {
13080b57cec5SDimitry Andric Zero,
13090b57cec5SDimitry Andric ConstantInt::get(IdxType, i),
13100b57cec5SDimitry Andric };
1311bdd1243dSDimitry Andric auto *Ptr =
1312bdd1243dSDimitry Andric IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
13130b57cec5SDimitry Andric auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
13145f757f3fSDimitry Andric auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
13150b57cec5SDimitry Andric Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1316349cc55cSDimitry Andric NS->setAAMetadata(SI.getAAMetadata());
13170b57cec5SDimitry Andric Offset += EltSize;
13180b57cec5SDimitry Andric }
13190b57cec5SDimitry Andric
13200b57cec5SDimitry Andric return true;
13210b57cec5SDimitry Andric }
13220b57cec5SDimitry Andric
13230b57cec5SDimitry Andric return false;
13240b57cec5SDimitry Andric }
13250b57cec5SDimitry Andric
13260b57cec5SDimitry Andric /// equivalentAddressValues - Test if A and B will obviously have the same
13270b57cec5SDimitry Andric /// value. This includes recognizing that %t0 and %t1 will have the same
13280b57cec5SDimitry Andric /// value in code like this:
13290b57cec5SDimitry Andric /// %t0 = getelementptr \@a, 0, 3
13300b57cec5SDimitry Andric /// store i32 0, ptr %t0
13310b57cec5SDimitry Andric /// %t1 = getelementptr \@a, 0, 3
13320b57cec5SDimitry Andric /// %t2 = load i32, ptr %t1
13330b57cec5SDimitry Andric ///
13340b57cec5SDimitry Andric static bool equivalentAddressValues(Value *A, Value *B) {
13350b57cec5SDimitry Andric // Test if the values are trivially equivalent.
13360b57cec5SDimitry Andric if (A == B) return true;
13370b57cec5SDimitry Andric
13380b57cec5SDimitry Andric // Test if the values come from identical arithmetic instructions.
13390b57cec5SDimitry Andric // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
13400b57cec5SDimitry Andric // it's only used to compare two uses within the same basic block, which
13410b57cec5SDimitry Andric // means that they'll always either have the same value or one of them
13420b57cec5SDimitry Andric // will have an undefined value.
13430b57cec5SDimitry Andric if (isa<BinaryOperator>(A) ||
13440b57cec5SDimitry Andric isa<CastInst>(A) ||
13450b57cec5SDimitry Andric isa<PHINode>(A) ||
13460b57cec5SDimitry Andric isa<GetElementPtrInst>(A))
13470b57cec5SDimitry Andric if (Instruction *BI = dyn_cast<Instruction>(B))
13480b57cec5SDimitry Andric if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
13490b57cec5SDimitry Andric return true;
13500b57cec5SDimitry Andric
13510b57cec5SDimitry Andric // Otherwise they may not be equivalent.
13520b57cec5SDimitry Andric return false;
13530b57cec5SDimitry Andric }
13540b57cec5SDimitry Andric
1355e8d8bef9SDimitry Andric Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
13560b57cec5SDimitry Andric Value *Val = SI.getOperand(0);
13570b57cec5SDimitry Andric Value *Ptr = SI.getOperand(1);
13580b57cec5SDimitry Andric
13590b57cec5SDimitry Andric // Try to canonicalize the stored type.
13600b57cec5SDimitry Andric if (combineStoreToValueType(*this, SI))
13610b57cec5SDimitry Andric return eraseInstFromFunction(SI);
13620b57cec5SDimitry Andric
13635f757f3fSDimitry Andric if (!EnableInferAlignmentPass) {
13640b57cec5SDimitry Andric // Attempt to improve the alignment.
13655ffd83dbSDimitry Andric const Align KnownAlign = getOrEnforceKnownAlignment(
13665ffd83dbSDimitry Andric Ptr, DL.getPrefTypeAlign(Val->getType()), DL, &SI, &AC, &DT);
13675ffd83dbSDimitry Andric if (KnownAlign > SI.getAlign())
13680b57cec5SDimitry Andric SI.setAlignment(KnownAlign);
13695f757f3fSDimitry Andric }
13700b57cec5SDimitry Andric
13710b57cec5SDimitry Andric // Try to unpack an aggregate store into stores of its elements.
13720b57cec5SDimitry Andric if (unpackStoreToAggregate(*this, SI))
13730b57cec5SDimitry Andric return eraseInstFromFunction(SI);
13740b57cec5SDimitry Andric
13750b57cec5SDimitry Andric // Replace GEP indices if possible.
137606c3fb27SDimitry Andric if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI))
137706c3fb27SDimitry Andric return replaceOperand(SI, 1, NewGEPI);
13780b57cec5SDimitry Andric
13790b57cec5SDimitry Andric // Don't hack volatile/ordered stores.
13800b57cec5SDimitry Andric // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
13810b57cec5SDimitry Andric if (!SI.isUnordered()) return nullptr;
13820b57cec5SDimitry Andric
13830b57cec5SDimitry Andric // If the pointer operand is an alloca with a single use, remove the store,
13840b57cec5SDimitry Andric // making the alloca dead.
13850b57cec5SDimitry Andric if (Ptr->hasOneUse()) {
13860b57cec5SDimitry Andric if (isa<AllocaInst>(Ptr))
13870b57cec5SDimitry Andric return eraseInstFromFunction(SI);
13880b57cec5SDimitry Andric if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
13890b57cec5SDimitry Andric if (isa<AllocaInst>(GEP->getOperand(0))) {
13900b57cec5SDimitry Andric if (GEP->getOperand(0)->hasOneUse())
13910b57cec5SDimitry Andric return eraseInstFromFunction(SI);
13920b57cec5SDimitry Andric }
13930b57cec5SDimitry Andric }
13940b57cec5SDimitry Andric }
13950b57cec5SDimitry Andric
13960b57cec5SDimitry Andric // If we have a store to a location which is known constant, we can conclude
13970b57cec5SDimitry Andric // that the store must be storing the constant value (else the memory
13980b57cec5SDimitry Andric // wouldn't be constant), and this must be a noop.
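  // For example (illustrative), a store to
  //   @g = constant i32 42
  // must be storing 42 (anything else would modify constant memory), so it
  // can be dropped as a no-op.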
1399bdd1243dSDimitry Andric if (!isModSet(AA->getModRefInfoMask(Ptr)))
14000b57cec5SDimitry Andric return eraseInstFromFunction(SI);
14010b57cec5SDimitry Andric
14020b57cec5SDimitry Andric // Do really simple DSE, to catch cases where there are several consecutive
14030b57cec5SDimitry Andric // stores to the same location, separated by a few arithmetic operations. This
14040b57cec5SDimitry Andric // situation often occurs with bitfield accesses.
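  // For example (an illustrative sketch; the names are made up):
  //   store i32 %a, ptr %p      ; dead, removed below
  //   %b = or i32 %a, 1
  //   store i32 %b, ptr %p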
14050b57cec5SDimitry Andric BasicBlock::iterator BBI(SI);
14060b57cec5SDimitry Andric for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
14070b57cec5SDimitry Andric --ScanInsts) {
14080b57cec5SDimitry Andric --BBI;
14090b57cec5SDimitry Andric // Don't count debug info directives, lest they affect codegen,
14100b57cec5SDimitry Andric // and skip pointer-to-pointer bitcasts, which are NOPs.
14115f757f3fSDimitry Andric if (BBI->isDebugOrPseudoInst()) {
14120b57cec5SDimitry Andric ScanInsts++;
14130b57cec5SDimitry Andric continue;
14140b57cec5SDimitry Andric }
14150b57cec5SDimitry Andric
14160b57cec5SDimitry Andric if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
14170b57cec5SDimitry Andric // Previous store is unordered, to the same location, and of the same type?
141881ad6265SDimitry Andric if (PrevSI->isUnordered() &&
141981ad6265SDimitry Andric equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1)) &&
142081ad6265SDimitry Andric PrevSI->getValueOperand()->getType() ==
142181ad6265SDimitry Andric SI.getValueOperand()->getType()) {
14220b57cec5SDimitry Andric ++NumDeadStore;
142355e4f9d5SDimitry Andric // Manually add back the original store to the worklist now, so it will
142455e4f9d5SDimitry Andric // be processed after the operands of the removed store, as this may
142555e4f9d5SDimitry Andric // expose additional DSE opportunities.
14265ffd83dbSDimitry Andric Worklist.push(&SI);
14270b57cec5SDimitry Andric eraseInstFromFunction(*PrevSI);
142855e4f9d5SDimitry Andric return nullptr;
14290b57cec5SDimitry Andric }
14300b57cec5SDimitry Andric break;
14310b57cec5SDimitry Andric }
14320b57cec5SDimitry Andric
14330b57cec5SDimitry Andric // If this is a load, we have to stop. However, if the loaded value is from
14340b57cec5SDimitry Andric // the pointer we're loading and is producing the pointer we're storing,
14350b57cec5SDimitry Andric // then *this* store is dead (X = load P; store X -> P).
14360b57cec5SDimitry Andric if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
14370b57cec5SDimitry Andric if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
14380b57cec5SDimitry Andric assert(SI.isUnordered() && "can't eliminate ordering operation");
14390b57cec5SDimitry Andric return eraseInstFromFunction(SI);
14400b57cec5SDimitry Andric }
14410b57cec5SDimitry Andric
14420b57cec5SDimitry Andric // Otherwise, this is a load from some other location. Stores before it
14430b57cec5SDimitry Andric // may not be dead.
14440b57cec5SDimitry Andric break;
14450b57cec5SDimitry Andric }
14460b57cec5SDimitry Andric
14470b57cec5SDimitry Andric // Don't skip over loads, throws or things that can modify memory.
14480b57cec5SDimitry Andric if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
14490b57cec5SDimitry Andric break;
14500b57cec5SDimitry Andric }
14510b57cec5SDimitry Andric
14520b57cec5SDimitry Andric // store X, null -> turns into 'unreachable' in SimplifyCFG
14530b57cec5SDimitry Andric // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
14540b57cec5SDimitry Andric if (canSimplifyNullStoreOrGEP(SI)) {
1455fe6060f1SDimitry Andric if (!isa<PoisonValue>(Val))
1456fe6060f1SDimitry Andric return replaceOperand(SI, 0, PoisonValue::get(Val->getType()));
14570b57cec5SDimitry Andric return nullptr; // Do not modify these!
14580b57cec5SDimitry Andric }
14590b57cec5SDimitry Andric
146006c3fb27SDimitry Andric // This is a non-terminator unreachable marker. Don't remove it.
146106c3fb27SDimitry Andric if (isa<UndefValue>(Ptr)) {
14625f757f3fSDimitry Andric // Remove guaranteed-to-transfer instructions before the marker.
14635f757f3fSDimitry Andric if (removeInstructionsBeforeUnreachable(SI))
146406c3fb27SDimitry Andric return &SI;
14655f757f3fSDimitry Andric
14665f757f3fSDimitry Andric // Remove all instructions after the marker and handle dead blocks this
14675f757f3fSDimitry Andric // implies.
14685f757f3fSDimitry Andric SmallVector<BasicBlock *> Worklist;
14695f757f3fSDimitry Andric handleUnreachableFrom(SI.getNextNode(), Worklist);
14705f757f3fSDimitry Andric handlePotentiallyDeadBlocks(Worklist);
147106c3fb27SDimitry Andric return nullptr;
147206c3fb27SDimitry Andric }
147306c3fb27SDimitry Andric
14740b57cec5SDimitry Andric // store undef, Ptr -> noop
147581ad6265SDimitry Andric // FIXME: This is technically incorrect because it might overwrite a poison
147681ad6265SDimitry Andric // value. Change to PoisonValue once #52930 is resolved.
14770b57cec5SDimitry Andric if (isa<UndefValue>(Val))
14780b57cec5SDimitry Andric return eraseInstFromFunction(SI);
14790b57cec5SDimitry Andric
14800b57cec5SDimitry Andric return nullptr;
14810b57cec5SDimitry Andric }
14820b57cec5SDimitry Andric
14830b57cec5SDimitry Andric /// Try to transform:
14840b57cec5SDimitry Andric /// if () { *P = v1; } else { *P = v2 }
14850b57cec5SDimitry Andric /// or:
14860b57cec5SDimitry Andric /// *P = v1; if () { *P = v2; }
14870b57cec5SDimitry Andric /// into a phi node with a store in the successor.
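/// For example (an illustrative sketch), with both stores to the same %p:
///   then:  store i32 %v1, ptr %p
///          br label %end
///   else:  store i32 %v2, ptr %p
///          br label %end
/// becomes:
///   end:   %merged = phi i32 [ %v1, %then ], [ %v2, %else ]
///          store i32 %merged, ptr %p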
1488e8d8bef9SDimitry Andric bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
14895ffd83dbSDimitry Andric if (!SI.isUnordered())
14905ffd83dbSDimitry Andric return false; // This code has not been audited for volatile/ordered case.
14910b57cec5SDimitry Andric
14920b57cec5SDimitry Andric // Check if the successor block has exactly 2 incoming edges.
14930b57cec5SDimitry Andric BasicBlock *StoreBB = SI.getParent();
14940b57cec5SDimitry Andric BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
14950b57cec5SDimitry Andric if (!DestBB->hasNPredecessors(2))
14960b57cec5SDimitry Andric return false;
14970b57cec5SDimitry Andric
14980b57cec5SDimitry Andric // Capture the other block (the block that doesn't contain our store).
14990b57cec5SDimitry Andric pred_iterator PredIter = pred_begin(DestBB);
15000b57cec5SDimitry Andric if (*PredIter == StoreBB)
15010b57cec5SDimitry Andric ++PredIter;
15020b57cec5SDimitry Andric BasicBlock *OtherBB = *PredIter;
15030b57cec5SDimitry Andric
15040b57cec5SDimitry Andric // Bail out if the relevant blocks aren't all distinct. This can happen,
15050b57cec5SDimitry Andric // for example, if SI is in an infinite loop.
15060b57cec5SDimitry Andric if (StoreBB == DestBB || OtherBB == DestBB)
15070b57cec5SDimitry Andric return false;
15080b57cec5SDimitry Andric
15090b57cec5SDimitry Andric // Verify that the other block ends in a branch and is not otherwise empty.
15100b57cec5SDimitry Andric BasicBlock::iterator BBI(OtherBB->getTerminator());
15110b57cec5SDimitry Andric BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
15120b57cec5SDimitry Andric if (!OtherBr || BBI == OtherBB->begin())
15130b57cec5SDimitry Andric return false;
15140b57cec5SDimitry Andric
151506c3fb27SDimitry Andric auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
151606c3fb27SDimitry Andric if (!OtherStore ||
151706c3fb27SDimitry Andric OtherStore->getPointerOperand() != SI.getPointerOperand())
151806c3fb27SDimitry Andric return false;
151906c3fb27SDimitry Andric
152006c3fb27SDimitry Andric auto *SIVTy = SI.getValueOperand()->getType();
152106c3fb27SDimitry Andric auto *OSVTy = OtherStore->getValueOperand()->getType();
152206c3fb27SDimitry Andric return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) &&
152306c3fb27SDimitry Andric SI.hasSameSpecialState(OtherStore);
152406c3fb27SDimitry Andric };
152506c3fb27SDimitry Andric
15260b57cec5SDimitry Andric // If the other block ends in an unconditional branch, check for the 'if then
15270b57cec5SDimitry Andric // else' case. There is an instruction before the branch.
15280b57cec5SDimitry Andric StoreInst *OtherStore = nullptr;
15290b57cec5SDimitry Andric if (OtherBr->isUnconditional()) {
15300b57cec5SDimitry Andric --BBI;
1531349cc55cSDimitry Andric // Skip over debugging info and pseudo probes.
15325f757f3fSDimitry Andric while (BBI->isDebugOrPseudoInst()) {
15330b57cec5SDimitry Andric if (BBI==OtherBB->begin())
15340b57cec5SDimitry Andric return false;
15350b57cec5SDimitry Andric --BBI;
15360b57cec5SDimitry Andric }
15370b57cec5SDimitry Andric // If this isn't a store, isn't a store to the same location, or is not the
15380b57cec5SDimitry Andric // right kind of store, bail out.
15390b57cec5SDimitry Andric OtherStore = dyn_cast<StoreInst>(BBI);
154006c3fb27SDimitry Andric if (!OtherStoreIsMergeable(OtherStore))
15410b57cec5SDimitry Andric return false;
15420b57cec5SDimitry Andric } else {
15430b57cec5SDimitry Andric // Otherwise, the other block ended with a conditional branch. If one of the
15440b57cec5SDimitry Andric // destinations is StoreBB, then we have the if/then case.
15450b57cec5SDimitry Andric if (OtherBr->getSuccessor(0) != StoreBB &&
15460b57cec5SDimitry Andric OtherBr->getSuccessor(1) != StoreBB)
15470b57cec5SDimitry Andric return false;
15480b57cec5SDimitry Andric
15490b57cec5SDimitry Andric // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
15500b57cec5SDimitry Andric // if/then triangle. See if there is a store to the same ptr as SI that
15510b57cec5SDimitry Andric // lives in OtherBB.
15520b57cec5SDimitry Andric for (;; --BBI) {
15530b57cec5SDimitry Andric // Check to see if we find the matching store.
155406c3fb27SDimitry Andric OtherStore = dyn_cast<StoreInst>(BBI);
155506c3fb27SDimitry Andric if (OtherStoreIsMergeable(OtherStore))
15560b57cec5SDimitry Andric break;
155706c3fb27SDimitry Andric
15580b57cec5SDimitry Andric // If we find something that may be using or overwriting the stored
15590b57cec5SDimitry Andric // value, or if we run out of instructions, we can't do the transform.
15600b57cec5SDimitry Andric if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
15610b57cec5SDimitry Andric BBI->mayWriteToMemory() || BBI == OtherBB->begin())
15620b57cec5SDimitry Andric return false;
15630b57cec5SDimitry Andric }
15640b57cec5SDimitry Andric
15650b57cec5SDimitry Andric // In order to eliminate the store in OtherBB, we have to make sure nothing
15660b57cec5SDimitry Andric // reads or overwrites the stored value in StoreBB.
15670b57cec5SDimitry Andric for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
15680b57cec5SDimitry Andric // FIXME: This should really be AA driven.
15690b57cec5SDimitry Andric if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
15700b57cec5SDimitry Andric return false;
15710b57cec5SDimitry Andric }
15720b57cec5SDimitry Andric }
15730b57cec5SDimitry Andric
15740b57cec5SDimitry Andric // Insert a PHI node now if we need it.
157506c3fb27SDimitry Andric Value *MergedVal = OtherStore->getValueOperand();
15760b57cec5SDimitry Andric // The debug locations of the original instructions might differ. Merge them.
15770b57cec5SDimitry Andric DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
15780b57cec5SDimitry Andric OtherStore->getDebugLoc());
157906c3fb27SDimitry Andric if (MergedVal != SI.getValueOperand()) {
158006c3fb27SDimitry Andric PHINode *PN =
158106c3fb27SDimitry Andric PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge");
158206c3fb27SDimitry Andric PN->addIncoming(SI.getValueOperand(), SI.getParent());
158306c3fb27SDimitry Andric Builder.SetInsertPoint(OtherStore);
158406c3fb27SDimitry Andric PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
158506c3fb27SDimitry Andric OtherBB);
15865f757f3fSDimitry Andric MergedVal = InsertNewInstBefore(PN, DestBB->begin());
15870b57cec5SDimitry Andric PN->setDebugLoc(MergedLoc);
15880b57cec5SDimitry Andric }
15890b57cec5SDimitry Andric
15900b57cec5SDimitry Andric // Advance to a place where it is safe to insert the new store and insert it.
15910b57cec5SDimitry Andric BBI = DestBB->getFirstInsertionPt();
15925ffd83dbSDimitry Andric StoreInst *NewSI =
15935ffd83dbSDimitry Andric new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlign(),
15940b57cec5SDimitry Andric SI.getOrdering(), SI.getSyncScopeID());
15955f757f3fSDimitry Andric InsertNewInstBefore(NewSI, BBI);
15960b57cec5SDimitry Andric NewSI->setDebugLoc(MergedLoc);
1597bdd1243dSDimitry Andric NewSI->mergeDIAssignID({&SI, OtherStore});
15980b57cec5SDimitry Andric
15990b57cec5SDimitry Andric // If the two stores had AA tags, merge them.
1600349cc55cSDimitry Andric AAMDNodes AATags = SI.getAAMetadata();
1601349cc55cSDimitry Andric if (AATags)
1602349cc55cSDimitry Andric NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));
16030b57cec5SDimitry Andric
16040b57cec5SDimitry Andric // Nuke the old stores.
16050b57cec5SDimitry Andric eraseInstFromFunction(SI);
16060b57cec5SDimitry Andric eraseInstFromFunction(*OtherStore);
16070b57cec5SDimitry Andric return true;
16080b57cec5SDimitry Andric }
1609