//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
//                                    intrinsics
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass replaces masked memory intrinsics - when unsupported by the target
// - with a chain of basic blocks that deal with the elements one-by-one if the
// appropriate mask bit is set.
//
//===----------------------------------------------------------------------===//
15e8d8bef9SDimitry Andric
16e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h"
17e8d8bef9SDimitry Andric #include "llvm/ADT/Twine.h"
18fe6060f1SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h"
19e8d8bef9SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
20e8d8bef9SDimitry Andric #include "llvm/IR/BasicBlock.h"
21e8d8bef9SDimitry Andric #include "llvm/IR/Constant.h"
22e8d8bef9SDimitry Andric #include "llvm/IR/Constants.h"
23e8d8bef9SDimitry Andric #include "llvm/IR/DerivedTypes.h"
24fe6060f1SDimitry Andric #include "llvm/IR/Dominators.h"
25e8d8bef9SDimitry Andric #include "llvm/IR/Function.h"
26e8d8bef9SDimitry Andric #include "llvm/IR/IRBuilder.h"
27e8d8bef9SDimitry Andric #include "llvm/IR/Instruction.h"
28e8d8bef9SDimitry Andric #include "llvm/IR/Instructions.h"
29e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
30e8d8bef9SDimitry Andric #include "llvm/IR/Type.h"
31e8d8bef9SDimitry Andric #include "llvm/IR/Value.h"
32e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
33e8d8bef9SDimitry Andric #include "llvm/Pass.h"
34e8d8bef9SDimitry Andric #include "llvm/Support/Casting.h"
35e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar.h"
36fe6060f1SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37e8d8bef9SDimitry Andric #include <cassert>
38bdd1243dSDimitry Andric #include <optional>
39e8d8bef9SDimitry Andric
40e8d8bef9SDimitry Andric using namespace llvm;
41e8d8bef9SDimitry Andric
42e8d8bef9SDimitry Andric #define DEBUG_TYPE "scalarize-masked-mem-intrin"
43e8d8bef9SDimitry Andric
44e8d8bef9SDimitry Andric namespace {
45e8d8bef9SDimitry Andric
46e8d8bef9SDimitry Andric class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
47e8d8bef9SDimitry Andric public:
48e8d8bef9SDimitry Andric static char ID; // Pass identification, replacement for typeid
49e8d8bef9SDimitry Andric
ScalarizeMaskedMemIntrinLegacyPass()50e8d8bef9SDimitry Andric explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
51e8d8bef9SDimitry Andric initializeScalarizeMaskedMemIntrinLegacyPassPass(
52e8d8bef9SDimitry Andric *PassRegistry::getPassRegistry());
53e8d8bef9SDimitry Andric }
54e8d8bef9SDimitry Andric
55e8d8bef9SDimitry Andric bool runOnFunction(Function &F) override;
56e8d8bef9SDimitry Andric
getPassName() const57e8d8bef9SDimitry Andric StringRef getPassName() const override {
58e8d8bef9SDimitry Andric return "Scalarize Masked Memory Intrinsics";
59e8d8bef9SDimitry Andric }
60e8d8bef9SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const61e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
62e8d8bef9SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>();
63fe6060f1SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>();
64e8d8bef9SDimitry Andric }
65e8d8bef9SDimitry Andric };
66e8d8bef9SDimitry Andric
67e8d8bef9SDimitry Andric } // end anonymous namespace
68e8d8bef9SDimitry Andric
69e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
70fe6060f1SDimitry Andric const TargetTransformInfo &TTI, const DataLayout &DL,
71fe6060f1SDimitry Andric DomTreeUpdater *DTU);
72e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
73e8d8bef9SDimitry Andric const TargetTransformInfo &TTI,
74fe6060f1SDimitry Andric const DataLayout &DL, DomTreeUpdater *DTU);
75e8d8bef9SDimitry Andric
76e8d8bef9SDimitry Andric char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
77e8d8bef9SDimitry Andric
78e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
79e8d8bef9SDimitry Andric "Scalarize unsupported masked memory intrinsics", false,
80e8d8bef9SDimitry Andric false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)81e8d8bef9SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
82fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
83e8d8bef9SDimitry Andric INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
84e8d8bef9SDimitry Andric "Scalarize unsupported masked memory intrinsics", false,
85e8d8bef9SDimitry Andric false)
86e8d8bef9SDimitry Andric
87e8d8bef9SDimitry Andric FunctionPass *llvm::createScalarizeMaskedMemIntrinLegacyPass() {
88e8d8bef9SDimitry Andric return new ScalarizeMaskedMemIntrinLegacyPass();
89e8d8bef9SDimitry Andric }
90e8d8bef9SDimitry Andric
isConstantIntVector(Value * Mask)91e8d8bef9SDimitry Andric static bool isConstantIntVector(Value *Mask) {
92e8d8bef9SDimitry Andric Constant *C = dyn_cast<Constant>(Mask);
93e8d8bef9SDimitry Andric if (!C)
94e8d8bef9SDimitry Andric return false;
95e8d8bef9SDimitry Andric
96e8d8bef9SDimitry Andric unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
97e8d8bef9SDimitry Andric for (unsigned i = 0; i != NumElts; ++i) {
98e8d8bef9SDimitry Andric Constant *CElt = C->getAggregateElement(i);
99e8d8bef9SDimitry Andric if (!CElt || !isa<ConstantInt>(CElt))
100e8d8bef9SDimitry Andric return false;
101e8d8bef9SDimitry Andric }
102e8d8bef9SDimitry Andric
103e8d8bef9SDimitry Andric return true;
104e8d8bef9SDimitry Andric }
105e8d8bef9SDimitry Andric
adjustForEndian(const DataLayout & DL,unsigned VectorWidth,unsigned Idx)106fe6060f1SDimitry Andric static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
107fe6060f1SDimitry Andric unsigned Idx) {
108fe6060f1SDimitry Andric return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
109fe6060f1SDimitry Andric }
110fe6060f1SDimitry Andric
111e8d8bef9SDimitry Andric // Translate a masked load intrinsic like
112e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
113e8d8bef9SDimitry Andric // <16 x i1> %mask, <16 x i32> %passthru)
114e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if
115e8d8bef9SDimitry Andric // the appropriate mask bit is set
116e8d8bef9SDimitry Andric //
117e8d8bef9SDimitry Andric // %1 = bitcast i8* %addr to i32*
118e8d8bef9SDimitry Andric // %2 = extractelement <16 x i1> %mask, i32 0
119e8d8bef9SDimitry Andric // br i1 %2, label %cond.load, label %else
120e8d8bef9SDimitry Andric //
121e8d8bef9SDimitry Andric // cond.load: ; preds = %0
122e8d8bef9SDimitry Andric // %3 = getelementptr i32* %1, i32 0
123e8d8bef9SDimitry Andric // %4 = load i32* %3
124e8d8bef9SDimitry Andric // %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
125e8d8bef9SDimitry Andric // br label %else
126e8d8bef9SDimitry Andric //
127e8d8bef9SDimitry Andric // else: ; preds = %0, %cond.load
12806c3fb27SDimitry Andric // %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ poison, %0 ]
129e8d8bef9SDimitry Andric // %6 = extractelement <16 x i1> %mask, i32 1
130e8d8bef9SDimitry Andric // br i1 %6, label %cond.load1, label %else2
131e8d8bef9SDimitry Andric //
132e8d8bef9SDimitry Andric // cond.load1: ; preds = %else
133e8d8bef9SDimitry Andric // %7 = getelementptr i32* %1, i32 1
134e8d8bef9SDimitry Andric // %8 = load i32* %7
135e8d8bef9SDimitry Andric // %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
136e8d8bef9SDimitry Andric // br label %else2
137e8d8bef9SDimitry Andric //
138e8d8bef9SDimitry Andric // else2: ; preds = %else, %cond.load1
139e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
140e8d8bef9SDimitry Andric // %10 = extractelement <16 x i1> %mask, i32 2
141e8d8bef9SDimitry Andric // br i1 %10, label %cond.load4, label %else5
142e8d8bef9SDimitry Andric //
scalarizeMaskedLoad(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)143fe6060f1SDimitry Andric static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI,
144fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) {
145e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(0);
146e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(1);
147e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2);
148e8d8bef9SDimitry Andric Value *Src0 = CI->getArgOperand(3);
149e8d8bef9SDimitry Andric
150e8d8bef9SDimitry Andric const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
151e8d8bef9SDimitry Andric VectorType *VecType = cast<FixedVectorType>(CI->getType());
152e8d8bef9SDimitry Andric
153e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType();
154e8d8bef9SDimitry Andric
155e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext());
156e8d8bef9SDimitry Andric Instruction *InsertPt = CI;
157e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent();
158e8d8bef9SDimitry Andric
159e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt);
160e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc());
161e8d8bef9SDimitry Andric
162e8d8bef9SDimitry Andric // Short-cut if the mask is all-true.
163e8d8bef9SDimitry Andric if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
164e8d8bef9SDimitry Andric Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
165e8d8bef9SDimitry Andric CI->replaceAllUsesWith(NewI);
166e8d8bef9SDimitry Andric CI->eraseFromParent();
167e8d8bef9SDimitry Andric return;
168e8d8bef9SDimitry Andric }
169e8d8bef9SDimitry Andric
170e8d8bef9SDimitry Andric // Adjust alignment for the scalar instruction.
171e8d8bef9SDimitry Andric const Align AdjustedAlignVal =
172e8d8bef9SDimitry Andric commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
173e8d8bef9SDimitry Andric unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
174e8d8bef9SDimitry Andric
175e8d8bef9SDimitry Andric // The result vector
176e8d8bef9SDimitry Andric Value *VResult = Src0;
177e8d8bef9SDimitry Andric
178e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) {
179e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
180e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
181e8d8bef9SDimitry Andric continue;
18206c3fb27SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
183e8d8bef9SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
184e8d8bef9SDimitry Andric VResult = Builder.CreateInsertElement(VResult, Load, Idx);
185e8d8bef9SDimitry Andric }
186e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult);
187e8d8bef9SDimitry Andric CI->eraseFromParent();
188e8d8bef9SDimitry Andric return;
189e8d8bef9SDimitry Andric }
190e8d8bef9SDimitry Andric
191e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates
192e8d8bef9SDimitry Andric // better results on X86 at least.
193e8d8bef9SDimitry Andric Value *SclrMask;
194e8d8bef9SDimitry Andric if (VectorWidth != 1) {
195e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
196e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
197e8d8bef9SDimitry Andric }
198e8d8bef9SDimitry Andric
199e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
200e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration
201e8d8bef9SDimitry Andric //
202e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
203e8d8bef9SDimitry Andric // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
204e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0
205e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.load, label %else
206e8d8bef9SDimitry Andric //
207e8d8bef9SDimitry Andric Value *Predicate;
208e8d8bef9SDimitry Andric if (VectorWidth != 1) {
209fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet(
210fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
211e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
212e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0));
213e8d8bef9SDimitry Andric } else {
214e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx);
215e8d8bef9SDimitry Andric }
216e8d8bef9SDimitry Andric
217e8d8bef9SDimitry Andric // Create "cond" block
218e8d8bef9SDimitry Andric //
219e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0
220e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr
221e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
222e8d8bef9SDimitry Andric //
223fe6060f1SDimitry Andric Instruction *ThenTerm =
224fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
225fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU);
226e8d8bef9SDimitry Andric
227fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent();
228fe6060f1SDimitry Andric CondBlock->setName("cond.load");
229fe6060f1SDimitry Andric
230fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator());
23106c3fb27SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
232e8d8bef9SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
233e8d8bef9SDimitry Andric Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
234e8d8bef9SDimitry Andric
235e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration
236fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
237fe6060f1SDimitry Andric NewIfBlock->setName("else");
238e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock;
239e8d8bef9SDimitry Andric IfBlock = NewIfBlock;
240e8d8bef9SDimitry Andric
241e8d8bef9SDimitry Andric // Create the phi to join the new and previous value.
242fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
243e8d8bef9SDimitry Andric PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
244e8d8bef9SDimitry Andric Phi->addIncoming(NewVResult, CondBlock);
245e8d8bef9SDimitry Andric Phi->addIncoming(VResult, PrevIfBlock);
246e8d8bef9SDimitry Andric VResult = Phi;
247e8d8bef9SDimitry Andric }
248e8d8bef9SDimitry Andric
249e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult);
250e8d8bef9SDimitry Andric CI->eraseFromParent();
251e8d8bef9SDimitry Andric
252e8d8bef9SDimitry Andric ModifiedDT = true;
253e8d8bef9SDimitry Andric }
254e8d8bef9SDimitry Andric
255e8d8bef9SDimitry Andric // Translate a masked store intrinsic, like
256e8d8bef9SDimitry Andric // void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
257e8d8bef9SDimitry Andric // <16 x i1> %mask)
258e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if
259e8d8bef9SDimitry Andric // the appropriate mask bit is set
260e8d8bef9SDimitry Andric //
261e8d8bef9SDimitry Andric // %1 = bitcast i8* %addr to i32*
262e8d8bef9SDimitry Andric // %2 = extractelement <16 x i1> %mask, i32 0
263e8d8bef9SDimitry Andric // br i1 %2, label %cond.store, label %else
264e8d8bef9SDimitry Andric //
265e8d8bef9SDimitry Andric // cond.store: ; preds = %0
266e8d8bef9SDimitry Andric // %3 = extractelement <16 x i32> %val, i32 0
267e8d8bef9SDimitry Andric // %4 = getelementptr i32* %1, i32 0
268e8d8bef9SDimitry Andric // store i32 %3, i32* %4
269e8d8bef9SDimitry Andric // br label %else
270e8d8bef9SDimitry Andric //
271e8d8bef9SDimitry Andric // else: ; preds = %0, %cond.store
272e8d8bef9SDimitry Andric // %5 = extractelement <16 x i1> %mask, i32 1
273e8d8bef9SDimitry Andric // br i1 %5, label %cond.store1, label %else2
274e8d8bef9SDimitry Andric //
275e8d8bef9SDimitry Andric // cond.store1: ; preds = %else
276e8d8bef9SDimitry Andric // %6 = extractelement <16 x i32> %val, i32 1
277e8d8bef9SDimitry Andric // %7 = getelementptr i32* %1, i32 1
278e8d8bef9SDimitry Andric // store i32 %6, i32* %7
279e8d8bef9SDimitry Andric // br label %else2
280e8d8bef9SDimitry Andric // . . .
scalarizeMaskedStore(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)281fe6060f1SDimitry Andric static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
282fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) {
283e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0);
284e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(1);
285e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(2);
286e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(3);
287e8d8bef9SDimitry Andric
288e8d8bef9SDimitry Andric const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
289e8d8bef9SDimitry Andric auto *VecType = cast<VectorType>(Src->getType());
290e8d8bef9SDimitry Andric
291e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType();
292e8d8bef9SDimitry Andric
293e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext());
294e8d8bef9SDimitry Andric Instruction *InsertPt = CI;
295e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt);
296e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc());
297e8d8bef9SDimitry Andric
298e8d8bef9SDimitry Andric // Short-cut if the mask is all-true.
299e8d8bef9SDimitry Andric if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
300e8d8bef9SDimitry Andric Builder.CreateAlignedStore(Src, Ptr, AlignVal);
301e8d8bef9SDimitry Andric CI->eraseFromParent();
302e8d8bef9SDimitry Andric return;
303e8d8bef9SDimitry Andric }
304e8d8bef9SDimitry Andric
305e8d8bef9SDimitry Andric // Adjust alignment for the scalar instruction.
306e8d8bef9SDimitry Andric const Align AdjustedAlignVal =
307e8d8bef9SDimitry Andric commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
308e8d8bef9SDimitry Andric unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
309e8d8bef9SDimitry Andric
310e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) {
311e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
312e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
313e8d8bef9SDimitry Andric continue;
314e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx);
31506c3fb27SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
316e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
317e8d8bef9SDimitry Andric }
318e8d8bef9SDimitry Andric CI->eraseFromParent();
319e8d8bef9SDimitry Andric return;
320e8d8bef9SDimitry Andric }
321e8d8bef9SDimitry Andric
322e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates
323e8d8bef9SDimitry Andric // better results on X86 at least.
324e8d8bef9SDimitry Andric Value *SclrMask;
325e8d8bef9SDimitry Andric if (VectorWidth != 1) {
326e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
327e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
328e8d8bef9SDimitry Andric }
329e8d8bef9SDimitry Andric
330e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
331e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration
332e8d8bef9SDimitry Andric //
333e8d8bef9SDimitry Andric // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
334e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0
335e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.store, label %else
336e8d8bef9SDimitry Andric //
337e8d8bef9SDimitry Andric Value *Predicate;
338e8d8bef9SDimitry Andric if (VectorWidth != 1) {
339fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet(
340fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
341e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
342e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0));
343e8d8bef9SDimitry Andric } else {
344e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx);
345e8d8bef9SDimitry Andric }
346e8d8bef9SDimitry Andric
347e8d8bef9SDimitry Andric // Create "cond" block
348e8d8bef9SDimitry Andric //
349e8d8bef9SDimitry Andric // %OneElt = extractelement <16 x i32> %Src, i32 Idx
350e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0
351e8d8bef9SDimitry Andric // %store i32 %OneElt, i32* %EltAddr
352e8d8bef9SDimitry Andric //
353fe6060f1SDimitry Andric Instruction *ThenTerm =
354fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
355fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU);
356e8d8bef9SDimitry Andric
357fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent();
358fe6060f1SDimitry Andric CondBlock->setName("cond.store");
359fe6060f1SDimitry Andric
360fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator());
361e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx);
36206c3fb27SDimitry Andric Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
363e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
364e8d8bef9SDimitry Andric
365e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration
366fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
367fe6060f1SDimitry Andric NewIfBlock->setName("else");
368fe6060f1SDimitry Andric
369fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
370e8d8bef9SDimitry Andric }
371e8d8bef9SDimitry Andric CI->eraseFromParent();
372e8d8bef9SDimitry Andric
373e8d8bef9SDimitry Andric ModifiedDT = true;
374e8d8bef9SDimitry Andric }
375e8d8bef9SDimitry Andric
376e8d8bef9SDimitry Andric // Translate a masked gather intrinsic like
377e8d8bef9SDimitry Andric // <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
378e8d8bef9SDimitry Andric // <16 x i1> %Mask, <16 x i32> %Src)
379e8d8bef9SDimitry Andric // to a chain of basic blocks, with loading element one-by-one if
380e8d8bef9SDimitry Andric // the appropriate mask bit is set
381e8d8bef9SDimitry Andric //
382e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
383e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0
384e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.load, label %else
385e8d8bef9SDimitry Andric //
386e8d8bef9SDimitry Andric // cond.load:
387e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
388e8d8bef9SDimitry Andric // %Load0 = load i32, i32* %Ptr0, align 4
38906c3fb27SDimitry Andric // %Res0 = insertelement <16 x i32> poison, i32 %Load0, i32 0
390e8d8bef9SDimitry Andric // br label %else
391e8d8bef9SDimitry Andric //
392e8d8bef9SDimitry Andric // else:
39306c3fb27SDimitry Andric // %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [poison, %0]
394e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1
395e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load1, label %else2
396e8d8bef9SDimitry Andric //
397e8d8bef9SDimitry Andric // cond.load1:
398e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
399e8d8bef9SDimitry Andric // %Load1 = load i32, i32* %Ptr1, align 4
400e8d8bef9SDimitry Andric // %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
401e8d8bef9SDimitry Andric // br label %else2
402e8d8bef9SDimitry Andric // . . .
403e8d8bef9SDimitry Andric // %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
404e8d8bef9SDimitry Andric // ret <16 x i32> %Result
scalarizeMaskedGather(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)405fe6060f1SDimitry Andric static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI,
406fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) {
407e8d8bef9SDimitry Andric Value *Ptrs = CI->getArgOperand(0);
408e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(1);
409e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2);
410e8d8bef9SDimitry Andric Value *Src0 = CI->getArgOperand(3);
411e8d8bef9SDimitry Andric
412e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(CI->getType());
413e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType();
414e8d8bef9SDimitry Andric
415e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext());
416e8d8bef9SDimitry Andric Instruction *InsertPt = CI;
417e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent();
418e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt);
419e8d8bef9SDimitry Andric MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
420e8d8bef9SDimitry Andric
421e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc());
422e8d8bef9SDimitry Andric
423e8d8bef9SDimitry Andric // The result vector
424e8d8bef9SDimitry Andric Value *VResult = Src0;
425e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements();
426e8d8bef9SDimitry Andric
427e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants.
428e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) {
429e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
430e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
431e8d8bef9SDimitry Andric continue;
432e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
433e8d8bef9SDimitry Andric LoadInst *Load =
434e8d8bef9SDimitry Andric Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
435e8d8bef9SDimitry Andric VResult =
436e8d8bef9SDimitry Andric Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
437e8d8bef9SDimitry Andric }
438e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult);
439e8d8bef9SDimitry Andric CI->eraseFromParent();
440e8d8bef9SDimitry Andric return;
441e8d8bef9SDimitry Andric }
442e8d8bef9SDimitry Andric
443e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates
444e8d8bef9SDimitry Andric // better results on X86 at least.
445e8d8bef9SDimitry Andric Value *SclrMask;
446e8d8bef9SDimitry Andric if (VectorWidth != 1) {
447e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
448e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
449e8d8bef9SDimitry Andric }
450e8d8bef9SDimitry Andric
451e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
452e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration
453e8d8bef9SDimitry Andric //
454e8d8bef9SDimitry Andric // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
455e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0
456e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.load, label %else
457e8d8bef9SDimitry Andric //
458e8d8bef9SDimitry Andric
459e8d8bef9SDimitry Andric Value *Predicate;
460e8d8bef9SDimitry Andric if (VectorWidth != 1) {
461fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet(
462fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
463e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
464e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0));
465e8d8bef9SDimitry Andric } else {
466e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
467e8d8bef9SDimitry Andric }
468e8d8bef9SDimitry Andric
469e8d8bef9SDimitry Andric // Create "cond" block
470e8d8bef9SDimitry Andric //
471e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0
472e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr
473e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
474e8d8bef9SDimitry Andric //
475fe6060f1SDimitry Andric Instruction *ThenTerm =
476fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
477fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU);
478e8d8bef9SDimitry Andric
479fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent();
480fe6060f1SDimitry Andric CondBlock->setName("cond.load");
481fe6060f1SDimitry Andric
482fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator());
483e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
484e8d8bef9SDimitry Andric LoadInst *Load =
485e8d8bef9SDimitry Andric Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
486e8d8bef9SDimitry Andric Value *NewVResult =
487e8d8bef9SDimitry Andric Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
488e8d8bef9SDimitry Andric
489e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration
490fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
491fe6060f1SDimitry Andric NewIfBlock->setName("else");
492e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock;
493e8d8bef9SDimitry Andric IfBlock = NewIfBlock;
494e8d8bef9SDimitry Andric
495fe6060f1SDimitry Andric // Create the phi to join the new and previous value.
496fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
497e8d8bef9SDimitry Andric PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
498e8d8bef9SDimitry Andric Phi->addIncoming(NewVResult, CondBlock);
499e8d8bef9SDimitry Andric Phi->addIncoming(VResult, PrevIfBlock);
500e8d8bef9SDimitry Andric VResult = Phi;
501e8d8bef9SDimitry Andric }
502e8d8bef9SDimitry Andric
503e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult);
504e8d8bef9SDimitry Andric CI->eraseFromParent();
505e8d8bef9SDimitry Andric
506e8d8bef9SDimitry Andric ModifiedDT = true;
507e8d8bef9SDimitry Andric }
508e8d8bef9SDimitry Andric
509e8d8bef9SDimitry Andric // Translate a masked scatter intrinsic, like
510e8d8bef9SDimitry Andric // void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
511e8d8bef9SDimitry Andric // <16 x i1> %Mask)
512e8d8bef9SDimitry Andric // to a chain of basic blocks, that stores element one-by-one if
513e8d8bef9SDimitry Andric // the appropriate mask bit is set.
514e8d8bef9SDimitry Andric //
515e8d8bef9SDimitry Andric // %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
516e8d8bef9SDimitry Andric // %Mask0 = extractelement <16 x i1> %Mask, i32 0
517e8d8bef9SDimitry Andric // br i1 %Mask0, label %cond.store, label %else
518e8d8bef9SDimitry Andric //
519e8d8bef9SDimitry Andric // cond.store:
520e8d8bef9SDimitry Andric // %Elt0 = extractelement <16 x i32> %Src, i32 0
521e8d8bef9SDimitry Andric // %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
522e8d8bef9SDimitry Andric // store i32 %Elt0, i32* %Ptr0, align 4
523e8d8bef9SDimitry Andric // br label %else
524e8d8bef9SDimitry Andric //
525e8d8bef9SDimitry Andric // else:
526e8d8bef9SDimitry Andric // %Mask1 = extractelement <16 x i1> %Mask, i32 1
527e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store1, label %else2
528e8d8bef9SDimitry Andric //
529e8d8bef9SDimitry Andric // cond.store1:
530e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1
531e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
532e8d8bef9SDimitry Andric // store i32 %Elt1, i32* %Ptr1, align 4
533e8d8bef9SDimitry Andric // br label %else2
534e8d8bef9SDimitry Andric // . . .
scalarizeMaskedScatter(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)535fe6060f1SDimitry Andric static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI,
536fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) {
537e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0);
538e8d8bef9SDimitry Andric Value *Ptrs = CI->getArgOperand(1);
539e8d8bef9SDimitry Andric Value *Alignment = CI->getArgOperand(2);
540e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(3);
541e8d8bef9SDimitry Andric
542e8d8bef9SDimitry Andric auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
543e8d8bef9SDimitry Andric
544e8d8bef9SDimitry Andric assert(
545e8d8bef9SDimitry Andric isa<VectorType>(Ptrs->getType()) &&
546e8d8bef9SDimitry Andric isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
547e8d8bef9SDimitry Andric "Vector of pointers is expected in masked scatter intrinsic");
548e8d8bef9SDimitry Andric
549e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext());
550e8d8bef9SDimitry Andric Instruction *InsertPt = CI;
551e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt);
552e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc());
553e8d8bef9SDimitry Andric
554e8d8bef9SDimitry Andric MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
555e8d8bef9SDimitry Andric unsigned VectorWidth = SrcFVTy->getNumElements();
556e8d8bef9SDimitry Andric
557e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants.
558e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) {
559e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
560e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
561e8d8bef9SDimitry Andric continue;
562e8d8bef9SDimitry Andric Value *OneElt =
563e8d8bef9SDimitry Andric Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
564e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
565e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
566e8d8bef9SDimitry Andric }
567e8d8bef9SDimitry Andric CI->eraseFromParent();
568e8d8bef9SDimitry Andric return;
569e8d8bef9SDimitry Andric }
570e8d8bef9SDimitry Andric
571e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates
572e8d8bef9SDimitry Andric // better results on X86 at least.
573e8d8bef9SDimitry Andric Value *SclrMask;
574e8d8bef9SDimitry Andric if (VectorWidth != 1) {
575e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
576e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
577e8d8bef9SDimitry Andric }
578e8d8bef9SDimitry Andric
579e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
580e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration
581e8d8bef9SDimitry Andric //
582e8d8bef9SDimitry Andric // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
583e8d8bef9SDimitry Andric // %cond = icmp ne i16 %mask_1, 0
584e8d8bef9SDimitry Andric // br i1 %Mask1, label %cond.store, label %else
585e8d8bef9SDimitry Andric //
586e8d8bef9SDimitry Andric Value *Predicate;
587e8d8bef9SDimitry Andric if (VectorWidth != 1) {
588fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet(
589fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
590e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
591e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0));
592e8d8bef9SDimitry Andric } else {
593e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
594e8d8bef9SDimitry Andric }
595e8d8bef9SDimitry Andric
596e8d8bef9SDimitry Andric // Create "cond" block
597e8d8bef9SDimitry Andric //
598e8d8bef9SDimitry Andric // %Elt1 = extractelement <16 x i32> %Src, i32 1
599e8d8bef9SDimitry Andric // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
600e8d8bef9SDimitry Andric // %store i32 %Elt1, i32* %Ptr1
601e8d8bef9SDimitry Andric //
602fe6060f1SDimitry Andric Instruction *ThenTerm =
603fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
604fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU);
605e8d8bef9SDimitry Andric
606fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent();
607fe6060f1SDimitry Andric CondBlock->setName("cond.store");
608fe6060f1SDimitry Andric
609fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator());
610e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
611e8d8bef9SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
612e8d8bef9SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
613e8d8bef9SDimitry Andric
614e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration
615fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
616fe6060f1SDimitry Andric NewIfBlock->setName("else");
617fe6060f1SDimitry Andric
618fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
619e8d8bef9SDimitry Andric }
620e8d8bef9SDimitry Andric CI->eraseFromParent();
621e8d8bef9SDimitry Andric
622e8d8bef9SDimitry Andric ModifiedDT = true;
623e8d8bef9SDimitry Andric }
624e8d8bef9SDimitry Andric
scalarizeMaskedExpandLoad(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)625fe6060f1SDimitry Andric static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
626fe6060f1SDimitry Andric DomTreeUpdater *DTU, bool &ModifiedDT) {
627e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(0);
628e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(1);
629e8d8bef9SDimitry Andric Value *PassThru = CI->getArgOperand(2);
630*0fca6ea1SDimitry Andric Align Alignment = CI->getParamAlign(0).valueOrOne();
631e8d8bef9SDimitry Andric
632e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(CI->getType());
633e8d8bef9SDimitry Andric
634e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType();
635e8d8bef9SDimitry Andric
636e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext());
637e8d8bef9SDimitry Andric Instruction *InsertPt = CI;
638e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent();
639e8d8bef9SDimitry Andric
640e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt);
641e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc());
642e8d8bef9SDimitry Andric
643e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements();
644e8d8bef9SDimitry Andric
645e8d8bef9SDimitry Andric // The result vector
646e8d8bef9SDimitry Andric Value *VResult = PassThru;
647e8d8bef9SDimitry Andric
648*0fca6ea1SDimitry Andric // Adjust alignment for the scalar instruction.
649*0fca6ea1SDimitry Andric const Align AdjustedAlignment =
650*0fca6ea1SDimitry Andric commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);
651*0fca6ea1SDimitry Andric
652e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants.
65306c3fb27SDimitry Andric // Create a build_vector pattern, with loads/poisons as necessary and then
654e8d8bef9SDimitry Andric // shuffle blend with the pass through value.
655e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) {
656e8d8bef9SDimitry Andric unsigned MemIndex = 0;
657bdd1243dSDimitry Andric VResult = PoisonValue::get(VecType);
65806c3fb27SDimitry Andric SmallVector<int, 16> ShuffleMask(VectorWidth, PoisonMaskElem);
659e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
660e8d8bef9SDimitry Andric Value *InsertElt;
661e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
66206c3fb27SDimitry Andric InsertElt = PoisonValue::get(EltTy);
663e8d8bef9SDimitry Andric ShuffleMask[Idx] = Idx + VectorWidth;
664e8d8bef9SDimitry Andric } else {
665e8d8bef9SDimitry Andric Value *NewPtr =
666e8d8bef9SDimitry Andric Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
667*0fca6ea1SDimitry Andric InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, AdjustedAlignment,
668e8d8bef9SDimitry Andric "Load" + Twine(Idx));
669e8d8bef9SDimitry Andric ShuffleMask[Idx] = Idx;
670e8d8bef9SDimitry Andric ++MemIndex;
671e8d8bef9SDimitry Andric }
672e8d8bef9SDimitry Andric VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
673e8d8bef9SDimitry Andric "Res" + Twine(Idx));
674e8d8bef9SDimitry Andric }
675e8d8bef9SDimitry Andric VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
676e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult);
677e8d8bef9SDimitry Andric CI->eraseFromParent();
678e8d8bef9SDimitry Andric return;
679e8d8bef9SDimitry Andric }
680e8d8bef9SDimitry Andric
681e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates
682e8d8bef9SDimitry Andric // better results on X86 at least.
683e8d8bef9SDimitry Andric Value *SclrMask;
684e8d8bef9SDimitry Andric if (VectorWidth != 1) {
685e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
686e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
687e8d8bef9SDimitry Andric }
688e8d8bef9SDimitry Andric
689e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
690e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration
691e8d8bef9SDimitry Andric //
692e8d8bef9SDimitry Andric // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
693e8d8bef9SDimitry Andric // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
694e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.load, label %else
695e8d8bef9SDimitry Andric //
696e8d8bef9SDimitry Andric
697e8d8bef9SDimitry Andric Value *Predicate;
698e8d8bef9SDimitry Andric if (VectorWidth != 1) {
699fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet(
700fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
701e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
702e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0));
703e8d8bef9SDimitry Andric } else {
704e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
705e8d8bef9SDimitry Andric }
706e8d8bef9SDimitry Andric
707e8d8bef9SDimitry Andric // Create "cond" block
708e8d8bef9SDimitry Andric //
709e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0
710e8d8bef9SDimitry Andric // %Elt = load i32* %EltAddr
711e8d8bef9SDimitry Andric // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
712e8d8bef9SDimitry Andric //
713fe6060f1SDimitry Andric Instruction *ThenTerm =
714fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
715fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU);
716e8d8bef9SDimitry Andric
717fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent();
718fe6060f1SDimitry Andric CondBlock->setName("cond.load");
719fe6060f1SDimitry Andric
720fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator());
721*0fca6ea1SDimitry Andric LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, AdjustedAlignment);
722e8d8bef9SDimitry Andric Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
723e8d8bef9SDimitry Andric
724e8d8bef9SDimitry Andric // Move the pointer if there are more blocks to come.
725e8d8bef9SDimitry Andric Value *NewPtr;
726e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth)
727e8d8bef9SDimitry Andric NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
728e8d8bef9SDimitry Andric
729e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration
730fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
731fe6060f1SDimitry Andric NewIfBlock->setName("else");
732e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock;
733e8d8bef9SDimitry Andric IfBlock = NewIfBlock;
734e8d8bef9SDimitry Andric
735e8d8bef9SDimitry Andric // Create the phi to join the new and previous value.
736fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
737e8d8bef9SDimitry Andric PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
738e8d8bef9SDimitry Andric ResultPhi->addIncoming(NewVResult, CondBlock);
739e8d8bef9SDimitry Andric ResultPhi->addIncoming(VResult, PrevIfBlock);
740e8d8bef9SDimitry Andric VResult = ResultPhi;
741e8d8bef9SDimitry Andric
742e8d8bef9SDimitry Andric // Add a PHI for the pointer if this isn't the last iteration.
743e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) {
744e8d8bef9SDimitry Andric PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
745e8d8bef9SDimitry Andric PtrPhi->addIncoming(NewPtr, CondBlock);
746e8d8bef9SDimitry Andric PtrPhi->addIncoming(Ptr, PrevIfBlock);
747e8d8bef9SDimitry Andric Ptr = PtrPhi;
748e8d8bef9SDimitry Andric }
749e8d8bef9SDimitry Andric }
750e8d8bef9SDimitry Andric
751e8d8bef9SDimitry Andric CI->replaceAllUsesWith(VResult);
752e8d8bef9SDimitry Andric CI->eraseFromParent();
753e8d8bef9SDimitry Andric
754e8d8bef9SDimitry Andric ModifiedDT = true;
755e8d8bef9SDimitry Andric }
756e8d8bef9SDimitry Andric
scalarizeMaskedCompressStore(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)757fe6060f1SDimitry Andric static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI,
758fe6060f1SDimitry Andric DomTreeUpdater *DTU,
759fe6060f1SDimitry Andric bool &ModifiedDT) {
760e8d8bef9SDimitry Andric Value *Src = CI->getArgOperand(0);
761e8d8bef9SDimitry Andric Value *Ptr = CI->getArgOperand(1);
762e8d8bef9SDimitry Andric Value *Mask = CI->getArgOperand(2);
763*0fca6ea1SDimitry Andric Align Alignment = CI->getParamAlign(1).valueOrOne();
764e8d8bef9SDimitry Andric
765e8d8bef9SDimitry Andric auto *VecType = cast<FixedVectorType>(Src->getType());
766e8d8bef9SDimitry Andric
767e8d8bef9SDimitry Andric IRBuilder<> Builder(CI->getContext());
768e8d8bef9SDimitry Andric Instruction *InsertPt = CI;
769e8d8bef9SDimitry Andric BasicBlock *IfBlock = CI->getParent();
770e8d8bef9SDimitry Andric
771e8d8bef9SDimitry Andric Builder.SetInsertPoint(InsertPt);
772e8d8bef9SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc());
773e8d8bef9SDimitry Andric
774e8d8bef9SDimitry Andric Type *EltTy = VecType->getElementType();
775e8d8bef9SDimitry Andric
776*0fca6ea1SDimitry Andric // Adjust alignment for the scalar instruction.
777*0fca6ea1SDimitry Andric const Align AdjustedAlignment =
778*0fca6ea1SDimitry Andric commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);
779*0fca6ea1SDimitry Andric
780e8d8bef9SDimitry Andric unsigned VectorWidth = VecType->getNumElements();
781e8d8bef9SDimitry Andric
782e8d8bef9SDimitry Andric // Shorten the way if the mask is a vector of constants.
783e8d8bef9SDimitry Andric if (isConstantIntVector(Mask)) {
784e8d8bef9SDimitry Andric unsigned MemIndex = 0;
785e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
786e8d8bef9SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
787e8d8bef9SDimitry Andric continue;
788e8d8bef9SDimitry Andric Value *OneElt =
789e8d8bef9SDimitry Andric Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
790e8d8bef9SDimitry Andric Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
791*0fca6ea1SDimitry Andric Builder.CreateAlignedStore(OneElt, NewPtr, AdjustedAlignment);
792e8d8bef9SDimitry Andric ++MemIndex;
793e8d8bef9SDimitry Andric }
794e8d8bef9SDimitry Andric CI->eraseFromParent();
795e8d8bef9SDimitry Andric return;
796e8d8bef9SDimitry Andric }
797e8d8bef9SDimitry Andric
798e8d8bef9SDimitry Andric // If the mask is not v1i1, use scalar bit test operations. This generates
799e8d8bef9SDimitry Andric // better results on X86 at least.
800e8d8bef9SDimitry Andric Value *SclrMask;
801e8d8bef9SDimitry Andric if (VectorWidth != 1) {
802e8d8bef9SDimitry Andric Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
803e8d8bef9SDimitry Andric SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
804e8d8bef9SDimitry Andric }
805e8d8bef9SDimitry Andric
806e8d8bef9SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
807e8d8bef9SDimitry Andric // Fill the "else" block, created in the previous iteration
808e8d8bef9SDimitry Andric //
809e8d8bef9SDimitry Andric // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
810e8d8bef9SDimitry Andric // br i1 %mask_1, label %cond.store, label %else
811e8d8bef9SDimitry Andric //
812e8d8bef9SDimitry Andric Value *Predicate;
813e8d8bef9SDimitry Andric if (VectorWidth != 1) {
814fe6060f1SDimitry Andric Value *Mask = Builder.getInt(APInt::getOneBitSet(
815fe6060f1SDimitry Andric VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
816e8d8bef9SDimitry Andric Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
817e8d8bef9SDimitry Andric Builder.getIntN(VectorWidth, 0));
818e8d8bef9SDimitry Andric } else {
819e8d8bef9SDimitry Andric Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
820e8d8bef9SDimitry Andric }
821e8d8bef9SDimitry Andric
822e8d8bef9SDimitry Andric // Create "cond" block
823e8d8bef9SDimitry Andric //
824e8d8bef9SDimitry Andric // %OneElt = extractelement <16 x i32> %Src, i32 Idx
825e8d8bef9SDimitry Andric // %EltAddr = getelementptr i32* %1, i32 0
826e8d8bef9SDimitry Andric // %store i32 %OneElt, i32* %EltAddr
827e8d8bef9SDimitry Andric //
828fe6060f1SDimitry Andric Instruction *ThenTerm =
829fe6060f1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
830fe6060f1SDimitry Andric /*BranchWeights=*/nullptr, DTU);
831e8d8bef9SDimitry Andric
832fe6060f1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent();
833fe6060f1SDimitry Andric CondBlock->setName("cond.store");
834fe6060f1SDimitry Andric
835fe6060f1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator());
836e8d8bef9SDimitry Andric Value *OneElt = Builder.CreateExtractElement(Src, Idx);
837*0fca6ea1SDimitry Andric Builder.CreateAlignedStore(OneElt, Ptr, AdjustedAlignment);
838e8d8bef9SDimitry Andric
839e8d8bef9SDimitry Andric // Move the pointer if there are more blocks to come.
840e8d8bef9SDimitry Andric Value *NewPtr;
841e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth)
842e8d8bef9SDimitry Andric NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
843e8d8bef9SDimitry Andric
844e8d8bef9SDimitry Andric // Create "else" block, fill it in the next iteration
845fe6060f1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
846fe6060f1SDimitry Andric NewIfBlock->setName("else");
847e8d8bef9SDimitry Andric BasicBlock *PrevIfBlock = IfBlock;
848e8d8bef9SDimitry Andric IfBlock = NewIfBlock;
849e8d8bef9SDimitry Andric
850fe6060f1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
851fe6060f1SDimitry Andric
852e8d8bef9SDimitry Andric // Add a PHI for the pointer if this isn't the last iteration.
853e8d8bef9SDimitry Andric if ((Idx + 1) != VectorWidth) {
854e8d8bef9SDimitry Andric PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
855e8d8bef9SDimitry Andric PtrPhi->addIncoming(NewPtr, CondBlock);
856e8d8bef9SDimitry Andric PtrPhi->addIncoming(Ptr, PrevIfBlock);
857e8d8bef9SDimitry Andric Ptr = PtrPhi;
858e8d8bef9SDimitry Andric }
859e8d8bef9SDimitry Andric }
860e8d8bef9SDimitry Andric CI->eraseFromParent();
861e8d8bef9SDimitry Andric
862e8d8bef9SDimitry Andric ModifiedDT = true;
863e8d8bef9SDimitry Andric }
864e8d8bef9SDimitry Andric
scalarizeMaskedVectorHistogram(const DataLayout & DL,CallInst * CI,DomTreeUpdater * DTU,bool & ModifiedDT)865*0fca6ea1SDimitry Andric static void scalarizeMaskedVectorHistogram(const DataLayout &DL, CallInst *CI,
866*0fca6ea1SDimitry Andric DomTreeUpdater *DTU,
867*0fca6ea1SDimitry Andric bool &ModifiedDT) {
868*0fca6ea1SDimitry Andric // If we extend histogram to return a result someday (like the updated vector)
869*0fca6ea1SDimitry Andric // then we'll need to support it here.
870*0fca6ea1SDimitry Andric assert(CI->getType()->isVoidTy() && "Histogram with non-void return.");
871*0fca6ea1SDimitry Andric Value *Ptrs = CI->getArgOperand(0);
872*0fca6ea1SDimitry Andric Value *Inc = CI->getArgOperand(1);
873*0fca6ea1SDimitry Andric Value *Mask = CI->getArgOperand(2);
874*0fca6ea1SDimitry Andric
875*0fca6ea1SDimitry Andric auto *AddrType = cast<FixedVectorType>(Ptrs->getType());
876*0fca6ea1SDimitry Andric Type *EltTy = Inc->getType();
877*0fca6ea1SDimitry Andric
878*0fca6ea1SDimitry Andric IRBuilder<> Builder(CI->getContext());
879*0fca6ea1SDimitry Andric Instruction *InsertPt = CI;
880*0fca6ea1SDimitry Andric Builder.SetInsertPoint(InsertPt);
881*0fca6ea1SDimitry Andric
882*0fca6ea1SDimitry Andric Builder.SetCurrentDebugLocation(CI->getDebugLoc());
883*0fca6ea1SDimitry Andric
884*0fca6ea1SDimitry Andric // FIXME: Do we need to add an alignment parameter to the intrinsic?
885*0fca6ea1SDimitry Andric unsigned VectorWidth = AddrType->getNumElements();
886*0fca6ea1SDimitry Andric
887*0fca6ea1SDimitry Andric // Shorten the way if the mask is a vector of constants.
888*0fca6ea1SDimitry Andric if (isConstantIntVector(Mask)) {
889*0fca6ea1SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
890*0fca6ea1SDimitry Andric if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
891*0fca6ea1SDimitry Andric continue;
892*0fca6ea1SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
893*0fca6ea1SDimitry Andric LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx));
894*0fca6ea1SDimitry Andric Value *Add = Builder.CreateAdd(Load, Inc);
895*0fca6ea1SDimitry Andric Builder.CreateStore(Add, Ptr);
896*0fca6ea1SDimitry Andric }
897*0fca6ea1SDimitry Andric CI->eraseFromParent();
898*0fca6ea1SDimitry Andric return;
899*0fca6ea1SDimitry Andric }
900*0fca6ea1SDimitry Andric
901*0fca6ea1SDimitry Andric for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
902*0fca6ea1SDimitry Andric Value *Predicate =
903*0fca6ea1SDimitry Andric Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
904*0fca6ea1SDimitry Andric
905*0fca6ea1SDimitry Andric Instruction *ThenTerm =
906*0fca6ea1SDimitry Andric SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
907*0fca6ea1SDimitry Andric /*BranchWeights=*/nullptr, DTU);
908*0fca6ea1SDimitry Andric
909*0fca6ea1SDimitry Andric BasicBlock *CondBlock = ThenTerm->getParent();
910*0fca6ea1SDimitry Andric CondBlock->setName("cond.histogram.update");
911*0fca6ea1SDimitry Andric
912*0fca6ea1SDimitry Andric Builder.SetInsertPoint(CondBlock->getTerminator());
913*0fca6ea1SDimitry Andric Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
914*0fca6ea1SDimitry Andric LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx));
915*0fca6ea1SDimitry Andric Value *Add = Builder.CreateAdd(Load, Inc);
916*0fca6ea1SDimitry Andric Builder.CreateStore(Add, Ptr);
917*0fca6ea1SDimitry Andric
918*0fca6ea1SDimitry Andric // Create "else" block, fill it in the next iteration
919*0fca6ea1SDimitry Andric BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
920*0fca6ea1SDimitry Andric NewIfBlock->setName("else");
921*0fca6ea1SDimitry Andric Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
922*0fca6ea1SDimitry Andric }
923*0fca6ea1SDimitry Andric
924*0fca6ea1SDimitry Andric CI->eraseFromParent();
925*0fca6ea1SDimitry Andric ModifiedDT = true;
926*0fca6ea1SDimitry Andric }
927*0fca6ea1SDimitry Andric
runImpl(Function & F,const TargetTransformInfo & TTI,DominatorTree * DT)928fe6060f1SDimitry Andric static bool runImpl(Function &F, const TargetTransformInfo &TTI,
929fe6060f1SDimitry Andric DominatorTree *DT) {
930bdd1243dSDimitry Andric std::optional<DomTreeUpdater> DTU;
931fe6060f1SDimitry Andric if (DT)
932fe6060f1SDimitry Andric DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
933fe6060f1SDimitry Andric
934e8d8bef9SDimitry Andric bool EverMadeChange = false;
935e8d8bef9SDimitry Andric bool MadeChange = true;
936*0fca6ea1SDimitry Andric auto &DL = F.getDataLayout();
937e8d8bef9SDimitry Andric while (MadeChange) {
938e8d8bef9SDimitry Andric MadeChange = false;
939349cc55cSDimitry Andric for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
940e8d8bef9SDimitry Andric bool ModifiedDTOnIteration = false;
941349cc55cSDimitry Andric MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
942bdd1243dSDimitry Andric DTU ? &*DTU : nullptr);
943fe6060f1SDimitry Andric
944e8d8bef9SDimitry Andric // Restart BB iteration if the dominator tree of the Function was changed
945e8d8bef9SDimitry Andric if (ModifiedDTOnIteration)
946e8d8bef9SDimitry Andric break;
947e8d8bef9SDimitry Andric }
948e8d8bef9SDimitry Andric
949e8d8bef9SDimitry Andric EverMadeChange |= MadeChange;
950e8d8bef9SDimitry Andric }
951e8d8bef9SDimitry Andric return EverMadeChange;
952e8d8bef9SDimitry Andric }
953e8d8bef9SDimitry Andric
runOnFunction(Function & F)954e8d8bef9SDimitry Andric bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
955e8d8bef9SDimitry Andric auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
956fe6060f1SDimitry Andric DominatorTree *DT = nullptr;
957fe6060f1SDimitry Andric if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
958fe6060f1SDimitry Andric DT = &DTWP->getDomTree();
959fe6060f1SDimitry Andric return runImpl(F, TTI, DT);
960e8d8bef9SDimitry Andric }
961e8d8bef9SDimitry Andric
962e8d8bef9SDimitry Andric PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)963e8d8bef9SDimitry Andric ScalarizeMaskedMemIntrinPass::run(Function &F, FunctionAnalysisManager &AM) {
964e8d8bef9SDimitry Andric auto &TTI = AM.getResult<TargetIRAnalysis>(F);
965fe6060f1SDimitry Andric auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
966fe6060f1SDimitry Andric if (!runImpl(F, TTI, DT))
967e8d8bef9SDimitry Andric return PreservedAnalyses::all();
968e8d8bef9SDimitry Andric PreservedAnalyses PA;
969e8d8bef9SDimitry Andric PA.preserve<TargetIRAnalysis>();
970fe6060f1SDimitry Andric PA.preserve<DominatorTreeAnalysis>();
971e8d8bef9SDimitry Andric return PA;
972e8d8bef9SDimitry Andric }
973e8d8bef9SDimitry Andric
optimizeBlock(BasicBlock & BB,bool & ModifiedDT,const TargetTransformInfo & TTI,const DataLayout & DL,DomTreeUpdater * DTU)974e8d8bef9SDimitry Andric static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
975fe6060f1SDimitry Andric const TargetTransformInfo &TTI, const DataLayout &DL,
976fe6060f1SDimitry Andric DomTreeUpdater *DTU) {
977e8d8bef9SDimitry Andric bool MadeChange = false;
978e8d8bef9SDimitry Andric
979e8d8bef9SDimitry Andric BasicBlock::iterator CurInstIterator = BB.begin();
980e8d8bef9SDimitry Andric while (CurInstIterator != BB.end()) {
981e8d8bef9SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
982fe6060f1SDimitry Andric MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU);
983e8d8bef9SDimitry Andric if (ModifiedDT)
984e8d8bef9SDimitry Andric return true;
985e8d8bef9SDimitry Andric }
986e8d8bef9SDimitry Andric
987e8d8bef9SDimitry Andric return MadeChange;
988e8d8bef9SDimitry Andric }
989e8d8bef9SDimitry Andric
optimizeCallInst(CallInst * CI,bool & ModifiedDT,const TargetTransformInfo & TTI,const DataLayout & DL,DomTreeUpdater * DTU)990e8d8bef9SDimitry Andric static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
991e8d8bef9SDimitry Andric const TargetTransformInfo &TTI,
992fe6060f1SDimitry Andric const DataLayout &DL, DomTreeUpdater *DTU) {
993e8d8bef9SDimitry Andric IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
994e8d8bef9SDimitry Andric if (II) {
995e8d8bef9SDimitry Andric // The scalarization code below does not work for scalable vectors.
996e8d8bef9SDimitry Andric if (isa<ScalableVectorType>(II->getType()) ||
997349cc55cSDimitry Andric any_of(II->args(),
998e8d8bef9SDimitry Andric [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
999e8d8bef9SDimitry Andric return false;
1000e8d8bef9SDimitry Andric
1001e8d8bef9SDimitry Andric switch (II->getIntrinsicID()) {
1002e8d8bef9SDimitry Andric default:
1003e8d8bef9SDimitry Andric break;
1004*0fca6ea1SDimitry Andric case Intrinsic::experimental_vector_histogram_add:
1005*0fca6ea1SDimitry Andric if (TTI.isLegalMaskedVectorHistogram(CI->getArgOperand(0)->getType(),
1006*0fca6ea1SDimitry Andric CI->getArgOperand(1)->getType()))
1007*0fca6ea1SDimitry Andric return false;
1008*0fca6ea1SDimitry Andric scalarizeMaskedVectorHistogram(DL, CI, DTU, ModifiedDT);
1009*0fca6ea1SDimitry Andric return true;
1010e8d8bef9SDimitry Andric case Intrinsic::masked_load:
1011e8d8bef9SDimitry Andric // Scalarize unsupported vector masked load
1012e8d8bef9SDimitry Andric if (TTI.isLegalMaskedLoad(
1013e8d8bef9SDimitry Andric CI->getType(),
1014e8d8bef9SDimitry Andric cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
1015e8d8bef9SDimitry Andric return false;
1016fe6060f1SDimitry Andric scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT);
1017e8d8bef9SDimitry Andric return true;
1018e8d8bef9SDimitry Andric case Intrinsic::masked_store:
1019e8d8bef9SDimitry Andric if (TTI.isLegalMaskedStore(
1020e8d8bef9SDimitry Andric CI->getArgOperand(0)->getType(),
1021e8d8bef9SDimitry Andric cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
1022e8d8bef9SDimitry Andric return false;
1023fe6060f1SDimitry Andric scalarizeMaskedStore(DL, CI, DTU, ModifiedDT);
1024e8d8bef9SDimitry Andric return true;
1025e8d8bef9SDimitry Andric case Intrinsic::masked_gather: {
1026fe6060f1SDimitry Andric MaybeAlign MA =
1027fe6060f1SDimitry Andric cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
1028e8d8bef9SDimitry Andric Type *LoadTy = CI->getType();
1029fe6060f1SDimitry Andric Align Alignment = DL.getValueOrABITypeAlignment(MA,
1030fe6060f1SDimitry Andric LoadTy->getScalarType());
103104eeddc0SDimitry Andric if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
103204eeddc0SDimitry Andric !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
1033e8d8bef9SDimitry Andric return false;
1034fe6060f1SDimitry Andric scalarizeMaskedGather(DL, CI, DTU, ModifiedDT);
1035e8d8bef9SDimitry Andric return true;
1036e8d8bef9SDimitry Andric }
1037e8d8bef9SDimitry Andric case Intrinsic::masked_scatter: {
1038fe6060f1SDimitry Andric MaybeAlign MA =
1039fe6060f1SDimitry Andric cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
1040e8d8bef9SDimitry Andric Type *StoreTy = CI->getArgOperand(0)->getType();
1041fe6060f1SDimitry Andric Align Alignment = DL.getValueOrABITypeAlignment(MA,
1042fe6060f1SDimitry Andric StoreTy->getScalarType());
104304eeddc0SDimitry Andric if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
104404eeddc0SDimitry Andric !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
104504eeddc0SDimitry Andric Alignment))
1046e8d8bef9SDimitry Andric return false;
1047fe6060f1SDimitry Andric scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT);
1048e8d8bef9SDimitry Andric return true;
1049e8d8bef9SDimitry Andric }
1050e8d8bef9SDimitry Andric case Intrinsic::masked_expandload:
1051*0fca6ea1SDimitry Andric if (TTI.isLegalMaskedExpandLoad(
1052*0fca6ea1SDimitry Andric CI->getType(),
1053*0fca6ea1SDimitry Andric CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne()))
1054e8d8bef9SDimitry Andric return false;
1055fe6060f1SDimitry Andric scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT);
1056e8d8bef9SDimitry Andric return true;
1057e8d8bef9SDimitry Andric case Intrinsic::masked_compressstore:
1058*0fca6ea1SDimitry Andric if (TTI.isLegalMaskedCompressStore(
1059*0fca6ea1SDimitry Andric CI->getArgOperand(0)->getType(),
1060*0fca6ea1SDimitry Andric CI->getAttributes().getParamAttrs(1).getAlignment().valueOrOne()))
1061e8d8bef9SDimitry Andric return false;
1062fe6060f1SDimitry Andric scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT);
1063e8d8bef9SDimitry Andric return true;
1064e8d8bef9SDimitry Andric }
1065e8d8bef9SDimitry Andric }
1066e8d8bef9SDimitry Andric
1067e8d8bef9SDimitry Andric return false;
1068e8d8bef9SDimitry Andric }
1069