xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/Scalarizer.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
906c3fb27SDimitry Andric // This pass converts vector operations into scalar operations (or, optionally,
1006c3fb27SDimitry Andric // operations on smaller vector widths), in order to expose optimization
1106c3fb27SDimitry Andric // opportunities on the individual scalar operations.
120b57cec5SDimitry Andric // It is mainly intended for targets that do not have vector units, but it
130b57cec5SDimitry Andric // may also be useful for revectorizing code to different vector widths.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric 
17480093f4SDimitry Andric #include "llvm/Transforms/Scalar/Scalarizer.h"
180b57cec5SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
190b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
200b57cec5SDimitry Andric #include "llvm/ADT/Twine.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
220b57cec5SDimitry Andric #include "llvm/IR/Argument.h"
230b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
240b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
250b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
260b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
275ffd83dbSDimitry Andric #include "llvm/IR/Dominators.h"
280b57cec5SDimitry Andric #include "llvm/IR/Function.h"
290b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
300b57cec5SDimitry Andric #include "llvm/IR/InstVisitor.h"
310b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h"
320b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
330b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
340b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
350b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h"
360b57cec5SDimitry Andric #include "llvm/IR/Module.h"
370b57cec5SDimitry Andric #include "llvm/IR/Type.h"
380b57cec5SDimitry Andric #include "llvm/IR/Value.h"
390b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
40480093f4SDimitry Andric #include "llvm/Support/CommandLine.h"
415ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/Local.h"
420b57cec5SDimitry Andric #include <cassert>
430b57cec5SDimitry Andric #include <cstdint>
440b57cec5SDimitry Andric #include <iterator>
450b57cec5SDimitry Andric #include <map>
460b57cec5SDimitry Andric #include <utility>
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric using namespace llvm;
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric #define DEBUG_TYPE "scalarizer"
510b57cec5SDimitry Andric 
5281ad6265SDimitry Andric static cl::opt<bool> ClScalarizeVariableInsertExtract(
535ffd83dbSDimitry Andric     "scalarize-variable-insert-extract", cl::init(true), cl::Hidden,
545ffd83dbSDimitry Andric     cl::desc("Allow the scalarizer pass to scalarize "
555ffd83dbSDimitry Andric              "insertelement/extractelement with variable index"));
565ffd83dbSDimitry Andric 
570b57cec5SDimitry Andric // This is disabled by default because having separate loads and stores
580b57cec5SDimitry Andric // makes it more likely that the -combiner-alias-analysis limits will be
590b57cec5SDimitry Andric // reached.
6081ad6265SDimitry Andric static cl::opt<bool> ClScalarizeLoadStore(
6181ad6265SDimitry Andric     "scalarize-load-store", cl::init(false), cl::Hidden,
620b57cec5SDimitry Andric     cl::desc("Allow the scalarizer pass to scalarize loads and store"));
630b57cec5SDimitry Andric 
6406c3fb27SDimitry Andric // Split vectors larger than this size into fragments, where each fragment is
6506c3fb27SDimitry Andric // either a vector no larger than this size or a scalar.
6606c3fb27SDimitry Andric //
6706c3fb27SDimitry Andric // Instructions with operands or results of different sizes that would be split
6806c3fb27SDimitry Andric // into a different number of fragments are currently left as-is.
6906c3fb27SDimitry Andric static cl::opt<unsigned> ClScalarizeMinBits(
7006c3fb27SDimitry Andric     "scalarize-min-bits", cl::init(0), cl::Hidden,
7106c3fb27SDimitry Andric     cl::desc("Instruct the scalarizer pass to attempt to keep values of a "
7206c3fb27SDimitry Andric              "minimum number of bits"));
7306c3fb27SDimitry Andric 
740b57cec5SDimitry Andric namespace {
750b57cec5SDimitry Andric 
skipPastPhiNodesAndDbg(BasicBlock::iterator Itr)76349cc55cSDimitry Andric BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
77349cc55cSDimitry Andric   BasicBlock *BB = Itr->getParent();
78349cc55cSDimitry Andric   if (isa<PHINode>(Itr))
79349cc55cSDimitry Andric     Itr = BB->getFirstInsertionPt();
80349cc55cSDimitry Andric   if (Itr != BB->end())
81349cc55cSDimitry Andric     Itr = skipDebugIntrinsics(Itr);
82349cc55cSDimitry Andric   return Itr;
83349cc55cSDimitry Andric }
84349cc55cSDimitry Andric 
850b57cec5SDimitry Andric // Used to store the scattered form of a vector.
860b57cec5SDimitry Andric using ValueVector = SmallVector<Value *, 8>;
870b57cec5SDimitry Andric 
88bdd1243dSDimitry Andric // Used to map a vector Value and associated type to its scattered form.
89bdd1243dSDimitry Andric // The associated type is only non-null for pointer values that are "scattered"
90bdd1243dSDimitry Andric // when used as pointer operands to load or store.
91bdd1243dSDimitry Andric //
92bdd1243dSDimitry Andric // We use std::map because we want iterators to persist across insertion and
93bdd1243dSDimitry Andric // because the values are relatively large.
94bdd1243dSDimitry Andric using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric // Lists Instructions that have been replaced with scalar implementations,
970b57cec5SDimitry Andric // along with a pointer to their scattered forms.
980b57cec5SDimitry Andric using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;
990b57cec5SDimitry Andric 
10006c3fb27SDimitry Andric struct VectorSplit {
10106c3fb27SDimitry Andric   // The type of the vector.
10206c3fb27SDimitry Andric   FixedVectorType *VecTy = nullptr;
10306c3fb27SDimitry Andric 
10406c3fb27SDimitry Andric   // The number of elements packed in a fragment (other than the remainder).
10506c3fb27SDimitry Andric   unsigned NumPacked = 0;
10606c3fb27SDimitry Andric 
10706c3fb27SDimitry Andric   // The number of fragments (scalars or smaller vectors) into which the vector
10806c3fb27SDimitry Andric   // shall be split.
10906c3fb27SDimitry Andric   unsigned NumFragments = 0;
11006c3fb27SDimitry Andric 
11106c3fb27SDimitry Andric   // The type of each complete fragment.
11206c3fb27SDimitry Andric   Type *SplitTy = nullptr;
11306c3fb27SDimitry Andric 
11406c3fb27SDimitry Andric   // The type of the remainder (last) fragment; null if all fragments are
11506c3fb27SDimitry Andric   // complete.
11606c3fb27SDimitry Andric   Type *RemainderTy = nullptr;
11706c3fb27SDimitry Andric 
getFragmentType__anon7628c1430111::VectorSplit11806c3fb27SDimitry Andric   Type *getFragmentType(unsigned I) const {
11906c3fb27SDimitry Andric     return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
12006c3fb27SDimitry Andric   }
12106c3fb27SDimitry Andric };
12206c3fb27SDimitry Andric 
1230b57cec5SDimitry Andric // Provides a very limited vector-like interface for lazily accessing one
1240b57cec5SDimitry Andric // component of a scattered vector or vector pointer.
1250b57cec5SDimitry Andric class Scatterer {
1260b57cec5SDimitry Andric public:
1270b57cec5SDimitry Andric   Scatterer() = default;
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric   // Scatter V into Size components.  If new instructions are needed,
1300b57cec5SDimitry Andric   // insert them before BBI in BB.  If Cache is nonnull, use it to cache
1310b57cec5SDimitry Andric   // the results.
13206c3fb27SDimitry Andric   Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
13306c3fb27SDimitry Andric             const VectorSplit &VS, ValueVector *cachePtr = nullptr);
1340b57cec5SDimitry Andric 
1350b57cec5SDimitry Andric   // Return component I, creating a new Value for it if necessary.
1360b57cec5SDimitry Andric   Value *operator[](unsigned I);
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric   // Return the number of components.
size() const13906c3fb27SDimitry Andric   unsigned size() const { return VS.NumFragments; }
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric private:
1420b57cec5SDimitry Andric   BasicBlock *BB;
1430b57cec5SDimitry Andric   BasicBlock::iterator BBI;
1440b57cec5SDimitry Andric   Value *V;
14506c3fb27SDimitry Andric   VectorSplit VS;
14606c3fb27SDimitry Andric   bool IsPointer;
1470b57cec5SDimitry Andric   ValueVector *CachePtr;
1480b57cec5SDimitry Andric   ValueVector Tmp;
1490b57cec5SDimitry Andric };
1500b57cec5SDimitry Andric 
151bdd1243dSDimitry Andric // FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
1520b57cec5SDimitry Andric // called Name that compares X and Y in the same way as FCI.
1530b57cec5SDimitry Andric struct FCmpSplitter {
FCmpSplitter__anon7628c1430111::FCmpSplitter1540b57cec5SDimitry Andric   FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
1550b57cec5SDimitry Andric 
operator ()__anon7628c1430111::FCmpSplitter1560b57cec5SDimitry Andric   Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
1570b57cec5SDimitry Andric                     const Twine &Name) const {
1580b57cec5SDimitry Andric     return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
1590b57cec5SDimitry Andric   }
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric   FCmpInst &FCI;
1620b57cec5SDimitry Andric };
1630b57cec5SDimitry Andric 
164bdd1243dSDimitry Andric // ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
1650b57cec5SDimitry Andric // called Name that compares X and Y in the same way as ICI.
1660b57cec5SDimitry Andric struct ICmpSplitter {
ICmpSplitter__anon7628c1430111::ICmpSplitter1670b57cec5SDimitry Andric   ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
1680b57cec5SDimitry Andric 
operator ()__anon7628c1430111::ICmpSplitter1690b57cec5SDimitry Andric   Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
1700b57cec5SDimitry Andric                     const Twine &Name) const {
1710b57cec5SDimitry Andric     return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
1720b57cec5SDimitry Andric   }
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric   ICmpInst &ICI;
1750b57cec5SDimitry Andric };
1760b57cec5SDimitry Andric 
177bdd1243dSDimitry Andric // UnarySplitter(UO)(Builder, X, Name) uses Builder to create
1780b57cec5SDimitry Andric // a unary operator like UO called Name with operand X.
1790b57cec5SDimitry Andric struct UnarySplitter {
UnarySplitter__anon7628c1430111::UnarySplitter1800b57cec5SDimitry Andric   UnarySplitter(UnaryOperator &uo) : UO(uo) {}
1810b57cec5SDimitry Andric 
operator ()__anon7628c1430111::UnarySplitter1820b57cec5SDimitry Andric   Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
1830b57cec5SDimitry Andric     return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
1840b57cec5SDimitry Andric   }
1850b57cec5SDimitry Andric 
1860b57cec5SDimitry Andric   UnaryOperator &UO;
1870b57cec5SDimitry Andric };
1880b57cec5SDimitry Andric 
189bdd1243dSDimitry Andric // BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
1900b57cec5SDimitry Andric // a binary operator like BO called Name with operands X and Y.
1910b57cec5SDimitry Andric struct BinarySplitter {
BinarySplitter__anon7628c1430111::BinarySplitter1920b57cec5SDimitry Andric   BinarySplitter(BinaryOperator &bo) : BO(bo) {}
1930b57cec5SDimitry Andric 
operator ()__anon7628c1430111::BinarySplitter1940b57cec5SDimitry Andric   Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
1950b57cec5SDimitry Andric                     const Twine &Name) const {
1960b57cec5SDimitry Andric     return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
1970b57cec5SDimitry Andric   }
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric   BinaryOperator &BO;
2000b57cec5SDimitry Andric };
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric // Information about a load or store that we're scalarizing.
2030b57cec5SDimitry Andric struct VectorLayout {
2040b57cec5SDimitry Andric   VectorLayout() = default;
2050b57cec5SDimitry Andric 
20606c3fb27SDimitry Andric   // Return the alignment of fragment Frag.
getFragmentAlign__anon7628c1430111::VectorLayout20706c3fb27SDimitry Andric   Align getFragmentAlign(unsigned Frag) {
20806c3fb27SDimitry Andric     return commonAlignment(VecAlign, Frag * SplitSize);
2090b57cec5SDimitry Andric   }
2100b57cec5SDimitry Andric 
21106c3fb27SDimitry Andric   // The split of the underlying vector type.
21206c3fb27SDimitry Andric   VectorSplit VS;
2130b57cec5SDimitry Andric 
2140b57cec5SDimitry Andric   // The alignment of the vector.
2155ffd83dbSDimitry Andric   Align VecAlign;
2160b57cec5SDimitry Andric 
21706c3fb27SDimitry Andric   // The size of each (non-remainder) fragment in bytes.
21806c3fb27SDimitry Andric   uint64_t SplitSize = 0;
2190b57cec5SDimitry Andric };
2200b57cec5SDimitry Andric 
22106c3fb27SDimitry Andric /// Concatenate the given fragments to a single vector value of the type
22206c3fb27SDimitry Andric /// described in @p VS.
concatenate(IRBuilder<> & Builder,ArrayRef<Value * > Fragments,const VectorSplit & VS,Twine Name)22306c3fb27SDimitry Andric static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
22406c3fb27SDimitry Andric                           const VectorSplit &VS, Twine Name) {
22506c3fb27SDimitry Andric   unsigned NumElements = VS.VecTy->getNumElements();
22606c3fb27SDimitry Andric   SmallVector<int> ExtendMask;
22706c3fb27SDimitry Andric   SmallVector<int> InsertMask;
22806c3fb27SDimitry Andric 
22906c3fb27SDimitry Andric   if (VS.NumPacked > 1) {
23006c3fb27SDimitry Andric     // Prepare the shufflevector masks once and re-use them for all
23106c3fb27SDimitry Andric     // fragments.
23206c3fb27SDimitry Andric     ExtendMask.resize(NumElements, -1);
23306c3fb27SDimitry Andric     for (unsigned I = 0; I < VS.NumPacked; ++I)
23406c3fb27SDimitry Andric       ExtendMask[I] = I;
23506c3fb27SDimitry Andric 
23606c3fb27SDimitry Andric     InsertMask.resize(NumElements);
23706c3fb27SDimitry Andric     for (unsigned I = 0; I < NumElements; ++I)
23806c3fb27SDimitry Andric       InsertMask[I] = I;
23906c3fb27SDimitry Andric   }
24006c3fb27SDimitry Andric 
24106c3fb27SDimitry Andric   Value *Res = PoisonValue::get(VS.VecTy);
24206c3fb27SDimitry Andric   for (unsigned I = 0; I < VS.NumFragments; ++I) {
24306c3fb27SDimitry Andric     Value *Fragment = Fragments[I];
24406c3fb27SDimitry Andric 
24506c3fb27SDimitry Andric     unsigned NumPacked = VS.NumPacked;
24606c3fb27SDimitry Andric     if (I == VS.NumFragments - 1 && VS.RemainderTy) {
24706c3fb27SDimitry Andric       if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
24806c3fb27SDimitry Andric         NumPacked = RemVecTy->getNumElements();
24906c3fb27SDimitry Andric       else
25006c3fb27SDimitry Andric         NumPacked = 1;
25106c3fb27SDimitry Andric     }
25206c3fb27SDimitry Andric 
25306c3fb27SDimitry Andric     if (NumPacked == 1) {
25406c3fb27SDimitry Andric       Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
25506c3fb27SDimitry Andric                                         Name + ".upto" + Twine(I));
25606c3fb27SDimitry Andric     } else {
25706c3fb27SDimitry Andric       Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
25806c3fb27SDimitry Andric       if (I == 0) {
25906c3fb27SDimitry Andric         Res = Fragment;
26006c3fb27SDimitry Andric       } else {
26106c3fb27SDimitry Andric         for (unsigned J = 0; J < NumPacked; ++J)
26206c3fb27SDimitry Andric           InsertMask[I * VS.NumPacked + J] = NumElements + J;
26306c3fb27SDimitry Andric         Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
26406c3fb27SDimitry Andric                                           Name + ".upto" + Twine(I));
26506c3fb27SDimitry Andric         for (unsigned J = 0; J < NumPacked; ++J)
26606c3fb27SDimitry Andric           InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
26706c3fb27SDimitry Andric       }
26806c3fb27SDimitry Andric     }
26906c3fb27SDimitry Andric   }
27006c3fb27SDimitry Andric 
27106c3fb27SDimitry Andric   return Res;
27206c3fb27SDimitry Andric }
27306c3fb27SDimitry Andric 
27481ad6265SDimitry Andric template <typename T>
getWithDefaultOverride(const cl::opt<T> & ClOption,const std::optional<T> & DefaultOverride)27581ad6265SDimitry Andric T getWithDefaultOverride(const cl::opt<T> &ClOption,
276bdd1243dSDimitry Andric                          const std::optional<T> &DefaultOverride) {
27781ad6265SDimitry Andric   return ClOption.getNumOccurrences() ? ClOption
27881ad6265SDimitry Andric                                       : DefaultOverride.value_or(ClOption);
27981ad6265SDimitry Andric }
28081ad6265SDimitry Andric 
2810b57cec5SDimitry Andric class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
2820b57cec5SDimitry Andric public:
ScalarizerVisitor(DominatorTree * DT,ScalarizerPassOptions Options)2835f757f3fSDimitry Andric   ScalarizerVisitor(DominatorTree *DT, ScalarizerPassOptions Options)
2845f757f3fSDimitry Andric       : DT(DT), ScalarizeVariableInsertExtract(getWithDefaultOverride(
2855f757f3fSDimitry Andric                     ClScalarizeVariableInsertExtract,
28681ad6265SDimitry Andric                     Options.ScalarizeVariableInsertExtract)),
28781ad6265SDimitry Andric         ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore,
28806c3fb27SDimitry Andric                                                   Options.ScalarizeLoadStore)),
28906c3fb27SDimitry Andric         ScalarizeMinBits(getWithDefaultOverride(ClScalarizeMinBits,
29006c3fb27SDimitry Andric                                                 Options.ScalarizeMinBits)) {}
2910b57cec5SDimitry Andric 
2920b57cec5SDimitry Andric   bool visit(Function &F);
2930b57cec5SDimitry Andric 
2940b57cec5SDimitry Andric   // InstVisitor methods.  They return true if the instruction was scalarized,
2950b57cec5SDimitry Andric   // false if nothing changed.
visitInstruction(Instruction & I)2960b57cec5SDimitry Andric   bool visitInstruction(Instruction &I) { return false; }
2970b57cec5SDimitry Andric   bool visitSelectInst(SelectInst &SI);
2980b57cec5SDimitry Andric   bool visitICmpInst(ICmpInst &ICI);
2990b57cec5SDimitry Andric   bool visitFCmpInst(FCmpInst &FCI);
3000b57cec5SDimitry Andric   bool visitUnaryOperator(UnaryOperator &UO);
3010b57cec5SDimitry Andric   bool visitBinaryOperator(BinaryOperator &BO);
3020b57cec5SDimitry Andric   bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
3030b57cec5SDimitry Andric   bool visitCastInst(CastInst &CI);
3040b57cec5SDimitry Andric   bool visitBitCastInst(BitCastInst &BCI);
3055ffd83dbSDimitry Andric   bool visitInsertElementInst(InsertElementInst &IEI);
3065ffd83dbSDimitry Andric   bool visitExtractElementInst(ExtractElementInst &EEI);
3070b57cec5SDimitry Andric   bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
3080b57cec5SDimitry Andric   bool visitPHINode(PHINode &PHI);
3090b57cec5SDimitry Andric   bool visitLoadInst(LoadInst &LI);
3100b57cec5SDimitry Andric   bool visitStoreInst(StoreInst &SI);
3110b57cec5SDimitry Andric   bool visitCallInst(CallInst &ICI);
31206c3fb27SDimitry Andric   bool visitFreezeInst(FreezeInst &FI);
3130b57cec5SDimitry Andric 
3140b57cec5SDimitry Andric private:
31506c3fb27SDimitry Andric   Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
31606c3fb27SDimitry Andric   void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
31781ad6265SDimitry Andric   void replaceUses(Instruction *Op, Value *CV);
3180b57cec5SDimitry Andric   bool canTransferMetadata(unsigned Kind);
3190b57cec5SDimitry Andric   void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
32006c3fb27SDimitry Andric   std::optional<VectorSplit> getVectorSplit(Type *Ty);
321bdd1243dSDimitry Andric   std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
3220b57cec5SDimitry Andric                                               const DataLayout &DL);
3230b57cec5SDimitry Andric   bool finish();
3240b57cec5SDimitry Andric 
3250b57cec5SDimitry Andric   template<typename T> bool splitUnary(Instruction &, const T &);
3260b57cec5SDimitry Andric   template<typename T> bool splitBinary(Instruction &, const T &);
3270b57cec5SDimitry Andric 
3280b57cec5SDimitry Andric   bool splitCall(CallInst &CI);
3290b57cec5SDimitry Andric 
3300b57cec5SDimitry Andric   ScatterMap Scattered;
3310b57cec5SDimitry Andric   GatherList Gathered;
33281ad6265SDimitry Andric   bool Scalarized;
3330b57cec5SDimitry Andric 
3345ffd83dbSDimitry Andric   SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;
3355ffd83dbSDimitry Andric 
336480093f4SDimitry Andric   DominatorTree *DT;
33781ad6265SDimitry Andric 
33881ad6265SDimitry Andric   const bool ScalarizeVariableInsertExtract;
33981ad6265SDimitry Andric   const bool ScalarizeLoadStore;
34006c3fb27SDimitry Andric   const unsigned ScalarizeMinBits;
3410b57cec5SDimitry Andric };
3420b57cec5SDimitry Andric 
3430b57cec5SDimitry Andric } // end anonymous namespace
3440b57cec5SDimitry Andric 
Scatterer(BasicBlock * bb,BasicBlock::iterator bbi,Value * v,const VectorSplit & VS,ValueVector * cachePtr)3450b57cec5SDimitry Andric Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
34606c3fb27SDimitry Andric                      const VectorSplit &VS, ValueVector *cachePtr)
34706c3fb27SDimitry Andric     : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
34806c3fb27SDimitry Andric   IsPointer = V->getType()->isPointerTy();
34906c3fb27SDimitry Andric   if (!CachePtr) {
35006c3fb27SDimitry Andric     Tmp.resize(VS.NumFragments, nullptr);
35106c3fb27SDimitry Andric   } else {
35206c3fb27SDimitry Andric     assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
35306c3fb27SDimitry Andric             IsPointer) &&
35406c3fb27SDimitry Andric            "Inconsistent vector sizes");
35506c3fb27SDimitry Andric     if (VS.NumFragments > CachePtr->size())
35606c3fb27SDimitry Andric       CachePtr->resize(VS.NumFragments, nullptr);
35781ad6265SDimitry Andric   }
3580b57cec5SDimitry Andric }
3590b57cec5SDimitry Andric 
36006c3fb27SDimitry Andric // Return fragment Frag, creating a new Value for it if necessary.
operator [](unsigned Frag)36106c3fb27SDimitry Andric Value *Scatterer::operator[](unsigned Frag) {
36206c3fb27SDimitry Andric   ValueVector &CV = CachePtr ? *CachePtr : Tmp;
3630b57cec5SDimitry Andric   // Try to reuse a previous value.
36406c3fb27SDimitry Andric   if (CV[Frag])
36506c3fb27SDimitry Andric     return CV[Frag];
3660b57cec5SDimitry Andric   IRBuilder<> Builder(BB, BBI);
36706c3fb27SDimitry Andric   if (IsPointer) {
36806c3fb27SDimitry Andric     if (Frag == 0)
36906c3fb27SDimitry Andric       CV[Frag] = V;
37006c3fb27SDimitry Andric     else
37106c3fb27SDimitry Andric       CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
37206c3fb27SDimitry Andric                                             V->getName() + ".i" + Twine(Frag));
37306c3fb27SDimitry Andric     return CV[Frag];
3740b57cec5SDimitry Andric   }
37506c3fb27SDimitry Andric 
37606c3fb27SDimitry Andric   Type *FragmentTy = VS.getFragmentType(Frag);
37706c3fb27SDimitry Andric 
37806c3fb27SDimitry Andric   if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
37906c3fb27SDimitry Andric     SmallVector<int> Mask;
38006c3fb27SDimitry Andric     for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
38106c3fb27SDimitry Andric       Mask.push_back(Frag * VS.NumPacked + J);
38206c3fb27SDimitry Andric     CV[Frag] =
38306c3fb27SDimitry Andric         Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
38406c3fb27SDimitry Andric                                     V->getName() + ".i" + Twine(Frag));
3850b57cec5SDimitry Andric   } else {
38606c3fb27SDimitry Andric     // Search through a chain of InsertElementInsts looking for element Frag.
3870b57cec5SDimitry Andric     // Record other elements in the cache.  The new V is still suitable
3880b57cec5SDimitry Andric     // for all uncached indices.
3890b57cec5SDimitry Andric     while (true) {
3900b57cec5SDimitry Andric       InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
3910b57cec5SDimitry Andric       if (!Insert)
3920b57cec5SDimitry Andric         break;
3930b57cec5SDimitry Andric       ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
3940b57cec5SDimitry Andric       if (!Idx)
3950b57cec5SDimitry Andric         break;
3960b57cec5SDimitry Andric       unsigned J = Idx->getZExtValue();
3970b57cec5SDimitry Andric       V = Insert->getOperand(0);
39806c3fb27SDimitry Andric       if (Frag * VS.NumPacked == J) {
39906c3fb27SDimitry Andric         CV[Frag] = Insert->getOperand(1);
40006c3fb27SDimitry Andric         return CV[Frag];
40106c3fb27SDimitry Andric       }
40206c3fb27SDimitry Andric 
40306c3fb27SDimitry Andric       if (VS.NumPacked == 1 && !CV[J]) {
4040b57cec5SDimitry Andric         // Only cache the first entry we find for each index we're not actively
4050b57cec5SDimitry Andric         // searching for. This prevents us from going too far up the chain and
4060b57cec5SDimitry Andric         // caching incorrect entries.
4070b57cec5SDimitry Andric         CV[J] = Insert->getOperand(1);
4080b57cec5SDimitry Andric       }
4090b57cec5SDimitry Andric     }
41006c3fb27SDimitry Andric     CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
41106c3fb27SDimitry Andric                                             V->getName() + ".i" + Twine(Frag));
4120b57cec5SDimitry Andric   }
41306c3fb27SDimitry Andric 
41406c3fb27SDimitry Andric   return CV[Frag];
4150b57cec5SDimitry Andric }
4160b57cec5SDimitry Andric 
visit(Function & F)4170b57cec5SDimitry Andric bool ScalarizerVisitor::visit(Function &F) {
4180b57cec5SDimitry Andric   assert(Gathered.empty() && Scattered.empty());
4190b57cec5SDimitry Andric 
42081ad6265SDimitry Andric   Scalarized = false;
42181ad6265SDimitry Andric 
4220b57cec5SDimitry Andric   // To ensure we replace gathered components correctly we need to do an ordered
4230b57cec5SDimitry Andric   // traversal of the basic blocks in the function.
4240b57cec5SDimitry Andric   ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
4250b57cec5SDimitry Andric   for (BasicBlock *BB : RPOT) {
4260b57cec5SDimitry Andric     for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
4270b57cec5SDimitry Andric       Instruction *I = &*II;
4280b57cec5SDimitry Andric       bool Done = InstVisitor::visit(I);
4290b57cec5SDimitry Andric       ++II;
4300b57cec5SDimitry Andric       if (Done && I->getType()->isVoidTy())
4310b57cec5SDimitry Andric         I->eraseFromParent();
4320b57cec5SDimitry Andric     }
4330b57cec5SDimitry Andric   }
4340b57cec5SDimitry Andric   return finish();
4350b57cec5SDimitry Andric }
4360b57cec5SDimitry Andric 
4370b57cec5SDimitry Andric // Return a scattered form of V that can be accessed by Point.  V must be a
4380b57cec5SDimitry Andric // vector or a pointer to a vector.
scatter(Instruction * Point,Value * V,const VectorSplit & VS)43981ad6265SDimitry Andric Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
44006c3fb27SDimitry Andric                                      const VectorSplit &VS) {
4410b57cec5SDimitry Andric   if (Argument *VArg = dyn_cast<Argument>(V)) {
4420b57cec5SDimitry Andric     // Put the scattered form of arguments in the entry block,
4430b57cec5SDimitry Andric     // so that it can be used everywhere.
4440b57cec5SDimitry Andric     Function *F = VArg->getParent();
4450b57cec5SDimitry Andric     BasicBlock *BB = &F->getEntryBlock();
44606c3fb27SDimitry Andric     return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
4470b57cec5SDimitry Andric   }
4480b57cec5SDimitry Andric   if (Instruction *VOp = dyn_cast<Instruction>(V)) {
449480093f4SDimitry Andric     // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
450480093f4SDimitry Andric     // nodes in predecessors. If those predecessors are unreachable from entry,
451480093f4SDimitry Andric     // then the IR in those blocks could have unexpected properties resulting in
452480093f4SDimitry Andric     // infinite loops in Scatterer::operator[]. By simply treating values
453480093f4SDimitry Andric     // originating from instructions in unreachable blocks as undef we do not
454480093f4SDimitry Andric     // need to analyse them further.
455480093f4SDimitry Andric     if (!DT->isReachableFromEntry(VOp->getParent()))
456480093f4SDimitry Andric       return Scatterer(Point->getParent(), Point->getIterator(),
45706c3fb27SDimitry Andric                        PoisonValue::get(V->getType()), VS);
4580b57cec5SDimitry Andric     // Put the scattered form of an instruction directly after the
459349cc55cSDimitry Andric     // instruction, skipping over PHI nodes and debug intrinsics.
4600b57cec5SDimitry Andric     BasicBlock *BB = VOp->getParent();
461349cc55cSDimitry Andric     return Scatterer(
46206c3fb27SDimitry Andric         BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS,
46306c3fb27SDimitry Andric         &Scattered[{V, VS.SplitTy}]);
4640b57cec5SDimitry Andric   }
4650b57cec5SDimitry Andric   // In the fallback case, just put the scattered before Point and
4660b57cec5SDimitry Andric   // keep the result local to Point.
46706c3fb27SDimitry Andric   return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
4680b57cec5SDimitry Andric }
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric // Replace Op with the gathered form of the components in CV.  Defer the
4710b57cec5SDimitry Andric // deletion of Op and creation of the gathered form to the end of the pass,
4720b57cec5SDimitry Andric // so that we can avoid creating the gathered form if all uses of Op are
4730b57cec5SDimitry Andric // replaced with uses of CV.
gather(Instruction * Op,const ValueVector & CV,const VectorSplit & VS)47406c3fb27SDimitry Andric void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
47506c3fb27SDimitry Andric                                const VectorSplit &VS) {
4760b57cec5SDimitry Andric   transferMetadataAndIRFlags(Op, CV);
4770b57cec5SDimitry Andric 
4780b57cec5SDimitry Andric   // If we already have a scattered form of Op (created from ExtractElements
4790b57cec5SDimitry Andric   // of Op itself), replace them with the new form.
48006c3fb27SDimitry Andric   ValueVector &SV = Scattered[{Op, VS.SplitTy}];
4810b57cec5SDimitry Andric   if (!SV.empty()) {
4820b57cec5SDimitry Andric     for (unsigned I = 0, E = SV.size(); I != E; ++I) {
4830b57cec5SDimitry Andric       Value *V = SV[I];
4845ffd83dbSDimitry Andric       if (V == nullptr || SV[I] == CV[I])
4850b57cec5SDimitry Andric         continue;
4860b57cec5SDimitry Andric 
4870b57cec5SDimitry Andric       Instruction *Old = cast<Instruction>(V);
488e8d8bef9SDimitry Andric       if (isa<Instruction>(CV[I]))
4890b57cec5SDimitry Andric         CV[I]->takeName(Old);
4900b57cec5SDimitry Andric       Old->replaceAllUsesWith(CV[I]);
4915ffd83dbSDimitry Andric       PotentiallyDeadInstrs.emplace_back(Old);
4920b57cec5SDimitry Andric     }
4930b57cec5SDimitry Andric   }
4940b57cec5SDimitry Andric   SV = CV;
4950b57cec5SDimitry Andric   Gathered.push_back(GatherList::value_type(Op, &SV));
4960b57cec5SDimitry Andric }
4970b57cec5SDimitry Andric 
49881ad6265SDimitry Andric // Replace Op with CV and collect Op has a potentially dead instruction.
replaceUses(Instruction * Op,Value * CV)49981ad6265SDimitry Andric void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
50081ad6265SDimitry Andric   if (CV != Op) {
50181ad6265SDimitry Andric     Op->replaceAllUsesWith(CV);
50281ad6265SDimitry Andric     PotentiallyDeadInstrs.emplace_back(Op);
50381ad6265SDimitry Andric     Scalarized = true;
50481ad6265SDimitry Andric   }
50581ad6265SDimitry Andric }
50681ad6265SDimitry Andric 
5070b57cec5SDimitry Andric // Return true if it is safe to transfer the given metadata tag from
5080b57cec5SDimitry Andric // vector to scalar instructions.
canTransferMetadata(unsigned Tag)5090b57cec5SDimitry Andric bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
5100b57cec5SDimitry Andric   return (Tag == LLVMContext::MD_tbaa
5110b57cec5SDimitry Andric           || Tag == LLVMContext::MD_fpmath
5120b57cec5SDimitry Andric           || Tag == LLVMContext::MD_tbaa_struct
5130b57cec5SDimitry Andric           || Tag == LLVMContext::MD_invariant_load
5140b57cec5SDimitry Andric           || Tag == LLVMContext::MD_alias_scope
5150b57cec5SDimitry Andric           || Tag == LLVMContext::MD_noalias
5165f757f3fSDimitry Andric           || Tag == LLVMContext::MD_mem_parallel_loop_access
5170b57cec5SDimitry Andric           || Tag == LLVMContext::MD_access_group);
5180b57cec5SDimitry Andric }
5190b57cec5SDimitry Andric 
5200b57cec5SDimitry Andric // Transfer metadata from Op to the instructions in CV if it is known
5210b57cec5SDimitry Andric // to be safe to do so.
transferMetadataAndIRFlags(Instruction * Op,const ValueVector & CV)5220b57cec5SDimitry Andric void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
5230b57cec5SDimitry Andric                                                    const ValueVector &CV) {
5240b57cec5SDimitry Andric   SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
5250b57cec5SDimitry Andric   Op->getAllMetadataOtherThanDebugLoc(MDs);
526*0fca6ea1SDimitry Andric   for (Value *V : CV) {
527*0fca6ea1SDimitry Andric     if (Instruction *New = dyn_cast<Instruction>(V)) {
5280b57cec5SDimitry Andric       for (const auto &MD : MDs)
5290b57cec5SDimitry Andric         if (canTransferMetadata(MD.first))
5300b57cec5SDimitry Andric           New->setMetadata(MD.first, MD.second);
5310b57cec5SDimitry Andric       New->copyIRFlags(Op);
5320b57cec5SDimitry Andric       if (Op->getDebugLoc() && !New->getDebugLoc())
5330b57cec5SDimitry Andric         New->setDebugLoc(Op->getDebugLoc());
5340b57cec5SDimitry Andric     }
5350b57cec5SDimitry Andric   }
5360b57cec5SDimitry Andric }
5370b57cec5SDimitry Andric 
53806c3fb27SDimitry Andric // Determine how Ty is split, if at all.
getVectorSplit(Type * Ty)53906c3fb27SDimitry Andric std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
54006c3fb27SDimitry Andric   VectorSplit Split;
54106c3fb27SDimitry Andric   Split.VecTy = dyn_cast<FixedVectorType>(Ty);
54206c3fb27SDimitry Andric   if (!Split.VecTy)
54306c3fb27SDimitry Andric     return {};
54406c3fb27SDimitry Andric 
54506c3fb27SDimitry Andric   unsigned NumElems = Split.VecTy->getNumElements();
54606c3fb27SDimitry Andric   Type *ElemTy = Split.VecTy->getElementType();
54706c3fb27SDimitry Andric 
54806c3fb27SDimitry Andric   if (NumElems == 1 || ElemTy->isPointerTy() ||
54906c3fb27SDimitry Andric       2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) {
55006c3fb27SDimitry Andric     Split.NumPacked = 1;
55106c3fb27SDimitry Andric     Split.NumFragments = NumElems;
55206c3fb27SDimitry Andric     Split.SplitTy = ElemTy;
55306c3fb27SDimitry Andric   } else {
55406c3fb27SDimitry Andric     Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits();
55506c3fb27SDimitry Andric     if (Split.NumPacked >= NumElems)
55606c3fb27SDimitry Andric       return {};
55706c3fb27SDimitry Andric 
55806c3fb27SDimitry Andric     Split.NumFragments = divideCeil(NumElems, Split.NumPacked);
55906c3fb27SDimitry Andric     Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked);
56006c3fb27SDimitry Andric 
56106c3fb27SDimitry Andric     unsigned RemainderElems = NumElems % Split.NumPacked;
56206c3fb27SDimitry Andric     if (RemainderElems > 1)
56306c3fb27SDimitry Andric       Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
56406c3fb27SDimitry Andric     else if (RemainderElems == 1)
56506c3fb27SDimitry Andric       Split.RemainderTy = ElemTy;
56606c3fb27SDimitry Andric   }
56706c3fb27SDimitry Andric 
56806c3fb27SDimitry Andric   return Split;
56906c3fb27SDimitry Andric }
57006c3fb27SDimitry Andric 
5710b57cec5SDimitry Andric // Try to fill in Layout from Ty, returning true on success.  Alignment is
572bdd1243dSDimitry Andric // the alignment of the vector, or std::nullopt if the ABI default should be
573bdd1243dSDimitry Andric // used.
574bdd1243dSDimitry Andric std::optional<VectorLayout>
getVectorLayout(Type * Ty,Align Alignment,const DataLayout & DL)5755ffd83dbSDimitry Andric ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
5765ffd83dbSDimitry Andric                                    const DataLayout &DL) {
57706c3fb27SDimitry Andric   std::optional<VectorSplit> VS = getVectorSplit(Ty);
57806c3fb27SDimitry Andric   if (!VS)
57906c3fb27SDimitry Andric     return {};
58006c3fb27SDimitry Andric 
5815ffd83dbSDimitry Andric   VectorLayout Layout;
58206c3fb27SDimitry Andric   Layout.VS = *VS;
58306c3fb27SDimitry Andric   // Check that we're dealing with full-byte fragments.
58406c3fb27SDimitry Andric   if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
58506c3fb27SDimitry Andric       (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))
58606c3fb27SDimitry Andric     return {};
5870b57cec5SDimitry Andric   Layout.VecAlign = Alignment;
58806c3fb27SDimitry Andric   Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
5895ffd83dbSDimitry Andric   return Layout;
5900b57cec5SDimitry Andric }
5910b57cec5SDimitry Andric 
5920b57cec5SDimitry Andric // Scalarize one-operand instruction I, using Split(Builder, X, Name)
5930b57cec5SDimitry Andric // to create an instruction like I with operand X and name Name.
5940b57cec5SDimitry Andric template<typename Splitter>
splitUnary(Instruction & I,const Splitter & Split)5950b57cec5SDimitry Andric bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
59606c3fb27SDimitry Andric   std::optional<VectorSplit> VS = getVectorSplit(I.getType());
59706c3fb27SDimitry Andric   if (!VS)
5980b57cec5SDimitry Andric     return false;
5990b57cec5SDimitry Andric 
60006c3fb27SDimitry Andric   std::optional<VectorSplit> OpVS;
60106c3fb27SDimitry Andric   if (I.getOperand(0)->getType() == I.getType()) {
60206c3fb27SDimitry Andric     OpVS = VS;
60306c3fb27SDimitry Andric   } else {
60406c3fb27SDimitry Andric     OpVS = getVectorSplit(I.getOperand(0)->getType());
60506c3fb27SDimitry Andric     if (!OpVS || VS->NumPacked != OpVS->NumPacked)
60606c3fb27SDimitry Andric       return false;
60706c3fb27SDimitry Andric   }
60806c3fb27SDimitry Andric 
6090b57cec5SDimitry Andric   IRBuilder<> Builder(&I);
61006c3fb27SDimitry Andric   Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
61106c3fb27SDimitry Andric   assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
6120b57cec5SDimitry Andric   ValueVector Res;
61306c3fb27SDimitry Andric   Res.resize(VS->NumFragments);
61406c3fb27SDimitry Andric   for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
61506c3fb27SDimitry Andric     Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
61606c3fb27SDimitry Andric   gather(&I, Res, *VS);
6170b57cec5SDimitry Andric   return true;
6180b57cec5SDimitry Andric }
6190b57cec5SDimitry Andric 
6200b57cec5SDimitry Andric // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
6210b57cec5SDimitry Andric // to create an instruction like I with operands X and Y and name Name.
6220b57cec5SDimitry Andric template<typename Splitter>
splitBinary(Instruction & I,const Splitter & Split)6230b57cec5SDimitry Andric bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
62406c3fb27SDimitry Andric   std::optional<VectorSplit> VS = getVectorSplit(I.getType());
62506c3fb27SDimitry Andric   if (!VS)
6260b57cec5SDimitry Andric     return false;
6270b57cec5SDimitry Andric 
62806c3fb27SDimitry Andric   std::optional<VectorSplit> OpVS;
62906c3fb27SDimitry Andric   if (I.getOperand(0)->getType() == I.getType()) {
63006c3fb27SDimitry Andric     OpVS = VS;
63106c3fb27SDimitry Andric   } else {
63206c3fb27SDimitry Andric     OpVS = getVectorSplit(I.getOperand(0)->getType());
63306c3fb27SDimitry Andric     if (!OpVS || VS->NumPacked != OpVS->NumPacked)
63406c3fb27SDimitry Andric       return false;
6355ffd83dbSDimitry Andric   }
63606c3fb27SDimitry Andric 
63706c3fb27SDimitry Andric   IRBuilder<> Builder(&I);
63806c3fb27SDimitry Andric   Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
63906c3fb27SDimitry Andric   Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
64006c3fb27SDimitry Andric   assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
64106c3fb27SDimitry Andric   assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
64206c3fb27SDimitry Andric   ValueVector Res;
64306c3fb27SDimitry Andric   Res.resize(VS->NumFragments);
64406c3fb27SDimitry Andric   for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
64506c3fb27SDimitry Andric     Value *Op0 = VOp0[Frag];
64606c3fb27SDimitry Andric     Value *Op1 = VOp1[Frag];
64706c3fb27SDimitry Andric     Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
64806c3fb27SDimitry Andric   }
64906c3fb27SDimitry Andric   gather(&I, Res, *VS);
6500b57cec5SDimitry Andric   return true;
6510b57cec5SDimitry Andric }
6520b57cec5SDimitry Andric 
isTriviallyScalariable(Intrinsic::ID ID)6530b57cec5SDimitry Andric static bool isTriviallyScalariable(Intrinsic::ID ID) {
6540b57cec5SDimitry Andric   return isTriviallyVectorizable(ID);
6550b57cec5SDimitry Andric }
6560b57cec5SDimitry Andric 
6570b57cec5SDimitry Andric /// If a call to a vector typed intrinsic function, split into a scalar call per
6580b57cec5SDimitry Andric /// element if possible for the intrinsic.
splitCall(CallInst & CI)6590b57cec5SDimitry Andric bool ScalarizerVisitor::splitCall(CallInst &CI) {
66006c3fb27SDimitry Andric   std::optional<VectorSplit> VS = getVectorSplit(CI.getType());
66106c3fb27SDimitry Andric   if (!VS)
6620b57cec5SDimitry Andric     return false;
6630b57cec5SDimitry Andric 
6640b57cec5SDimitry Andric   Function *F = CI.getCalledFunction();
6650b57cec5SDimitry Andric   if (!F)
6660b57cec5SDimitry Andric     return false;
6670b57cec5SDimitry Andric 
6680b57cec5SDimitry Andric   Intrinsic::ID ID = F->getIntrinsicID();
6690b57cec5SDimitry Andric   if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID))
6700b57cec5SDimitry Andric     return false;
6710b57cec5SDimitry Andric 
67206c3fb27SDimitry Andric   // unsigned NumElems = VT->getNumElements();
673349cc55cSDimitry Andric   unsigned NumArgs = CI.arg_size();
6740b57cec5SDimitry Andric 
6750b57cec5SDimitry Andric   ValueVector ScalarOperands(NumArgs);
6760b57cec5SDimitry Andric   SmallVector<Scatterer, 8> Scattered(NumArgs);
67706c3fb27SDimitry Andric   SmallVector<int> OverloadIdx(NumArgs, -1);
6780b57cec5SDimitry Andric 
679fe6060f1SDimitry Andric   SmallVector<llvm::Type *, 3> Tys;
68006c3fb27SDimitry Andric   // Add return type if intrinsic is overloaded on it.
68106c3fb27SDimitry Andric   if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
68206c3fb27SDimitry Andric     Tys.push_back(VS->SplitTy);
683fe6060f1SDimitry Andric 
6840b57cec5SDimitry Andric   // Assumes that any vector type has the same number of elements as the return
6850b57cec5SDimitry Andric   // vector type, which is true for all current intrinsics.
6860b57cec5SDimitry Andric   for (unsigned I = 0; I != NumArgs; ++I) {
6870b57cec5SDimitry Andric     Value *OpI = CI.getOperand(I);
6885f757f3fSDimitry Andric     if ([[maybe_unused]] auto *OpVecTy =
6895f757f3fSDimitry Andric             dyn_cast<FixedVectorType>(OpI->getType())) {
69006c3fb27SDimitry Andric       assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
69106c3fb27SDimitry Andric       std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
69206c3fb27SDimitry Andric       if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
69306c3fb27SDimitry Andric         // The natural split of the operand doesn't match the result. This could
69406c3fb27SDimitry Andric         // happen if the vector elements are different and the ScalarizeMinBits
69506c3fb27SDimitry Andric         // option is used.
69606c3fb27SDimitry Andric         //
69706c3fb27SDimitry Andric         // We could in principle handle this case as well, at the cost of
69806c3fb27SDimitry Andric         // complicating the scattering machinery to support multiple scattering
69906c3fb27SDimitry Andric         // granularities for a single value.
70006c3fb27SDimitry Andric         return false;
70106c3fb27SDimitry Andric       }
70206c3fb27SDimitry Andric 
70306c3fb27SDimitry Andric       Scattered[I] = scatter(&CI, OpI, *OpVS);
70406c3fb27SDimitry Andric       if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) {
70506c3fb27SDimitry Andric         OverloadIdx[I] = Tys.size();
70606c3fb27SDimitry Andric         Tys.push_back(OpVS->SplitTy);
70706c3fb27SDimitry Andric       }
7080b57cec5SDimitry Andric     } else {
7090b57cec5SDimitry Andric       ScalarOperands[I] = OpI;
71081ad6265SDimitry Andric       if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
711fe6060f1SDimitry Andric         Tys.push_back(OpI->getType());
7120b57cec5SDimitry Andric     }
7130b57cec5SDimitry Andric   }
7140b57cec5SDimitry Andric 
71506c3fb27SDimitry Andric   ValueVector Res(VS->NumFragments);
7160b57cec5SDimitry Andric   ValueVector ScalarCallOps(NumArgs);
7170b57cec5SDimitry Andric 
71806c3fb27SDimitry Andric   Function *NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
7190b57cec5SDimitry Andric   IRBuilder<> Builder(&CI);
7200b57cec5SDimitry Andric 
7210b57cec5SDimitry Andric   // Perform actual scalarization, taking care to preserve any scalar operands.
72206c3fb27SDimitry Andric   for (unsigned I = 0; I < VS->NumFragments; ++I) {
72306c3fb27SDimitry Andric     bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
7240b57cec5SDimitry Andric     ScalarCallOps.clear();
7250b57cec5SDimitry Andric 
72606c3fb27SDimitry Andric     if (IsRemainder)
72706c3fb27SDimitry Andric       Tys[0] = VS->RemainderTy;
72806c3fb27SDimitry Andric 
7290b57cec5SDimitry Andric     for (unsigned J = 0; J != NumArgs; ++J) {
73006c3fb27SDimitry Andric       if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) {
7310b57cec5SDimitry Andric         ScalarCallOps.push_back(ScalarOperands[J]);
73206c3fb27SDimitry Andric       } else {
73306c3fb27SDimitry Andric         ScalarCallOps.push_back(Scattered[J][I]);
73406c3fb27SDimitry Andric         if (IsRemainder && OverloadIdx[J] >= 0)
73506c3fb27SDimitry Andric           Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
73606c3fb27SDimitry Andric       }
7370b57cec5SDimitry Andric     }
7380b57cec5SDimitry Andric 
73906c3fb27SDimitry Andric     if (IsRemainder)
74006c3fb27SDimitry Andric       NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
74106c3fb27SDimitry Andric 
74206c3fb27SDimitry Andric     Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
74306c3fb27SDimitry Andric                                 CI.getName() + ".i" + Twine(I));
7440b57cec5SDimitry Andric   }
7450b57cec5SDimitry Andric 
74606c3fb27SDimitry Andric   gather(&CI, Res, *VS);
7470b57cec5SDimitry Andric   return true;
7480b57cec5SDimitry Andric }
7490b57cec5SDimitry Andric 
visitSelectInst(SelectInst & SI)7500b57cec5SDimitry Andric bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
75106c3fb27SDimitry Andric   std::optional<VectorSplit> VS = getVectorSplit(SI.getType());
75206c3fb27SDimitry Andric   if (!VS)
7530b57cec5SDimitry Andric     return false;
7540b57cec5SDimitry Andric 
75506c3fb27SDimitry Andric   std::optional<VectorSplit> CondVS;
75606c3fb27SDimitry Andric   if (isa<FixedVectorType>(SI.getCondition()->getType())) {
75706c3fb27SDimitry Andric     CondVS = getVectorSplit(SI.getCondition()->getType());
75806c3fb27SDimitry Andric     if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
75906c3fb27SDimitry Andric       // This happens when ScalarizeMinBits is used.
76006c3fb27SDimitry Andric       return false;
76106c3fb27SDimitry Andric     }
76206c3fb27SDimitry Andric   }
7630b57cec5SDimitry Andric 
76406c3fb27SDimitry Andric   IRBuilder<> Builder(&SI);
76506c3fb27SDimitry Andric   Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
76606c3fb27SDimitry Andric   Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
76706c3fb27SDimitry Andric   assert(VOp1.size() == VS->NumFragments && "Mismatched select");
76806c3fb27SDimitry Andric   assert(VOp2.size() == VS->NumFragments && "Mismatched select");
76906c3fb27SDimitry Andric   ValueVector Res;
77006c3fb27SDimitry Andric   Res.resize(VS->NumFragments);
77106c3fb27SDimitry Andric 
77206c3fb27SDimitry Andric   if (CondVS) {
77306c3fb27SDimitry Andric     Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
77406c3fb27SDimitry Andric     assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
77506c3fb27SDimitry Andric     for (unsigned I = 0; I < VS->NumFragments; ++I) {
7765ffd83dbSDimitry Andric       Value *Op0 = VOp0[I];
7775ffd83dbSDimitry Andric       Value *Op1 = VOp1[I];
7785ffd83dbSDimitry Andric       Value *Op2 = VOp2[I];
7795ffd83dbSDimitry Andric       Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
7800b57cec5SDimitry Andric                                     SI.getName() + ".i" + Twine(I));
7815ffd83dbSDimitry Andric     }
7820b57cec5SDimitry Andric   } else {
7830b57cec5SDimitry Andric     Value *Op0 = SI.getOperand(0);
78406c3fb27SDimitry Andric     for (unsigned I = 0; I < VS->NumFragments; ++I) {
7855ffd83dbSDimitry Andric       Value *Op1 = VOp1[I];
7865ffd83dbSDimitry Andric       Value *Op2 = VOp2[I];
7875ffd83dbSDimitry Andric       Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
7880b57cec5SDimitry Andric                                     SI.getName() + ".i" + Twine(I));
7890b57cec5SDimitry Andric     }
7905ffd83dbSDimitry Andric   }
79106c3fb27SDimitry Andric   gather(&SI, Res, *VS);
7920b57cec5SDimitry Andric   return true;
7930b57cec5SDimitry Andric }
7940b57cec5SDimitry Andric 
visitICmpInst(ICmpInst & ICI)7950b57cec5SDimitry Andric bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
7960b57cec5SDimitry Andric   return splitBinary(ICI, ICmpSplitter(ICI));
7970b57cec5SDimitry Andric }
7980b57cec5SDimitry Andric 
visitFCmpInst(FCmpInst & FCI)7990b57cec5SDimitry Andric bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
8000b57cec5SDimitry Andric   return splitBinary(FCI, FCmpSplitter(FCI));
8010b57cec5SDimitry Andric }
8020b57cec5SDimitry Andric 
visitUnaryOperator(UnaryOperator & UO)8030b57cec5SDimitry Andric bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
8040b57cec5SDimitry Andric   return splitUnary(UO, UnarySplitter(UO));
8050b57cec5SDimitry Andric }
8060b57cec5SDimitry Andric 
visitBinaryOperator(BinaryOperator & BO)8070b57cec5SDimitry Andric bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
8080b57cec5SDimitry Andric   return splitBinary(BO, BinarySplitter(BO));
8090b57cec5SDimitry Andric }
8100b57cec5SDimitry Andric 
visitGetElementPtrInst(GetElementPtrInst & GEPI)8110b57cec5SDimitry Andric bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
81206c3fb27SDimitry Andric   std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());
81306c3fb27SDimitry Andric   if (!VS)
8140b57cec5SDimitry Andric     return false;
8150b57cec5SDimitry Andric 
8160b57cec5SDimitry Andric   IRBuilder<> Builder(&GEPI);
8170b57cec5SDimitry Andric   unsigned NumIndices = GEPI.getNumIndices();
8180b57cec5SDimitry Andric 
81906c3fb27SDimitry Andric   // The base pointer and indices might be scalar even if it's a vector GEP.
82006c3fb27SDimitry Andric   SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
82106c3fb27SDimitry Andric   SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};
8220b57cec5SDimitry Andric 
82306c3fb27SDimitry Andric   for (unsigned I = 0; I < 1 + NumIndices; ++I) {
82406c3fb27SDimitry Andric     if (auto *VecTy =
82506c3fb27SDimitry Andric             dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) {
82606c3fb27SDimitry Andric       std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
82706c3fb27SDimitry Andric       if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
82806c3fb27SDimitry Andric         // This can happen when ScalarizeMinBits is used.
82906c3fb27SDimitry Andric         return false;
83006c3fb27SDimitry Andric       }
83106c3fb27SDimitry Andric       ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
83206c3fb27SDimitry Andric     } else {
83306c3fb27SDimitry Andric       ScalarOps[I] = GEPI.getOperand(I);
83406c3fb27SDimitry Andric     }
8350b57cec5SDimitry Andric   }
8360b57cec5SDimitry Andric 
8370b57cec5SDimitry Andric   ValueVector Res;
83806c3fb27SDimitry Andric   Res.resize(VS->NumFragments);
83906c3fb27SDimitry Andric   for (unsigned I = 0; I < VS->NumFragments; ++I) {
84006c3fb27SDimitry Andric     SmallVector<Value *, 8> SplitOps;
84106c3fb27SDimitry Andric     SplitOps.resize(1 + NumIndices);
84206c3fb27SDimitry Andric     for (unsigned J = 0; J < 1 + NumIndices; ++J) {
84306c3fb27SDimitry Andric       if (ScalarOps[J])
84406c3fb27SDimitry Andric         SplitOps[J] = ScalarOps[J];
84506c3fb27SDimitry Andric       else
84606c3fb27SDimitry Andric         SplitOps[J] = ScatterOps[J][I];
84706c3fb27SDimitry Andric     }
84806c3fb27SDimitry Andric     Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0],
84906c3fb27SDimitry Andric                                ArrayRef(SplitOps).drop_front(),
8500b57cec5SDimitry Andric                                GEPI.getName() + ".i" + Twine(I));
8510b57cec5SDimitry Andric     if (GEPI.isInBounds())
8520b57cec5SDimitry Andric       if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
8530b57cec5SDimitry Andric         NewGEPI->setIsInBounds();
8540b57cec5SDimitry Andric   }
85506c3fb27SDimitry Andric   gather(&GEPI, Res, *VS);
8560b57cec5SDimitry Andric   return true;
8570b57cec5SDimitry Andric }
8580b57cec5SDimitry Andric 
visitCastInst(CastInst & CI)8590b57cec5SDimitry Andric bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
86006c3fb27SDimitry Andric   std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());
86106c3fb27SDimitry Andric   if (!DestVS)
8620b57cec5SDimitry Andric     return false;
8630b57cec5SDimitry Andric 
86406c3fb27SDimitry Andric   std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
86506c3fb27SDimitry Andric   if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
86606c3fb27SDimitry Andric     return false;
86706c3fb27SDimitry Andric 
8680b57cec5SDimitry Andric   IRBuilder<> Builder(&CI);
86906c3fb27SDimitry Andric   Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
87006c3fb27SDimitry Andric   assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
8710b57cec5SDimitry Andric   ValueVector Res;
87206c3fb27SDimitry Andric   Res.resize(DestVS->NumFragments);
87306c3fb27SDimitry Andric   for (unsigned I = 0; I < DestVS->NumFragments; ++I)
87406c3fb27SDimitry Andric     Res[I] =
87506c3fb27SDimitry Andric         Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
8760b57cec5SDimitry Andric                            CI.getName() + ".i" + Twine(I));
87706c3fb27SDimitry Andric   gather(&CI, Res, *DestVS);
8780b57cec5SDimitry Andric   return true;
8790b57cec5SDimitry Andric }
8800b57cec5SDimitry Andric 
visitBitCastInst(BitCastInst & BCI)8810b57cec5SDimitry Andric bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
88206c3fb27SDimitry Andric   std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
88306c3fb27SDimitry Andric   std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
88406c3fb27SDimitry Andric   if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
8850b57cec5SDimitry Andric     return false;
8860b57cec5SDimitry Andric 
88706c3fb27SDimitry Andric   const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();
8880b57cec5SDimitry Andric 
88906c3fb27SDimitry Andric   // Vectors of pointers are always fully scalarized.
89006c3fb27SDimitry Andric   assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));
89106c3fb27SDimitry Andric 
89206c3fb27SDimitry Andric   IRBuilder<> Builder(&BCI);
89306c3fb27SDimitry Andric   Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
89406c3fb27SDimitry Andric   ValueVector Res;
89506c3fb27SDimitry Andric   Res.resize(DstVS->NumFragments);
89606c3fb27SDimitry Andric 
89706c3fb27SDimitry Andric   unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
89806c3fb27SDimitry Andric   unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();
89906c3fb27SDimitry Andric 
90006c3fb27SDimitry Andric   if (isPointerTy || DstSplitBits == SrcSplitBits) {
90106c3fb27SDimitry Andric     assert(DstVS->NumFragments == SrcVS->NumFragments);
90206c3fb27SDimitry Andric     for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
90306c3fb27SDimitry Andric       Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
9040b57cec5SDimitry Andric                                      BCI.getName() + ".i" + Twine(I));
90506c3fb27SDimitry Andric     }
90606c3fb27SDimitry Andric   } else if (SrcSplitBits % DstSplitBits == 0) {
90706c3fb27SDimitry Andric     // Convert each source fragment to the same-sized destination vector and
90806c3fb27SDimitry Andric     // then scatter the result to the destination.
90906c3fb27SDimitry Andric     VectorSplit MidVS;
91006c3fb27SDimitry Andric     MidVS.NumPacked = DstVS->NumPacked;
91106c3fb27SDimitry Andric     MidVS.NumFragments = SrcSplitBits / DstSplitBits;
91206c3fb27SDimitry Andric     MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(),
91306c3fb27SDimitry Andric                                        MidVS.NumPacked * MidVS.NumFragments);
91406c3fb27SDimitry Andric     MidVS.SplitTy = DstVS->SplitTy;
91506c3fb27SDimitry Andric 
9160b57cec5SDimitry Andric     unsigned ResI = 0;
91706c3fb27SDimitry Andric     for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
91806c3fb27SDimitry Andric       Value *V = Op0[I];
91906c3fb27SDimitry Andric 
9200b57cec5SDimitry Andric       // Look through any existing bitcasts before converting to <N x t2>.
9210b57cec5SDimitry Andric       // In the best case, the resulting conversion might be a no-op.
92206c3fb27SDimitry Andric       Instruction *VI;
9230b57cec5SDimitry Andric       while ((VI = dyn_cast<Instruction>(V)) &&
9240b57cec5SDimitry Andric              VI->getOpcode() == Instruction::BitCast)
9250b57cec5SDimitry Andric         V = VI->getOperand(0);
92606c3fb27SDimitry Andric 
92706c3fb27SDimitry Andric       V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");
92806c3fb27SDimitry Andric 
92906c3fb27SDimitry Andric       Scatterer Mid = scatter(&BCI, V, MidVS);
93006c3fb27SDimitry Andric       for (unsigned J = 0; J < MidVS.NumFragments; ++J)
93106c3fb27SDimitry Andric         Res[ResI++] = Mid[J];
93206c3fb27SDimitry Andric     }
93306c3fb27SDimitry Andric   } else if (DstSplitBits % SrcSplitBits == 0) {
93406c3fb27SDimitry Andric     // Gather enough source fragments to make up a destination fragment and
93506c3fb27SDimitry Andric     // then convert to the destination type.
93606c3fb27SDimitry Andric     VectorSplit MidVS;
93706c3fb27SDimitry Andric     MidVS.NumFragments = DstSplitBits / SrcSplitBits;
93806c3fb27SDimitry Andric     MidVS.NumPacked = SrcVS->NumPacked;
93906c3fb27SDimitry Andric     MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(),
94006c3fb27SDimitry Andric                                        MidVS.NumPacked * MidVS.NumFragments);
94106c3fb27SDimitry Andric     MidVS.SplitTy = SrcVS->SplitTy;
94206c3fb27SDimitry Andric 
94306c3fb27SDimitry Andric     unsigned SrcI = 0;
94406c3fb27SDimitry Andric     SmallVector<Value *, 8> ConcatOps;
94506c3fb27SDimitry Andric     ConcatOps.resize(MidVS.NumFragments);
94606c3fb27SDimitry Andric     for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
94706c3fb27SDimitry Andric       for (unsigned J = 0; J < MidVS.NumFragments; ++J)
94806c3fb27SDimitry Andric         ConcatOps[J] = Op0[SrcI++];
94906c3fb27SDimitry Andric       Value *V = concatenate(Builder, ConcatOps, MidVS,
95006c3fb27SDimitry Andric                              BCI.getName() + ".i" + Twine(I));
95106c3fb27SDimitry Andric       Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
95206c3fb27SDimitry Andric                                      BCI.getName() + ".i" + Twine(I));
9530b57cec5SDimitry Andric     }
9540b57cec5SDimitry Andric   } else {
95506c3fb27SDimitry Andric     return false;
9560b57cec5SDimitry Andric   }
95706c3fb27SDimitry Andric 
95806c3fb27SDimitry Andric   gather(&BCI, Res, *DstVS);
9590b57cec5SDimitry Andric   return true;
9600b57cec5SDimitry Andric }
9610b57cec5SDimitry Andric 
visitInsertElementInst(InsertElementInst & IEI)9625ffd83dbSDimitry Andric bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
96306c3fb27SDimitry Andric   std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());
96406c3fb27SDimitry Andric   if (!VS)
9655ffd83dbSDimitry Andric     return false;
9665ffd83dbSDimitry Andric 
9675ffd83dbSDimitry Andric   IRBuilder<> Builder(&IEI);
96806c3fb27SDimitry Andric   Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
9695ffd83dbSDimitry Andric   Value *NewElt = IEI.getOperand(1);
9705ffd83dbSDimitry Andric   Value *InsIdx = IEI.getOperand(2);
9715ffd83dbSDimitry Andric 
9725ffd83dbSDimitry Andric   ValueVector Res;
97306c3fb27SDimitry Andric   Res.resize(VS->NumFragments);
9745ffd83dbSDimitry Andric 
9755ffd83dbSDimitry Andric   if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
97606c3fb27SDimitry Andric     unsigned Idx = CI->getZExtValue();
97706c3fb27SDimitry Andric     unsigned Fragment = Idx / VS->NumPacked;
97806c3fb27SDimitry Andric     for (unsigned I = 0; I < VS->NumFragments; ++I) {
97906c3fb27SDimitry Andric       if (I == Fragment) {
98006c3fb27SDimitry Andric         bool IsPacked = VS->NumPacked > 1;
98106c3fb27SDimitry Andric         if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
98206c3fb27SDimitry Andric             !VS->RemainderTy->isVectorTy())
98306c3fb27SDimitry Andric           IsPacked = false;
98406c3fb27SDimitry Andric         if (IsPacked) {
98506c3fb27SDimitry Andric           Res[I] =
98606c3fb27SDimitry Andric               Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
9875ffd83dbSDimitry Andric         } else {
98806c3fb27SDimitry Andric           Res[I] = NewElt;
98906c3fb27SDimitry Andric         }
99006c3fb27SDimitry Andric       } else {
99106c3fb27SDimitry Andric         Res[I] = Op0[I];
99206c3fb27SDimitry Andric       }
99306c3fb27SDimitry Andric     }
99406c3fb27SDimitry Andric   } else {
99506c3fb27SDimitry Andric     // Never split a variable insertelement that isn't fully scalarized.
99606c3fb27SDimitry Andric     if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
9975ffd83dbSDimitry Andric       return false;
9985ffd83dbSDimitry Andric 
99906c3fb27SDimitry Andric     for (unsigned I = 0; I < VS->NumFragments; ++I) {
10005ffd83dbSDimitry Andric       Value *ShouldReplace =
10015ffd83dbSDimitry Andric           Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
10025ffd83dbSDimitry Andric                                InsIdx->getName() + ".is." + Twine(I));
10035ffd83dbSDimitry Andric       Value *OldElt = Op0[I];
10045ffd83dbSDimitry Andric       Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
10055ffd83dbSDimitry Andric                                     IEI.getName() + ".i" + Twine(I));
10065ffd83dbSDimitry Andric     }
10075ffd83dbSDimitry Andric   }
10085ffd83dbSDimitry Andric 
100906c3fb27SDimitry Andric   gather(&IEI, Res, *VS);
10105ffd83dbSDimitry Andric   return true;
10115ffd83dbSDimitry Andric }
10125ffd83dbSDimitry Andric 
visitExtractElementInst(ExtractElementInst & EEI)10135ffd83dbSDimitry Andric bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
101406c3fb27SDimitry Andric   std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
101506c3fb27SDimitry Andric   if (!VS)
10165ffd83dbSDimitry Andric     return false;
10175ffd83dbSDimitry Andric 
10185ffd83dbSDimitry Andric   IRBuilder<> Builder(&EEI);
101906c3fb27SDimitry Andric   Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
10205ffd83dbSDimitry Andric   Value *ExtIdx = EEI.getOperand(1);
10215ffd83dbSDimitry Andric 
10225ffd83dbSDimitry Andric   if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
102306c3fb27SDimitry Andric     unsigned Idx = CI->getZExtValue();
102406c3fb27SDimitry Andric     unsigned Fragment = Idx / VS->NumPacked;
102506c3fb27SDimitry Andric     Value *Res = Op0[Fragment];
102606c3fb27SDimitry Andric     bool IsPacked = VS->NumPacked > 1;
102706c3fb27SDimitry Andric     if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
102806c3fb27SDimitry Andric         !VS->RemainderTy->isVectorTy())
102906c3fb27SDimitry Andric       IsPacked = false;
103006c3fb27SDimitry Andric     if (IsPacked)
103106c3fb27SDimitry Andric       Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
103281ad6265SDimitry Andric     replaceUses(&EEI, Res);
10335ffd83dbSDimitry Andric     return true;
10345ffd83dbSDimitry Andric   }
10355ffd83dbSDimitry Andric 
103606c3fb27SDimitry Andric   // Never split a variable extractelement that isn't fully scalarized.
103706c3fb27SDimitry Andric   if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
10385ffd83dbSDimitry Andric     return false;
10395ffd83dbSDimitry Andric 
104006c3fb27SDimitry Andric   Value *Res = PoisonValue::get(VS->VecTy->getElementType());
104106c3fb27SDimitry Andric   for (unsigned I = 0; I < VS->NumFragments; ++I) {
10425ffd83dbSDimitry Andric     Value *ShouldExtract =
10435ffd83dbSDimitry Andric         Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
10445ffd83dbSDimitry Andric                              ExtIdx->getName() + ".is." + Twine(I));
10455ffd83dbSDimitry Andric     Value *Elt = Op0[I];
10465ffd83dbSDimitry Andric     Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
10475ffd83dbSDimitry Andric                                EEI.getName() + ".upto" + Twine(I));
10485ffd83dbSDimitry Andric   }
104981ad6265SDimitry Andric   replaceUses(&EEI, Res);
10505ffd83dbSDimitry Andric   return true;
10515ffd83dbSDimitry Andric }
10525ffd83dbSDimitry Andric 
visitShuffleVectorInst(ShuffleVectorInst & SVI)10530b57cec5SDimitry Andric bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
105406c3fb27SDimitry Andric   std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
105506c3fb27SDimitry Andric   std::optional<VectorSplit> VSOp =
105606c3fb27SDimitry Andric       getVectorSplit(SVI.getOperand(0)->getType());
105706c3fb27SDimitry Andric   if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
10580b57cec5SDimitry Andric     return false;
10590b57cec5SDimitry Andric 
106006c3fb27SDimitry Andric   Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
106106c3fb27SDimitry Andric   Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
10620b57cec5SDimitry Andric   ValueVector Res;
106306c3fb27SDimitry Andric   Res.resize(VS->NumFragments);
10640b57cec5SDimitry Andric 
106506c3fb27SDimitry Andric   for (unsigned I = 0; I < VS->NumFragments; ++I) {
10660b57cec5SDimitry Andric     int Selector = SVI.getMaskValue(I);
10670b57cec5SDimitry Andric     if (Selector < 0)
106806c3fb27SDimitry Andric       Res[I] = PoisonValue::get(VS->VecTy->getElementType());
10690b57cec5SDimitry Andric     else if (unsigned(Selector) < Op0.size())
10700b57cec5SDimitry Andric       Res[I] = Op0[Selector];
10710b57cec5SDimitry Andric     else
10720b57cec5SDimitry Andric       Res[I] = Op1[Selector - Op0.size()];
10730b57cec5SDimitry Andric   }
107406c3fb27SDimitry Andric   gather(&SVI, Res, *VS);
10750b57cec5SDimitry Andric   return true;
10760b57cec5SDimitry Andric }
10770b57cec5SDimitry Andric 
visitPHINode(PHINode & PHI)10780b57cec5SDimitry Andric bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
107906c3fb27SDimitry Andric   std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());
108006c3fb27SDimitry Andric   if (!VS)
10810b57cec5SDimitry Andric     return false;
10820b57cec5SDimitry Andric 
10830b57cec5SDimitry Andric   IRBuilder<> Builder(&PHI);
10840b57cec5SDimitry Andric   ValueVector Res;
108506c3fb27SDimitry Andric   Res.resize(VS->NumFragments);
10860b57cec5SDimitry Andric 
10870b57cec5SDimitry Andric   unsigned NumOps = PHI.getNumOperands();
108806c3fb27SDimitry Andric   for (unsigned I = 0; I < VS->NumFragments; ++I) {
108906c3fb27SDimitry Andric     Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
10900b57cec5SDimitry Andric                                PHI.getName() + ".i" + Twine(I));
109106c3fb27SDimitry Andric   }
10920b57cec5SDimitry Andric 
10930b57cec5SDimitry Andric   for (unsigned I = 0; I < NumOps; ++I) {
109406c3fb27SDimitry Andric     Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
10950b57cec5SDimitry Andric     BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
109606c3fb27SDimitry Andric     for (unsigned J = 0; J < VS->NumFragments; ++J)
10970b57cec5SDimitry Andric       cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
10980b57cec5SDimitry Andric   }
109906c3fb27SDimitry Andric   gather(&PHI, Res, *VS);
11000b57cec5SDimitry Andric   return true;
11010b57cec5SDimitry Andric }
11020b57cec5SDimitry Andric 
visitLoadInst(LoadInst & LI)11030b57cec5SDimitry Andric bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
11040b57cec5SDimitry Andric   if (!ScalarizeLoadStore)
11050b57cec5SDimitry Andric     return false;
11060b57cec5SDimitry Andric   if (!LI.isSimple())
11070b57cec5SDimitry Andric     return false;
11080b57cec5SDimitry Andric 
1109bdd1243dSDimitry Andric   std::optional<VectorLayout> Layout = getVectorLayout(
1110*0fca6ea1SDimitry Andric       LI.getType(), LI.getAlign(), LI.getDataLayout());
11115ffd83dbSDimitry Andric   if (!Layout)
11120b57cec5SDimitry Andric     return false;
11130b57cec5SDimitry Andric 
11140b57cec5SDimitry Andric   IRBuilder<> Builder(&LI);
111506c3fb27SDimitry Andric   Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS);
11160b57cec5SDimitry Andric   ValueVector Res;
111706c3fb27SDimitry Andric   Res.resize(Layout->VS.NumFragments);
11180b57cec5SDimitry Andric 
111906c3fb27SDimitry Andric   for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
112006c3fb27SDimitry Andric     Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
112106c3fb27SDimitry Andric                                        Align(Layout->getFragmentAlign(I)),
11220b57cec5SDimitry Andric                                        LI.getName() + ".i" + Twine(I));
112306c3fb27SDimitry Andric   }
112406c3fb27SDimitry Andric   gather(&LI, Res, Layout->VS);
11250b57cec5SDimitry Andric   return true;
11260b57cec5SDimitry Andric }
11270b57cec5SDimitry Andric 
visitStoreInst(StoreInst & SI)11280b57cec5SDimitry Andric bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
11290b57cec5SDimitry Andric   if (!ScalarizeLoadStore)
11300b57cec5SDimitry Andric     return false;
11310b57cec5SDimitry Andric   if (!SI.isSimple())
11320b57cec5SDimitry Andric     return false;
11330b57cec5SDimitry Andric 
11340b57cec5SDimitry Andric   Value *FullValue = SI.getValueOperand();
1135bdd1243dSDimitry Andric   std::optional<VectorLayout> Layout = getVectorLayout(
1136*0fca6ea1SDimitry Andric       FullValue->getType(), SI.getAlign(), SI.getDataLayout());
11375ffd83dbSDimitry Andric   if (!Layout)
11380b57cec5SDimitry Andric     return false;
11390b57cec5SDimitry Andric 
11400b57cec5SDimitry Andric   IRBuilder<> Builder(&SI);
114106c3fb27SDimitry Andric   Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
114206c3fb27SDimitry Andric   Scatterer VVal = scatter(&SI, FullValue, Layout->VS);
11430b57cec5SDimitry Andric 
11440b57cec5SDimitry Andric   ValueVector Stores;
114506c3fb27SDimitry Andric   Stores.resize(Layout->VS.NumFragments);
114606c3fb27SDimitry Andric   for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
11475ffd83dbSDimitry Andric     Value *Val = VVal[I];
11485ffd83dbSDimitry Andric     Value *Ptr = VPtr[I];
114906c3fb27SDimitry Andric     Stores[I] =
115006c3fb27SDimitry Andric         Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
11510b57cec5SDimitry Andric   }
11520b57cec5SDimitry Andric   transferMetadataAndIRFlags(&SI, Stores);
11530b57cec5SDimitry Andric   return true;
11540b57cec5SDimitry Andric }
11550b57cec5SDimitry Andric 
visitCallInst(CallInst & CI)11560b57cec5SDimitry Andric bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
11570b57cec5SDimitry Andric   return splitCall(CI);
11580b57cec5SDimitry Andric }
11590b57cec5SDimitry Andric 
visitFreezeInst(FreezeInst & FI)116006c3fb27SDimitry Andric bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
116106c3fb27SDimitry Andric   return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) {
116206c3fb27SDimitry Andric     return Builder.CreateFreeze(Op, Name);
116306c3fb27SDimitry Andric   });
116406c3fb27SDimitry Andric }
116506c3fb27SDimitry Andric 
11660b57cec5SDimitry Andric // Delete the instructions that we scalarized.  If a full vector result
11670b57cec5SDimitry Andric // is still needed, recreate it using InsertElements.
finish()11680b57cec5SDimitry Andric bool ScalarizerVisitor::finish() {
11690b57cec5SDimitry Andric   // The presence of data in Gathered or Scattered indicates changes
11700b57cec5SDimitry Andric   // made to the Function.
117181ad6265SDimitry Andric   if (Gathered.empty() && Scattered.empty() && !Scalarized)
11720b57cec5SDimitry Andric     return false;
11730b57cec5SDimitry Andric   for (const auto &GMI : Gathered) {
11740b57cec5SDimitry Andric     Instruction *Op = GMI.first;
11750b57cec5SDimitry Andric     ValueVector &CV = *GMI.second;
11760b57cec5SDimitry Andric     if (!Op->use_empty()) {
11770b57cec5SDimitry Andric       // The value is still needed, so recreate it using a series of
117806c3fb27SDimitry Andric       // insertelements and/or shufflevectors.
117906c3fb27SDimitry Andric       Value *Res;
1180bdd1243dSDimitry Andric       if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
11810b57cec5SDimitry Andric         BasicBlock *BB = Op->getParent();
11820b57cec5SDimitry Andric         IRBuilder<> Builder(Op);
11830b57cec5SDimitry Andric         if (isa<PHINode>(Op))
11840b57cec5SDimitry Andric           Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
118506c3fb27SDimitry Andric 
118606c3fb27SDimitry Andric         VectorSplit VS = *getVectorSplit(Ty);
118706c3fb27SDimitry Andric         assert(VS.NumFragments == CV.size());
118806c3fb27SDimitry Andric 
118906c3fb27SDimitry Andric         Res = concatenate(Builder, CV, VS, Op->getName());
119006c3fb27SDimitry Andric 
1191e8d8bef9SDimitry Andric         Res->takeName(Op);
11925ffd83dbSDimitry Andric       } else {
11935ffd83dbSDimitry Andric         assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
11945ffd83dbSDimitry Andric         Res = CV[0];
11955ffd83dbSDimitry Andric         if (Op == Res)
11965ffd83dbSDimitry Andric           continue;
11975ffd83dbSDimitry Andric       }
11980b57cec5SDimitry Andric       Op->replaceAllUsesWith(Res);
11990b57cec5SDimitry Andric     }
12005ffd83dbSDimitry Andric     PotentiallyDeadInstrs.emplace_back(Op);
12010b57cec5SDimitry Andric   }
12020b57cec5SDimitry Andric   Gathered.clear();
12030b57cec5SDimitry Andric   Scattered.clear();
120481ad6265SDimitry Andric   Scalarized = false;
12055ffd83dbSDimitry Andric 
12065ffd83dbSDimitry Andric   RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);
12075ffd83dbSDimitry Andric 
12080b57cec5SDimitry Andric   return true;
12090b57cec5SDimitry Andric }
12100b57cec5SDimitry Andric 
run(Function & F,FunctionAnalysisManager & AM)12110b57cec5SDimitry Andric PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) {
1212480093f4SDimitry Andric   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
12135f757f3fSDimitry Andric   ScalarizerVisitor Impl(DT, Options);
12140b57cec5SDimitry Andric   bool Changed = Impl.visit(F);
1215480093f4SDimitry Andric   PreservedAnalyses PA;
1216480093f4SDimitry Andric   PA.preserve<DominatorTreeAnalysis>();
1217480093f4SDimitry Andric   return Changed ? PA : PreservedAnalyses::all();
12180b57cec5SDimitry Andric }
1219