//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>

using namespace llvm;

using VectorParts = SmallVector<Value *, 2>;

namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}
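// When set on the command line, ForceTargetInstructionCost overrides the cost
// computed for each recipe (see VPRecipeBase::cost below).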
extern cl::opt<unsigned> ForceTargetInstructionCost;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPInterleaveSC:
    return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return true;
  case VPReplicateSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPWidenCallSC:
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyReadsMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionEVLSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
    return true;
  case VPReplicateSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPWidenCallSC:
    return !cast<VPWidenCallRecipe>(this)
                ->getCalledScalarFunction()
                ->onlyWritesMemory();
  case VPBranchOnMaskSC:
  case VPPredInstPHISC:
  case VPScalarIVStepsSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return false;
  case VPBlendSC:
  case VPReductionEVLSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPDerivedIVSC:
  case VPPredInstPHISC:
  case VPScalarCastSC:
    return false;
  case VPInstructionSC:
    switch (cast<VPInstruction>(this)->getOpcode()) {
    case Instruction::Or:
    case Instruction::ICmp:
    case Instruction::Select:
    case VPInstruction::Not:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::ExtractFromEnd:
    case VPInstruction::FirstOrderRecurrenceSplice:
    case VPInstruction::LogicalAnd:
    case VPInstruction::PtrAdd:
      return false;
    default:
      return true;
    }
  case VPWidenCallSC: {
    Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
    return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();
  }
  case VPBlendSC:
  case VPReductionEVLSC:
  case VPReductionSC:
  case VPScalarIVStepsSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenPointerInductionSC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPInterleaveSC:
    return mayWriteToMemory();
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    assert(
        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
            mayWriteToMemory() &&
        "mayHaveSideEffects result for ingredient differs from this "
        "implementation");
    return mayWriteToMemory();
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  VPValue *ExitValue = getOperand(0);
  auto Lane = vputils::isUniformAfterVectorization(ExitValue)
                  ? VPLane::getFirstLane()
                  : VPLane::getLastLaneForVF(State.VF);
  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe();
  auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr;
  // Values leaving the vector loop reach live-out phis in the exit block via
  // the middle block.
  auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion()
                       ? MiddleVPBB
                       : ExitingVPBB;
  BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
  // Set insertion point in PredBB in case an extract needs to be generated.
  // TODO: Model extracts explicitly.
  State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
  Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane));
  if (Phi->getBasicBlockIndex(PredBB) != -1)
    Phi->setIncomingValueForBlock(PredBB, V);
  else
    Phi->addIncoming(V, PredBB);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  O << "Live-out ";
  getPhi()->printAsOperand(O);
  O << " = ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << "\n";
}
#endif

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, InsertPos->getIterator());
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  BB.insert(this, I);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

/// Return the underlying instruction to be used for computing \p R's cost via
/// the legacy cost model. Return nullptr if there's no suitable instruction.
static Instruction *getInstructionForCost(const VPRecipeBase *R) {
  if (auto *S = dyn_cast<VPSingleDefRecipe>(R))
    return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
  if (auto *IG = dyn_cast<VPInterleaveRecipe>(R))
    return IG->getInsertPos();
  if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(R))
    return &WidenMem->getIngredient();
  return nullptr;
}

InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
  if (auto *UI = getInstructionForCost(this))
    if (Ctx.skipCostComputation(UI, VF.isVector()))
      return 0;

  InstructionCost RecipeCost = computeCost(VF, Ctx);
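  // Honor ForceTargetInstructionCost: if the flag was set on the command line,
  // override any valid computed cost with the forced value.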
  if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
      RecipeCost.isValid())
    RecipeCost = InstructionCost(ForceTargetInstructionCost);

  LLVM_DEBUG({
    dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
    dump();
  });
  return RecipeCost;
}

InstructionCost VPRecipeBase::computeCost(ElementCount VF,
                                          VPCostContext &Ctx) const {
  // Compute the cost of the recipe by falling back to the legacy cost model,
  // using the recipe's underlying instruction. If there is no underlying
  // instruction, return 0.
  Instruction *UI = getInstructionForCost(this);
  if (UI && isa<VPReplicateRecipe>(this)) {
    // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
    // transform; avoid computing its cost multiple times for now.
    Ctx.SkipCostComputation.insert(UI);
  }
  return UI ? Ctx.getLegacyCost(UI, VF) : 0;
}

FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
  assert(OpType == OperationType::FPMathOp &&
         "recipe doesn't have fast math flags");
  FastMathFlags Res;
  Res.setAllowReassoc(FMFs.AllowReassoc);
  Res.setNoNaNs(FMFs.NoNaNs);
  Res.setNoInfs(FMFs.NoInfs);
  Res.setNoSignedZeros(FMFs.NoSignedZeros);
  Res.setAllowReciprocal(FMFs.AllowReciprocal);
  Res.setAllowContract(FMFs.AllowContract);
  Res.setApproxFunc(FMFs.ApproxFunc);
  return Res;
}

VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
                          Pred, DL),
      Opcode(Opcode), Name(Name.str()) {
  assert(Opcode == Instruction::ICmp &&
         "only ICmp predicates supported at the moment");
}

VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
      Opcode(Opcode), Name(Name.str()) {
  // Make sure the VPInstruction is a floating-point operation.
  assert(isFPMathOp() && "this op can't take fast-math flags");
}

bool VPInstruction::doesGeneratePerAllLanes() const {
  return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
}

bool VPInstruction::canGenerateScalarForFirstLane() const {
  if (Instruction::isBinaryOp(getOpcode()))
    return true;
  if (isSingleScalar() || isVectorToScalar())
    return true;
  switch (Opcode) {
  case Instruction::ICmp:
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::PtrAdd:
  case VPInstruction::ExplicitVectorLength:
    return true;
  default:
    return false;
  }
}

Value *VPInstruction::generatePerLane(VPTransformState &State,
                                      const VPIteration &Lane) {
  IRBuilderBase &Builder = State.Builder;

  assert(getOpcode() == VPInstruction::PtrAdd &&
         "only PtrAdd opcodes are supported for now");
  return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
                              State.get(getOperand(1), Lane), Name);
}

Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
  IRBuilderBase &Builder = State.Builder;

  if (Instruction::isBinaryOp(getOpcode())) {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
    Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
    Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
    auto *Res =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    if (auto *I = dyn_cast<Instruction>(Res))
      setFlags(I);
    return Res;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    return Builder.CreateNot(A, Name);
  }
  case Instruction::ICmp: {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
    Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
    Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
    return Builder.CreateCmp(getPredicate(), A, B, Name);
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    return Builder.CreateSelect(Cond, Op1, Op2, Name);
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));

    // If this part of the active lane mask is scalar, generate the CMP directly
    // to avoid unnecessary extracts.
    if (State.VF.isScalar())
      return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,
                               Name);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
                                   {PredTy, ScalarTC->getType()},
                                   {VIVElem0, ScalarTC}, nullptr, Name);
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy())
      return PartMinus1;
    Value *V2 = State.get(getOperand(1), Part);
    return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
  }
  case VPInstruction::CalculateTripCountMinusVF: {
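    // Compute TC > VF * UF ? TC - VF * UF : 0, i.e. the trip count minus one
    // full (possibly unrolled) vector step, clamped at zero.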
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    Value *ScalarTC = State.get(getOperand(0), {0, 0});
    Value *Step =
        createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
    Value *Sub = Builder.CreateSub(ScalarTC, Step);
    Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
    Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
    return Builder.CreateSelect(Cmp, Sub, Zero);
  }
  case VPInstruction::ExplicitVectorLength: {
    // Compute EVL
    auto GetEVL = [=](VPTransformState &State, Value *AVL) {
      assert(AVL->getType()->isIntegerTy() &&
             "Requested vector length should be an integer.");

      // TODO: Add support for MaxSafeDist for correct loop emission.
      assert(State.VF.isScalable() && "Expected scalable vector factor.");
      Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());

      Value *EVL = State.Builder.CreateIntrinsic(
          State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,
          {AVL, VFArg, State.Builder.getTrue()});
      return EVL;
    };
    // TODO: Restructure this code with an explicit remainder loop, vsetvli can
    // be outside of the main loop.
    assert(Part == 0 && "No unrolling expected for predicated vectorization.");
    // Compute VTC - IV as the AVL (requested vector length).
    Value *Index = State.get(getOperand(0), VPIteration(0, 0));
    Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
    Value *AVL = State.Builder.CreateSub(TripCount, Index);
    Value *EVL = GetEVL(State, AVL);
    return EVL;
  }
  case VPInstruction::CanonicalIVIncrementForPart: {
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0)
      return IV;

    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    if (Part != 0)
      return nullptr;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to the backward destination for exiting blocks now,
    // and to the forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();

    if (!getParent()->isExiting())
      return CondBr;

    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
    CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
    return CondBr;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      return nullptr;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
    Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to the backward destination (the header) now and to
    // the forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as its first argument, so we
    // need to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::ComputeReductionResult: {
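    // Combine the per-part reduction values into a single value and, for
    // reductions that are not performed in-loop, create the final target
    // reduction after the loop.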
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
    // and will be removed by breaking up the recipe further.
    auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
    auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
    // Get its reduction variable descriptor.
    const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

    RecurKind RK = RdxDesc.getRecurrenceKind();

    VPValue *LoopExitingDef = getOperand(1);
    Type *PhiTy = OrigPhi->getType();
    VectorParts RdxParts(State.UF);
    for (unsigned Part = 0; Part < State.UF; ++Part)
      RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());

    // If the vector reduction can be performed in a smaller type, we truncate
    // then extend the loop exit value to enable InstCombine to evaluate the
    // entire expression in the smaller type.
    // TODO: Handle this in truncateToMinBW.
    if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
      Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
      for (unsigned Part = 0; Part < State.UF; ++Part)
        RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
    }
    // Reduce all of the unrolled parts into a single vector.
    Value *ReducedPartRdx = RdxParts[0];
    unsigned Op = RecurrenceDescriptor::getOpcode(RK);
    if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
      Op = Instruction::Or;

    if (PhiR->isOrdered()) {
      ReducedPartRdx = RdxParts[State.UF - 1];
    } else {
      // Floating-point operations should have some FMF to enable the reduction.
      IRBuilderBase::FastMathFlagGuard FMFG(Builder);
      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
      for (unsigned Part = 1; Part < State.UF; ++Part) {
        Value *RdxPart = RdxParts[Part];
        if (Op != Instruction::ICmp && Op != Instruction::FCmp)
          ReducedPartRdx = Builder.CreateBinOp(
              (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
        else
          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
      }
    }

    // Create the reduction after the loop. Note that in-loop reductions create
    // the target reduction in the loop using a Reduction recipe.
    if ((State.VF.isVector() ||
         RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
        !PhiR->isInLoop()) {
      ReducedPartRdx =
          createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
      // If the reduction can be performed in a smaller type, we need to extend
      // the reduction to the wider type before we branch to the original loop.
      if (PhiTy != RdxDesc.getRecurrenceType())
        ReducedPartRdx = RdxDesc.isSigned()
                             ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
                             : Builder.CreateZExt(ReducedPartRdx, PhiTy);
    }

    // If there were stores of the reduction value to a uniform memory address
    // inside the loop, create the final store here.
    if (StoreInst *SI = RdxDesc.IntermediateStore) {
      auto *NewSI = Builder.CreateAlignedStore(
          ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
      propagateMetadata(NewSI, SI);
    }

    return ReducedPartRdx;
  }
  case VPInstruction::ExtractFromEnd: {
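    // Extract the value Offset lanes from the end of the last unrolled part,
    // or, when the loop is unrolled without vectorizing, the value of part
    // UF - Offset.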
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
    unsigned Offset = CI->getZExtValue();
    assert(Offset > 0 && "Offset from end must be positive");
    Value *Res;
    if (State.VF.isVector()) {
      assert(Offset <= State.VF.getKnownMinValue() &&
             "invalid offset to extract from");
      // Extract lane VF - Offset from the operand.
      Res = State.get(
          getOperand(0),
          VPIteration(State.UF - 1, VPLane::getLaneFromEnd(State.VF, Offset)));
    } else {
      assert(Offset <= State.UF && "invalid offset to extract from");
      // When loop is unrolled without vectorizing, retrieve UF - Offset.
      Res = State.get(getOperand(0), State.UF - Offset);
    }
    if (isa<ExtractElementInst>(Res))
      Res->setName(Name);
    return Res;
  }
  case VPInstruction::LogicalAnd: {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateLogicalAnd(A, B, Name);
  }
  case VPInstruction::PtrAdd: {
    assert(vputils::onlyFirstLaneUsed(this) &&
           "can only generate first lane for PtrAdd");
    Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
    Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
    return Builder.CreatePtrAdd(Ptr, Addend, Name);
  }
  case VPInstruction::ResumePhi: {
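    // Create a phi merging the value produced by the VPlan-generated
    // predecessor (operand 0) with the value incoming from all other, not yet
    // connected predecessors (operand 1).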
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);
    Value *IncomingFromVPlanPred =
        State.get(getOperand(0), Part, /* IsScalar */ true);
    Value *IncomingFromOtherPreds =
        State.get(getOperand(1), Part, /* IsScalar */ true);
    auto *NewPhi =
        Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name);
    BasicBlock *VPlanPred =
        State.CFG
            .VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
    NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
    for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
      assert(OtherPred != VPlanPred &&
             "VPlan predecessors should not be connected yet");
      NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);
    }
    return NewPhi;
  }

  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

bool VPInstruction::isVectorToScalar() const {
  return getOpcode() == VPInstruction::ExtractFromEnd ||
         getOpcode() == VPInstruction::ComputeReductionResult;
}

bool VPInstruction::isSingleScalar() const {
  return getOpcode() == VPInstruction::ResumePhi;
}

#if !defined(NDEBUG)
bool VPInstruction::isFPMathOp() const {
  // Inspired by FPMathOperator::classof. Notable differences are that we don't
  // support Call, PHI and Select opcodes here yet.
  return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
         Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
         Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
         Opcode == Instruction::FCmp || Opcode == Instruction::Select;
}
#endif

void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  assert((hasFastMathFlags() == isFPMathOp() ||
          getOpcode() == Instruction::Select) &&
         "Recipe not a FPMathOp but has fast-math flags?");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());
  State.setDebugLocFrom(getDebugLoc());
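  // Decide how values are generated for this VPInstruction: a single scalar
  // for the first lane only, one scalar per lane, or one (possibly vector)
  // value per unrolled part.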
  bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() &&
                                   (vputils::onlyFirstLaneUsed(this) ||
                                    isVectorToScalar() || isSingleScalar());
  bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
  bool OnlyFirstPartUsed = vputils::onlyFirstPartUsed(this);
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (GeneratesPerAllLanes) {
      for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
           Lane != NumLanes; ++Lane) {
        Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
        assert(GeneratedValue && "generatePerLane must produce a value");
        State.set(this, GeneratedValue, VPIteration(Part, Lane));
      }
      continue;
    }

    if (Part != 0 && OnlyFirstPartUsed && hasResult()) {
      Value *Part0 = State.get(this, 0, /*IsScalar*/ GeneratesPerFirstLaneOnly);
      State.set(this, Part0, Part,
                /*IsScalar*/ GeneratesPerFirstLaneOnly);
      continue;
    }

    Value *GeneratedValue = generatePerPart(State, Part);
    if (!hasResult())
      continue;
    assert(GeneratedValue && "generatePerPart must produce a value");
    assert((GeneratedValue->getType()->isVectorTy() ==
                !GeneratesPerFirstLaneOnly ||
            State.VF.isScalar()) &&
           "scalar value but not only first lane defined");
    State.set(this, GeneratedValue, Part,
              /*IsScalar*/ GeneratesPerFirstLaneOnly);
  }
}

bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstLaneUsed(this);

  switch (getOpcode()) {
  default:
    return false;
  case Instruction::ICmp:
  case VPInstruction::PtrAdd:
    // TODO: Cover additional opcodes.
    return vputils::onlyFirstLaneUsed(this);
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::BranchOnCount:
  case VPInstruction::BranchOnCond:
  case VPInstruction::ResumePhi:
    return true;
  };
  llvm_unreachable("switch should return");
}

bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstPartUsed(this);

  switch (getOpcode()) {
  default:
    return false;
  case Instruction::ICmp:
  case Instruction::Select:
    return vputils::onlyFirstPartUsed(this);
  case VPInstruction::BranchOnCount:
  case VPInstruction::BranchOnCond:
  case VPInstruction::CanonicalIVIncrementForPart:
    return true;
  };
  llvm_unreachable("switch should return");
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::ResumePhi:
    O << "resume-phi";
    break;
  case VPInstruction::ExplicitVectorLength:
    O << "EXPLICIT-VECTOR-LENGTH";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CalculateTripCountMinusVF:
    O << "TC > VF ? TC - VF : 0";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part +";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count";
    break;
  case VPInstruction::ExtractFromEnd:
    O << "extract-from-end";
    break;
  case VPInstruction::ComputeReductionResult:
    O << "compute-reduction-result";
    break;
  case VPInstruction::LogicalAnd:
    O << "logical-and";
    break;
  case VPInstruction::PtrAdd:
    O << "ptradd";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  printFlags(O);
  printOperands(O, SlotTracker);

  if (auto DL = getDebugLoc()) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  Function *CalledScalarFn = getCalledScalarFunction();
  assert(!isDbgInfoIntrinsic(CalledScalarFn->getIntrinsicID()) &&
         "DbgInfoIntrinsic should have been dropped during VPlan construction");
  State.setDebugLocFrom(getDebugLoc());

  bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
  FunctionType *VFTy = nullptr;
  if (Variant)
    VFTy = Variant->getFunctionType();
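  // Generate one wide call per unrolled part, using either the vector
  // intrinsic or the supplied vector function variant.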
876bdd1243dSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
87706c3fb27SDimitry Andric SmallVector<Type *, 2> TysForDecl;
87806c3fb27SDimitry Andric // Add return type if intrinsic is overloaded on it.
879647cbc5dSDimitry Andric if (UseIntrinsic &&
880647cbc5dSDimitry Andric isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
881*0fca6ea1SDimitry Andric TysForDecl.push_back(VectorType::get(
882*0fca6ea1SDimitry Andric CalledScalarFn->getReturnType()->getScalarType(), State.VF));
883bdd1243dSDimitry Andric SmallVector<Value *, 4> Args;
884*0fca6ea1SDimitry Andric for (const auto &I : enumerate(arg_operands())) {
885bdd1243dSDimitry Andric // Some intrinsics have a scalar argument - don't replace it with a
886bdd1243dSDimitry Andric // vector.
887bdd1243dSDimitry Andric Value *Arg;
888b3edf446SDimitry Andric if (UseIntrinsic &&
889b3edf446SDimitry Andric isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
890bdd1243dSDimitry Andric Arg = State.get(I.value(), VPIteration(0, 0));
891b3edf446SDimitry Andric // Some vectorized function variants may also take a scalar argument,
892b3edf446SDimitry Andric // e.g. linear parameters for pointers. This needs to be the scalar value
893b3edf446SDimitry Andric // from the start of the respective part when interleaving.
894b3edf446SDimitry Andric else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
895b3edf446SDimitry Andric Arg = State.get(I.value(), VPIteration(Part, 0));
8965f757f3fSDimitry Andric else
8975f757f3fSDimitry Andric Arg = State.get(I.value(), Part);
898647cbc5dSDimitry Andric if (UseIntrinsic &&
899647cbc5dSDimitry Andric isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
900bdd1243dSDimitry Andric TysForDecl.push_back(Arg->getType());
901bdd1243dSDimitry Andric Args.push_back(Arg);
902bdd1243dSDimitry Andric }
903bdd1243dSDimitry Andric
904bdd1243dSDimitry Andric Function *VectorF;
905647cbc5dSDimitry Andric if (UseIntrinsic) {
906bdd1243dSDimitry Andric // Use vector version of the intrinsic.
907bdd1243dSDimitry Andric Module *M = State.Builder.GetInsertBlock()->getModule();
908bdd1243dSDimitry Andric VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
909bdd1243dSDimitry Andric assert(VectorF && "Can't retrieve vector intrinsic.");
910bdd1243dSDimitry Andric } else {
911bdd1243dSDimitry Andric #ifndef NDEBUG
91206c3fb27SDimitry Andric assert(Variant != nullptr && "Can't create vector function.");
913bdd1243dSDimitry Andric #endif
91406c3fb27SDimitry Andric VectorF = Variant;
915bdd1243dSDimitry Andric }
91606c3fb27SDimitry Andric
917*0fca6ea1SDimitry Andric auto *CI = cast_or_null<CallInst>(getUnderlyingInstr());
918bdd1243dSDimitry Andric SmallVector<OperandBundleDef, 1> OpBundles;
919*0fca6ea1SDimitry Andric if (CI)
920*0fca6ea1SDimitry Andric CI->getOperandBundlesAsDefs(OpBundles);
921*0fca6ea1SDimitry Andric
922bdd1243dSDimitry Andric CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
923bdd1243dSDimitry Andric
924bdd1243dSDimitry Andric if (isa<FPMathOperator>(V))
925*0fca6ea1SDimitry Andric V->copyFastMathFlags(CI);
926bdd1243dSDimitry Andric
927*0fca6ea1SDimitry Andric if (!V->getType()->isVoidTy())
928bdd1243dSDimitry Andric State.set(this, V, Part);
929*0fca6ea1SDimitry Andric State.addMetadata(V, CI);
930bdd1243dSDimitry Andric }
931bdd1243dSDimitry Andric }
932bdd1243dSDimitry Andric
93381ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
93481ad6265SDimitry Andric void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
93581ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
93681ad6265SDimitry Andric O << Indent << "WIDEN-CALL ";
93781ad6265SDimitry Andric
938*0fca6ea1SDimitry Andric Function *CalledFn = getCalledScalarFunction();
939*0fca6ea1SDimitry Andric if (CalledFn->getReturnType()->isVoidTy())
94081ad6265SDimitry Andric O << "void ";
94181ad6265SDimitry Andric else {
94281ad6265SDimitry Andric printAsOperand(O, SlotTracker);
94381ad6265SDimitry Andric O << " = ";
94481ad6265SDimitry Andric }
94581ad6265SDimitry Andric
946*0fca6ea1SDimitry Andric O << "call @" << CalledFn->getName() << "(";
947*0fca6ea1SDimitry Andric interleaveComma(arg_operands(), O, [&O, &SlotTracker](VPValue *Op) {
948*0fca6ea1SDimitry Andric Op->printAsOperand(O, SlotTracker);
949*0fca6ea1SDimitry Andric });
95081ad6265SDimitry Andric O << ")";
951bdd1243dSDimitry Andric
952bdd1243dSDimitry Andric if (VectorIntrinsicID)
953bdd1243dSDimitry Andric O << " (using vector intrinsic)";
95406c3fb27SDimitry Andric else {
95506c3fb27SDimitry Andric O << " (using library function";
95606c3fb27SDimitry Andric if (Variant->hasName())
95706c3fb27SDimitry Andric O << ": " << Variant->getName();
95806c3fb27SDimitry Andric O << ")";
95906c3fb27SDimitry Andric }
96081ad6265SDimitry Andric }
96181ad6265SDimitry Andric
96281ad6265SDimitry Andric void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
96381ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
96481ad6265SDimitry Andric O << Indent << "WIDEN-SELECT ";
96581ad6265SDimitry Andric printAsOperand(O, SlotTracker);
96681ad6265SDimitry Andric O << " = select ";
96781ad6265SDimitry Andric getOperand(0)->printAsOperand(O, SlotTracker);
96881ad6265SDimitry Andric O << ", ";
96981ad6265SDimitry Andric getOperand(1)->printAsOperand(O, SlotTracker);
97081ad6265SDimitry Andric O << ", ";
97181ad6265SDimitry Andric getOperand(2)->printAsOperand(O, SlotTracker);
97206c3fb27SDimitry Andric O << (isInvariantCond() ? " (condition is loop invariant)" : "");
97381ad6265SDimitry Andric }
974753f127fSDimitry Andric #endif
97581ad6265SDimitry Andric
976753f127fSDimitry Andric void VPWidenSelectRecipe::execute(VPTransformState &State) {
9775f757f3fSDimitry Andric State.setDebugLocFrom(getDebugLoc());
978753f127fSDimitry Andric
979753f127fSDimitry Andric // The condition can be loop invariant but still defined inside the
980753f127fSDimitry Andric // loop. This means that we can't just use the original 'cond' value.
981753f127fSDimitry Andric // We have to take the 'vectorized' value and pick the first lane.
982753f127fSDimitry Andric // Instcombine will make this a no-op.
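  // In other words, the scalar condition reused for every part below is lane 0
  // of part 0 of the widened condition (State.get with VPIteration(0, 0)).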
983753f127fSDimitry Andric auto *InvarCond =
98406c3fb27SDimitry Andric isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
985753f127fSDimitry Andric
986753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
98706c3fb27SDimitry Andric Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
988753f127fSDimitry Andric Value *Op0 = State.get(getOperand(1), Part);
989753f127fSDimitry Andric Value *Op1 = State.get(getOperand(2), Part);
990753f127fSDimitry Andric Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
991753f127fSDimitry Andric State.set(this, Sel, Part);
9925f757f3fSDimitry Andric State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
993753f127fSDimitry Andric }
994753f127fSDimitry Andric }
995753f127fSDimitry Andric
9965f757f3fSDimitry Andric VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
9975f757f3fSDimitry Andric const FastMathFlags &FMF) {
9985f757f3fSDimitry Andric AllowReassoc = FMF.allowReassoc();
9995f757f3fSDimitry Andric NoNaNs = FMF.noNaNs();
10005f757f3fSDimitry Andric NoInfs = FMF.noInfs();
10015f757f3fSDimitry Andric NoSignedZeros = FMF.noSignedZeros();
10025f757f3fSDimitry Andric AllowReciprocal = FMF.allowReciprocal();
10035f757f3fSDimitry Andric AllowContract = FMF.allowContract();
10045f757f3fSDimitry Andric ApproxFunc = FMF.approxFunc();
10055f757f3fSDimitry Andric }
10065f757f3fSDimitry Andric
100706c3fb27SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
100806c3fb27SDimitry Andric void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
100906c3fb27SDimitry Andric switch (OpType) {
10105f757f3fSDimitry Andric case OperationType::Cmp:
10115f757f3fSDimitry Andric O << " " << CmpInst::getPredicateName(getPredicate());
10125f757f3fSDimitry Andric break;
10135f757f3fSDimitry Andric case OperationType::DisjointOp:
10145f757f3fSDimitry Andric if (DisjointFlags.IsDisjoint)
10155f757f3fSDimitry Andric O << " disjoint";
10165f757f3fSDimitry Andric break;
101706c3fb27SDimitry Andric case OperationType::PossiblyExactOp:
101806c3fb27SDimitry Andric if (ExactFlags.IsExact)
101906c3fb27SDimitry Andric O << " exact";
102006c3fb27SDimitry Andric break;
102106c3fb27SDimitry Andric case OperationType::OverflowingBinOp:
102206c3fb27SDimitry Andric if (WrapFlags.HasNUW)
102306c3fb27SDimitry Andric O << " nuw";
102406c3fb27SDimitry Andric if (WrapFlags.HasNSW)
102506c3fb27SDimitry Andric O << " nsw";
102606c3fb27SDimitry Andric break;
102706c3fb27SDimitry Andric case OperationType::FPMathOp:
102806c3fb27SDimitry Andric getFastMathFlags().print(O);
102906c3fb27SDimitry Andric break;
103006c3fb27SDimitry Andric case OperationType::GEPOp:
103106c3fb27SDimitry Andric if (GEPFlags.IsInBounds)
103206c3fb27SDimitry Andric O << " inbounds";
103306c3fb27SDimitry Andric break;
10345f757f3fSDimitry Andric case OperationType::NonNegOp:
10355f757f3fSDimitry Andric if (NonNegFlags.NonNeg)
10365f757f3fSDimitry Andric O << " nneg";
10375f757f3fSDimitry Andric break;
103806c3fb27SDimitry Andric case OperationType::Other:
103906c3fb27SDimitry Andric break;
104006c3fb27SDimitry Andric }
10415f757f3fSDimitry Andric if (getNumOperands() > 0)
104206c3fb27SDimitry Andric O << " ";
104306c3fb27SDimitry Andric }
104406c3fb27SDimitry Andric #endif
104506c3fb27SDimitry Andric
1046753f127fSDimitry Andric void VPWidenRecipe::execute(VPTransformState &State) {
10475f757f3fSDimitry Andric State.setDebugLocFrom(getDebugLoc());
1048753f127fSDimitry Andric auto &Builder = State.Builder;
10495f757f3fSDimitry Andric switch (Opcode) {
1050753f127fSDimitry Andric case Instruction::Call:
1051753f127fSDimitry Andric case Instruction::Br:
1052753f127fSDimitry Andric case Instruction::PHI:
1053753f127fSDimitry Andric case Instruction::GetElementPtr:
1054753f127fSDimitry Andric case Instruction::Select:
1055753f127fSDimitry Andric llvm_unreachable("This instruction is handled by a different recipe.");
1056753f127fSDimitry Andric case Instruction::UDiv:
1057753f127fSDimitry Andric case Instruction::SDiv:
1058753f127fSDimitry Andric case Instruction::SRem:
1059753f127fSDimitry Andric case Instruction::URem:
1060753f127fSDimitry Andric case Instruction::Add:
1061753f127fSDimitry Andric case Instruction::FAdd:
1062753f127fSDimitry Andric case Instruction::Sub:
1063753f127fSDimitry Andric case Instruction::FSub:
1064753f127fSDimitry Andric case Instruction::FNeg:
1065753f127fSDimitry Andric case Instruction::Mul:
1066753f127fSDimitry Andric case Instruction::FMul:
1067753f127fSDimitry Andric case Instruction::FDiv:
1068753f127fSDimitry Andric case Instruction::FRem:
1069753f127fSDimitry Andric case Instruction::Shl:
1070753f127fSDimitry Andric case Instruction::LShr:
1071753f127fSDimitry Andric case Instruction::AShr:
1072753f127fSDimitry Andric case Instruction::And:
1073753f127fSDimitry Andric case Instruction::Or:
1074753f127fSDimitry Andric case Instruction::Xor: {
1075753f127fSDimitry Andric // Just widen unops and binops.
1076753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
1077753f127fSDimitry Andric SmallVector<Value *, 2> Ops;
1078753f127fSDimitry Andric for (VPValue *VPOp : operands())
1079753f127fSDimitry Andric Ops.push_back(State.get(VPOp, Part));
1080753f127fSDimitry Andric
10815f757f3fSDimitry Andric Value *V = Builder.CreateNAryOp(Opcode, Ops);
1082753f127fSDimitry Andric
108306c3fb27SDimitry Andric if (auto *VecOp = dyn_cast<Instruction>(V))
108406c3fb27SDimitry Andric setFlags(VecOp);
1085753f127fSDimitry Andric
1086753f127fSDimitry Andric // Use this vector value for all users of the original instruction.
1087753f127fSDimitry Andric State.set(this, V, Part);
10885f757f3fSDimitry Andric State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1089753f127fSDimitry Andric }
1090753f127fSDimitry Andric
1091753f127fSDimitry Andric break;
1092753f127fSDimitry Andric }
1093753f127fSDimitry Andric case Instruction::Freeze: {
1094753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
1095753f127fSDimitry Andric Value *Op = State.get(getOperand(0), Part);
1096753f127fSDimitry Andric
1097753f127fSDimitry Andric Value *Freeze = Builder.CreateFreeze(Op);
1098753f127fSDimitry Andric State.set(this, Freeze, Part);
1099753f127fSDimitry Andric }
1100753f127fSDimitry Andric break;
1101753f127fSDimitry Andric }
1102753f127fSDimitry Andric case Instruction::ICmp:
1103753f127fSDimitry Andric case Instruction::FCmp: {
1104753f127fSDimitry Andric // Widen compares. Generate vector compares.
11055f757f3fSDimitry Andric bool FCmp = Opcode == Instruction::FCmp;
1106753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
1107753f127fSDimitry Andric Value *A = State.get(getOperand(0), Part);
1108753f127fSDimitry Andric Value *B = State.get(getOperand(1), Part);
1109753f127fSDimitry Andric Value *C = nullptr;
1110753f127fSDimitry Andric if (FCmp) {
1111753f127fSDimitry Andric // Propagate fast math flags.
1112753f127fSDimitry Andric IRBuilder<>::FastMathFlagGuard FMFG(Builder);
11135f757f3fSDimitry Andric if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
11145f757f3fSDimitry Andric Builder.setFastMathFlags(I->getFastMathFlags());
11155f757f3fSDimitry Andric C = Builder.CreateFCmp(getPredicate(), A, B);
1116753f127fSDimitry Andric } else {
11175f757f3fSDimitry Andric C = Builder.CreateICmp(getPredicate(), A, B);
1118753f127fSDimitry Andric }
1119753f127fSDimitry Andric State.set(this, C, Part);
11205f757f3fSDimitry Andric State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1121753f127fSDimitry Andric }
1122753f127fSDimitry Andric
1123753f127fSDimitry Andric break;
1124753f127fSDimitry Andric }
1125753f127fSDimitry Andric default:
1126753f127fSDimitry Andric // This instruction is not vectorized by simple widening.
11275f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
11285f757f3fSDimitry Andric << Instruction::getOpcodeName(Opcode));
1129753f127fSDimitry Andric llvm_unreachable("Unhandled instruction!");
1130753f127fSDimitry Andric } // end of switch.
11315f757f3fSDimitry Andric
11325f757f3fSDimitry Andric #if !defined(NDEBUG)
11335f757f3fSDimitry Andric // Verify that VPlan type inference results agree with the type of the
11345f757f3fSDimitry Andric // generated values.
11355f757f3fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
11365f757f3fSDimitry Andric assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
11375f757f3fSDimitry Andric State.VF) == State.get(this, Part)->getType() &&
11385f757f3fSDimitry Andric "inferred type and type from generated instructions do not match");
1139753f127fSDimitry Andric }
11405f757f3fSDimitry Andric #endif
11415f757f3fSDimitry Andric }
11425f757f3fSDimitry Andric
1143753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
114481ad6265SDimitry Andric void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
114581ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
114681ad6265SDimitry Andric O << Indent << "WIDEN ";
114781ad6265SDimitry Andric printAsOperand(O, SlotTracker);
11485f757f3fSDimitry Andric O << " = " << Instruction::getOpcodeName(Opcode);
114906c3fb27SDimitry Andric printFlags(O);
115081ad6265SDimitry Andric printOperands(O, SlotTracker);
115181ad6265SDimitry Andric }
115206c3fb27SDimitry Andric #endif
115306c3fb27SDimitry Andric
115406c3fb27SDimitry Andric void VPWidenCastRecipe::execute(VPTransformState &State) {
11555f757f3fSDimitry Andric State.setDebugLocFrom(getDebugLoc());
115606c3fb27SDimitry Andric auto &Builder = State.Builder;
115706c3fb27SDimitry Andric // Vectorize casts.
115806c3fb27SDimitry Andric assert(State.VF.isVector() && "Not vectorizing?");
115906c3fb27SDimitry Andric Type *DestTy = VectorType::get(getResultType(), State.VF);
11605f757f3fSDimitry Andric VPValue *Op = getOperand(0);
116106c3fb27SDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
11625f757f3fSDimitry Andric if (Part > 0 && Op->isLiveIn()) {
11635f757f3fSDimitry Andric // FIXME: Remove once explicit unrolling is implemented using VPlan.
11645f757f3fSDimitry Andric State.set(this, State.get(this, 0), Part);
11655f757f3fSDimitry Andric continue;
11665f757f3fSDimitry Andric }
11675f757f3fSDimitry Andric Value *A = State.get(Op, Part);
116806c3fb27SDimitry Andric Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
116906c3fb27SDimitry Andric State.set(this, Cast, Part);
11705f757f3fSDimitry Andric State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
117106c3fb27SDimitry Andric }
117206c3fb27SDimitry Andric }
117306c3fb27SDimitry Andric
117406c3fb27SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
117506c3fb27SDimitry Andric void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
117606c3fb27SDimitry Andric VPSlotTracker &SlotTracker) const {
117706c3fb27SDimitry Andric O << Indent << "WIDEN-CAST ";
117806c3fb27SDimitry Andric printAsOperand(O, SlotTracker);
117906c3fb27SDimitry Andric O << " = " << Instruction::getOpcodeName(Opcode) << " ";
11805f757f3fSDimitry Andric printFlags(O);
118106c3fb27SDimitry Andric printOperands(O, SlotTracker);
118206c3fb27SDimitry Andric O << " to " << *getResultType();
118306c3fb27SDimitry Andric }
11845f757f3fSDimitry Andric #endif
118581ad6265SDimitry Andric
11865f757f3fSDimitry Andric /// This function adds
11875f757f3fSDimitry Andric /// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
11885f757f3fSDimitry Andric /// to each vector element of Val. The sequence starts at StartIdx.
11895f757f3fSDimitry Andric /// \p BinOp is only relevant for FP induction variables.
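/// For example (illustrative only), with StartIdx = 2, Step = 3 and a fixed
/// VF of 4, the integer path below adds <6, 9, 12, 15> to Val.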
11905f757f3fSDimitry Andric static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
11915f757f3fSDimitry Andric Instruction::BinaryOps BinOp, ElementCount VF,
11925f757f3fSDimitry Andric IRBuilderBase &Builder) {
11935f757f3fSDimitry Andric assert(VF.isVector() && "only vector VFs are supported");
11945f757f3fSDimitry Andric
11955f757f3fSDimitry Andric // Create and check the types.
11965f757f3fSDimitry Andric auto *ValVTy = cast<VectorType>(Val->getType());
11975f757f3fSDimitry Andric ElementCount VLen = ValVTy->getElementCount();
11985f757f3fSDimitry Andric
11995f757f3fSDimitry Andric Type *STy = Val->getType()->getScalarType();
12005f757f3fSDimitry Andric assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
12015f757f3fSDimitry Andric "Induction Step must be an integer or FP");
12025f757f3fSDimitry Andric assert(Step->getType() == STy && "Step has wrong type");
12035f757f3fSDimitry Andric
12045f757f3fSDimitry Andric SmallVector<Constant *, 8> Indices;
12055f757f3fSDimitry Andric
12065f757f3fSDimitry Andric // Create a vector of consecutive numbers from zero to VF.
12075f757f3fSDimitry Andric VectorType *InitVecValVTy = ValVTy;
12085f757f3fSDimitry Andric if (STy->isFloatingPointTy()) {
12095f757f3fSDimitry Andric Type *InitVecValSTy =
12105f757f3fSDimitry Andric IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
12115f757f3fSDimitry Andric InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
12125f757f3fSDimitry Andric }
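  // Note (descriptive comment): the step sequence is generated as an integer
  // vector first; for FP inductions it is converted with uitofp further below.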
12135f757f3fSDimitry Andric Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
12145f757f3fSDimitry Andric
12155f757f3fSDimitry Andric // Splat the StartIdx
12165f757f3fSDimitry Andric Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
12175f757f3fSDimitry Andric
12185f757f3fSDimitry Andric if (STy->isIntegerTy()) {
12195f757f3fSDimitry Andric InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
12205f757f3fSDimitry Andric Step = Builder.CreateVectorSplat(VLen, Step);
12215f757f3fSDimitry Andric assert(Step->getType() == Val->getType() && "Invalid step vec");
12225f757f3fSDimitry Andric // FIXME: The newly created binary instructions should contain nsw/nuw
12235f757f3fSDimitry Andric // flags, which can be found from the original scalar operations.
12245f757f3fSDimitry Andric Step = Builder.CreateMul(InitVec, Step);
12255f757f3fSDimitry Andric return Builder.CreateAdd(Val, Step, "induction");
12265f757f3fSDimitry Andric }
12275f757f3fSDimitry Andric
12285f757f3fSDimitry Andric // Floating point induction.
12295f757f3fSDimitry Andric assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
12305f757f3fSDimitry Andric "Binary Opcode should be specified for FP induction");
12315f757f3fSDimitry Andric InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
12325f757f3fSDimitry Andric InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
12335f757f3fSDimitry Andric
12345f757f3fSDimitry Andric Step = Builder.CreateVectorSplat(VLen, Step);
12355f757f3fSDimitry Andric Value *MulOp = Builder.CreateFMul(InitVec, Step);
12365f757f3fSDimitry Andric return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
12375f757f3fSDimitry Andric }
12385f757f3fSDimitry Andric
12395f757f3fSDimitry Andric /// A helper function that returns an integer or floating-point constant with
12405f757f3fSDimitry Andric /// value C.
12415f757f3fSDimitry Andric static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
12425f757f3fSDimitry Andric return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
12435f757f3fSDimitry Andric : ConstantFP::get(Ty, C);
12445f757f3fSDimitry Andric }
12455f757f3fSDimitry Andric
12465f757f3fSDimitry Andric static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
12475f757f3fSDimitry Andric ElementCount VF) {
12485f757f3fSDimitry Andric assert(FTy->isFloatingPointTy() && "Expected floating point type!");
12495f757f3fSDimitry Andric Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
12505f757f3fSDimitry Andric Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
12515f757f3fSDimitry Andric return B.CreateUIToFP(RuntimeVF, FTy);
12525f757f3fSDimitry Andric }
12535f757f3fSDimitry Andric
12545f757f3fSDimitry Andric void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
12555f757f3fSDimitry Andric assert(!State.Instance && "Int or FP induction being replicated.");
12565f757f3fSDimitry Andric
12575f757f3fSDimitry Andric Value *Start = getStartValue()->getLiveInIRValue();
12585f757f3fSDimitry Andric const InductionDescriptor &ID = getInductionDescriptor();
12595f757f3fSDimitry Andric TruncInst *Trunc = getTruncInst();
12605f757f3fSDimitry Andric IRBuilderBase &Builder = State.Builder;
12615f757f3fSDimitry Andric assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
12625f757f3fSDimitry Andric assert(State.VF.isVector() && "must have vector VF");
12635f757f3fSDimitry Andric
12645f757f3fSDimitry Andric // The value from the original loop to which we are mapping the new induction
12655f757f3fSDimitry Andric // variable.
12665f757f3fSDimitry Andric Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
12675f757f3fSDimitry Andric
12685f757f3fSDimitry Andric // Fast-math-flags propagate from the original induction instruction.
12695f757f3fSDimitry Andric IRBuilder<>::FastMathFlagGuard FMFG(Builder);
12705f757f3fSDimitry Andric if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
12715f757f3fSDimitry Andric Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
12725f757f3fSDimitry Andric
12735f757f3fSDimitry Andric // Now do the actual transformations, and start with fetching the step value.
12745f757f3fSDimitry Andric Value *Step = State.get(getStepValue(), VPIteration(0, 0));
12755f757f3fSDimitry Andric
12765f757f3fSDimitry Andric assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
12775f757f3fSDimitry Andric "Expected either an induction phi-node or a truncate of it!");
12785f757f3fSDimitry Andric
12795f757f3fSDimitry Andric // Construct the initial value of the vector IV in the vector loop preheader
12805f757f3fSDimitry Andric auto CurrIP = Builder.saveIP();
12815f757f3fSDimitry Andric BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
12825f757f3fSDimitry Andric Builder.SetInsertPoint(VectorPH->getTerminator());
12835f757f3fSDimitry Andric if (isa<TruncInst>(EntryVal)) {
12845f757f3fSDimitry Andric assert(Start->getType()->isIntegerTy() &&
12855f757f3fSDimitry Andric "Truncation requires an integer type");
12865f757f3fSDimitry Andric auto *TruncType = cast<IntegerType>(EntryVal->getType());
12875f757f3fSDimitry Andric Step = Builder.CreateTrunc(Step, TruncType);
12885f757f3fSDimitry Andric Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
12895f757f3fSDimitry Andric }
12905f757f3fSDimitry Andric
12915f757f3fSDimitry Andric Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
12925f757f3fSDimitry Andric Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
12935f757f3fSDimitry Andric Value *SteppedStart = getStepVector(
12945f757f3fSDimitry Andric SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);
12955f757f3fSDimitry Andric
12965f757f3fSDimitry Andric // We create vector phi nodes for both integer and floating-point induction
12975f757f3fSDimitry Andric // variables. Here, we determine the kind of arithmetic we will perform.
12985f757f3fSDimitry Andric Instruction::BinaryOps AddOp;
12995f757f3fSDimitry Andric Instruction::BinaryOps MulOp;
13005f757f3fSDimitry Andric if (Step->getType()->isIntegerTy()) {
13015f757f3fSDimitry Andric AddOp = Instruction::Add;
13025f757f3fSDimitry Andric MulOp = Instruction::Mul;
13035f757f3fSDimitry Andric } else {
13045f757f3fSDimitry Andric AddOp = ID.getInductionOpcode();
13055f757f3fSDimitry Andric MulOp = Instruction::FMul;
13065f757f3fSDimitry Andric }
13075f757f3fSDimitry Andric
13085f757f3fSDimitry Andric // Multiply the vectorization factor by the step using integer or
13095f757f3fSDimitry Andric // floating-point arithmetic as appropriate.
13105f757f3fSDimitry Andric Type *StepType = Step->getType();
13115f757f3fSDimitry Andric Value *RuntimeVF;
13125f757f3fSDimitry Andric if (Step->getType()->isFloatingPointTy())
13135f757f3fSDimitry Andric RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
13145f757f3fSDimitry Andric else
13155f757f3fSDimitry Andric RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
13165f757f3fSDimitry Andric Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
13175f757f3fSDimitry Andric
13185f757f3fSDimitry Andric // Create a vector splat to use in the induction update.
13195f757f3fSDimitry Andric //
13205f757f3fSDimitry Andric // FIXME: If the step is non-constant, we create the vector splat with
13215f757f3fSDimitry Andric // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
13225f757f3fSDimitry Andric // handle a constant vector splat.
13235f757f3fSDimitry Andric Value *SplatVF = isa<Constant>(Mul)
13245f757f3fSDimitry Andric ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
13255f757f3fSDimitry Andric : Builder.CreateVectorSplat(State.VF, Mul);
13265f757f3fSDimitry Andric Builder.restoreIP(CurrIP);
13275f757f3fSDimitry Andric
13285f757f3fSDimitry Andric // We may need to add the step a number of times, depending on the unroll
13295f757f3fSDimitry Andric // factor. The last of those goes into the PHI.
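  // Illustrative sketch for UF == 2: part 0 uses the phi (vec.ind), part 1
  // uses vec.ind + VF * Step ("step.add"), and vec.ind + 2 * VF * Step
  // ("vec.ind.next") is fed back into the phi below.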
13305f757f3fSDimitry Andric PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
13315f757f3fSDimitry Andric VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
13325f757f3fSDimitry Andric VecInd->setDebugLoc(EntryVal->getDebugLoc());
13335f757f3fSDimitry Andric Instruction *LastInduction = VecInd;
13345f757f3fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
13355f757f3fSDimitry Andric State.set(this, LastInduction, Part);
13365f757f3fSDimitry Andric
13375f757f3fSDimitry Andric if (isa<TruncInst>(EntryVal))
13385f757f3fSDimitry Andric State.addMetadata(LastInduction, EntryVal);
13395f757f3fSDimitry Andric
13405f757f3fSDimitry Andric LastInduction = cast<Instruction>(
13415f757f3fSDimitry Andric Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
13425f757f3fSDimitry Andric LastInduction->setDebugLoc(EntryVal->getDebugLoc());
13435f757f3fSDimitry Andric }
13445f757f3fSDimitry Andric
13455f757f3fSDimitry Andric LastInduction->setName("vec.ind.next");
13465f757f3fSDimitry Andric VecInd->addIncoming(SteppedStart, VectorPH);
13475f757f3fSDimitry Andric // Add induction update using an incorrect block temporarily. The phi node
13485f757f3fSDimitry Andric // will be fixed after VPlan execution. Note that at this point the latch
13495f757f3fSDimitry Andric // block cannot be used, as it does not exist yet.
13505f757f3fSDimitry Andric // TODO: Model increment value in VPlan, by turning the recipe into a
13515f757f3fSDimitry Andric // multi-def and a subclass of VPHeaderPHIRecipe.
13525f757f3fSDimitry Andric VecInd->addIncoming(LastInduction, VectorPH);
13535f757f3fSDimitry Andric }
13545f757f3fSDimitry Andric
13555f757f3fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
135681ad6265SDimitry Andric void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
135781ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
135881ad6265SDimitry Andric O << Indent << "WIDEN-INDUCTION";
135981ad6265SDimitry Andric if (getTruncInst()) {
136081ad6265SDimitry Andric O << "\\l\"";
136181ad6265SDimitry Andric O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
136281ad6265SDimitry Andric O << " +\n" << Indent << "\" ";
136381ad6265SDimitry Andric getVPValue(0)->printAsOperand(O, SlotTracker);
136481ad6265SDimitry Andric } else
136581ad6265SDimitry Andric O << " " << VPlanIngredient(IV);
136681ad6265SDimitry Andric
136781ad6265SDimitry Andric O << ", ";
136881ad6265SDimitry Andric getStepValue()->printAsOperand(O, SlotTracker);
136981ad6265SDimitry Andric }
137081ad6265SDimitry Andric #endif
137181ad6265SDimitry Andric
137281ad6265SDimitry Andric bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
137306c3fb27SDimitry Andric // The step may be defined by a recipe in the preheader (e.g. if it requires
137406c3fb27SDimitry Andric // SCEV expansion), but for the canonical induction the step is required to be
137506c3fb27SDimitry Andric // 1, which is represented as live-in.
137606c3fb27SDimitry Andric if (getStepValue()->getDefiningRecipe())
137706c3fb27SDimitry Andric return false;
137806c3fb27SDimitry Andric auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
137981ad6265SDimitry Andric auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
1380*0fca6ea1SDimitry Andric auto *CanIV = cast<VPCanonicalIVPHIRecipe>(&*getParent()->begin());
1381*0fca6ea1SDimitry Andric return StartC && StartC->isZero() && StepC && StepC->isOne() &&
1382*0fca6ea1SDimitry Andric getScalarType() == CanIV->getScalarType();
138381ad6265SDimitry Andric }
138481ad6265SDimitry Andric
1385bdd1243dSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1386bdd1243dSDimitry Andric void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
1387bdd1243dSDimitry Andric VPSlotTracker &SlotTracker) const {
1388bdd1243dSDimitry Andric O << Indent;
1389bdd1243dSDimitry Andric printAsOperand(O, SlotTracker);
1390bdd1243dSDimitry Andric O << Indent << "= DERIVED-IV ";
1391bdd1243dSDimitry Andric getStartValue()->printAsOperand(O, SlotTracker);
1392bdd1243dSDimitry Andric O << " + ";
1393*0fca6ea1SDimitry Andric getOperand(1)->printAsOperand(O, SlotTracker);
1394bdd1243dSDimitry Andric O << " * ";
1395bdd1243dSDimitry Andric getStepValue()->printAsOperand(O, SlotTracker);
139681ad6265SDimitry Andric }
1397bdd1243dSDimitry Andric #endif
139881ad6265SDimitry Andric
13995f757f3fSDimitry Andric void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
14005f757f3fSDimitry Andric // Fast-math-flags propagate from the original induction instruction.
14015f757f3fSDimitry Andric IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
14025f757f3fSDimitry Andric if (hasFastMathFlags())
14035f757f3fSDimitry Andric State.Builder.setFastMathFlags(getFastMathFlags());
14045f757f3fSDimitry Andric
14055f757f3fSDimitry Andric // Compute scalar induction steps. BaseIV is the scalar induction variable
14065f757f3fSDimitry Andric // on which to base the steps; Step is the size of the step.
14075f757f3fSDimitry Andric
14085f757f3fSDimitry Andric Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
14095f757f3fSDimitry Andric Value *Step = State.get(getStepValue(), VPIteration(0, 0));
14105f757f3fSDimitry Andric IRBuilderBase &Builder = State.Builder;
14115f757f3fSDimitry Andric
14125f757f3fSDimitry Andric // Ensure step has the same type as that of scalar IV.
14135f757f3fSDimitry Andric Type *BaseIVTy = BaseIV->getType()->getScalarType();
1414*0fca6ea1SDimitry Andric assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
14155f757f3fSDimitry Andric
14165f757f3fSDimitry Andric // We build scalar steps for both integer and floating-point induction
14175f757f3fSDimitry Andric // variables. Here, we determine the kind of arithmetic we will perform.
14185f757f3fSDimitry Andric Instruction::BinaryOps AddOp;
14195f757f3fSDimitry Andric Instruction::BinaryOps MulOp;
14205f757f3fSDimitry Andric if (BaseIVTy->isIntegerTy()) {
14215f757f3fSDimitry Andric AddOp = Instruction::Add;
14225f757f3fSDimitry Andric MulOp = Instruction::Mul;
14235f757f3fSDimitry Andric } else {
14245f757f3fSDimitry Andric AddOp = InductionOpcode;
14255f757f3fSDimitry Andric MulOp = Instruction::FMul;
14265f757f3fSDimitry Andric }
14275f757f3fSDimitry Andric
14285f757f3fSDimitry Andric // Determine the number of scalars we need to generate for each unroll
14295f757f3fSDimitry Andric // iteration.
14305f757f3fSDimitry Andric bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
14315f757f3fSDimitry Andric // Compute the scalar steps and save the results in State.
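  // Illustrative sketch for a fixed VF of 4: for unroll part P the lanes below
  // receive BaseIV + (4 * P + Lane) * Step, Lane = 0..3 (only lane 0 when just
  // the first lane is demanded).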
14325f757f3fSDimitry Andric Type *IntStepTy =
14335f757f3fSDimitry Andric IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
14345f757f3fSDimitry Andric Type *VecIVTy = nullptr;
14355f757f3fSDimitry Andric Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
14365f757f3fSDimitry Andric if (!FirstLaneOnly && State.VF.isScalable()) {
14375f757f3fSDimitry Andric VecIVTy = VectorType::get(BaseIVTy, State.VF);
14385f757f3fSDimitry Andric UnitStepVec =
14395f757f3fSDimitry Andric Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
14405f757f3fSDimitry Andric SplatStep = Builder.CreateVectorSplat(State.VF, Step);
14415f757f3fSDimitry Andric SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
14425f757f3fSDimitry Andric }
14435f757f3fSDimitry Andric
14445f757f3fSDimitry Andric unsigned StartPart = 0;
14455f757f3fSDimitry Andric unsigned EndPart = State.UF;
14465f757f3fSDimitry Andric unsigned StartLane = 0;
14475f757f3fSDimitry Andric unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
14485f757f3fSDimitry Andric if (State.Instance) {
14495f757f3fSDimitry Andric StartPart = State.Instance->Part;
14505f757f3fSDimitry Andric EndPart = StartPart + 1;
14515f757f3fSDimitry Andric StartLane = State.Instance->Lane.getKnownLane();
14525f757f3fSDimitry Andric EndLane = StartLane + 1;
14535f757f3fSDimitry Andric }
14545f757f3fSDimitry Andric for (unsigned Part = StartPart; Part < EndPart; ++Part) {
14555f757f3fSDimitry Andric Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);
14565f757f3fSDimitry Andric
14575f757f3fSDimitry Andric if (!FirstLaneOnly && State.VF.isScalable()) {
14585f757f3fSDimitry Andric auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
14595f757f3fSDimitry Andric auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
14605f757f3fSDimitry Andric if (BaseIVTy->isFloatingPointTy())
14615f757f3fSDimitry Andric InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
14625f757f3fSDimitry Andric auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
14635f757f3fSDimitry Andric auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
14645f757f3fSDimitry Andric State.set(this, Add, Part);
14655f757f3fSDimitry Andric // It's useful to record the lane values too for the known minimum number
14665f757f3fSDimitry Andric // of elements so we do those below. This improves the code quality when
14675f757f3fSDimitry Andric // trying to extract the first element, for example.
14685f757f3fSDimitry Andric }
14695f757f3fSDimitry Andric
14705f757f3fSDimitry Andric if (BaseIVTy->isFloatingPointTy())
14715f757f3fSDimitry Andric StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
14725f757f3fSDimitry Andric
14735f757f3fSDimitry Andric for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
14745f757f3fSDimitry Andric Value *StartIdx = Builder.CreateBinOp(
14755f757f3fSDimitry Andric AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
14765f757f3fSDimitry Andric // The step returned by `createStepForVF` is a runtime-evaluated value
14775f757f3fSDimitry Andric // when VF is scalable. Otherwise, it should be folded into a Constant.
14785f757f3fSDimitry Andric assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
14795f757f3fSDimitry Andric "Expected StartIdx to be folded to a constant when VF is not "
14805f757f3fSDimitry Andric "scalable");
14815f757f3fSDimitry Andric auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
14825f757f3fSDimitry Andric auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
14835f757f3fSDimitry Andric State.set(this, Add, VPIteration(Part, Lane));
14845f757f3fSDimitry Andric }
14855f757f3fSDimitry Andric }
14865f757f3fSDimitry Andric }
14875f757f3fSDimitry Andric
148881ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
148981ad6265SDimitry Andric void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
149081ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
149181ad6265SDimitry Andric O << Indent;
149281ad6265SDimitry Andric printAsOperand(O, SlotTracker);
14935f757f3fSDimitry Andric O << " = SCALAR-STEPS ";
149481ad6265SDimitry Andric printOperands(O, SlotTracker);
149581ad6265SDimitry Andric }
1496753f127fSDimitry Andric #endif
149781ad6265SDimitry Andric
1498753f127fSDimitry Andric void VPWidenGEPRecipe::execute(VPTransformState &State) {
149906c3fb27SDimitry Andric assert(State.VF.isVector() && "not widening");
1500753f127fSDimitry Andric auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
1501753f127fSDimitry Andric // Construct a vector GEP by widening the operands of the scalar GEP as
1502753f127fSDimitry Andric // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
1503753f127fSDimitry Andric // results in a vector of pointers when at least one operand of the GEP
1504753f127fSDimitry Andric // is vector-typed. Thus, to keep the representation compact, we only use
1505753f127fSDimitry Andric // vector-typed operands for loop-varying values.
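  // For example (illustrative only), a GEP with a loop-invariant base and a
  // single loop-varying index becomes one GEP per part whose index operand is
  // the widened index vector, producing a vector of pointers.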
1506753f127fSDimitry Andric
150706c3fb27SDimitry Andric if (areAllOperandsInvariant()) {
1508753f127fSDimitry Andric // If we are vectorizing, but the GEP has only loop-invariant operands,
1509753f127fSDimitry Andric // the GEP we build (by only using vector-typed operands for
1510753f127fSDimitry Andric // loop-varying values) would be a scalar pointer. Thus, to ensure we
1511753f127fSDimitry Andric // produce a vector of pointers, we need to either arbitrarily pick an
1512753f127fSDimitry Andric // operand to broadcast, or broadcast a clone of the original GEP.
1513753f127fSDimitry Andric // Here, we broadcast a clone of the original.
1514753f127fSDimitry Andric //
1515753f127fSDimitry Andric // TODO: If at some point we decide to scalarize instructions having
1516753f127fSDimitry Andric // loop-invariant operands, this special case will no longer be
1517753f127fSDimitry Andric // required. We would add the scalarization decision to
1518753f127fSDimitry Andric // collectLoopScalars() and teach getVectorValue() to broadcast
1519753f127fSDimitry Andric // the lane-zero scalar value.
152006c3fb27SDimitry Andric SmallVector<Value *> Ops;
152106c3fb27SDimitry Andric for (unsigned I = 0, E = getNumOperands(); I != E; I++)
152206c3fb27SDimitry Andric Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
152306c3fb27SDimitry Andric
152406c3fb27SDimitry Andric auto *NewGEP =
152506c3fb27SDimitry Andric State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
152606c3fb27SDimitry Andric ArrayRef(Ops).drop_front(), "", isInBounds());
1527753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
152806c3fb27SDimitry Andric Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
1529753f127fSDimitry Andric State.set(this, EntryPart, Part);
1530753f127fSDimitry Andric State.addMetadata(EntryPart, GEP);
1531753f127fSDimitry Andric }
1532753f127fSDimitry Andric } else {
1533753f127fSDimitry Andric // If the GEP has at least one loop-varying operand, we are sure to
1534753f127fSDimitry Andric // produce a vector of pointers. But if we are only unrolling, we want
1535753f127fSDimitry Andric // to produce a scalar GEP for each unroll part. Thus, the GEP we
1536753f127fSDimitry Andric // produce with the code below will be scalar (if VF == 1) or vector
1537753f127fSDimitry Andric // (otherwise). Note that for the unroll-only case, we still maintain
1538753f127fSDimitry Andric // values in the vector mapping with initVector, as we do for other
1539753f127fSDimitry Andric // instructions.
1540753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
1541753f127fSDimitry Andric // The pointer operand of the new GEP. If it's loop-invariant, we
1542753f127fSDimitry Andric // won't broadcast it.
154306c3fb27SDimitry Andric auto *Ptr = isPointerLoopInvariant()
1544753f127fSDimitry Andric ? State.get(getOperand(0), VPIteration(0, 0))
1545753f127fSDimitry Andric : State.get(getOperand(0), Part);
1546753f127fSDimitry Andric
1547753f127fSDimitry Andric // Collect all the indices for the new GEP. If any index is
1548753f127fSDimitry Andric // loop-invariant, we won't broadcast it.
1549753f127fSDimitry Andric SmallVector<Value *, 4> Indices;
1550753f127fSDimitry Andric for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
1551753f127fSDimitry Andric VPValue *Operand = getOperand(I);
155206c3fb27SDimitry Andric if (isIndexLoopInvariant(I - 1))
1553753f127fSDimitry Andric Indices.push_back(State.get(Operand, VPIteration(0, 0)));
1554753f127fSDimitry Andric else
1555753f127fSDimitry Andric Indices.push_back(State.get(Operand, Part));
1556753f127fSDimitry Andric }
1557753f127fSDimitry Andric
1558753f127fSDimitry Andric // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
1559753f127fSDimitry Andric // but it should be a vector, otherwise.
1560753f127fSDimitry Andric auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
156106c3fb27SDimitry Andric Indices, "", isInBounds());
1562753f127fSDimitry Andric assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
1563753f127fSDimitry Andric "NewGEP is not a pointer vector");
1564753f127fSDimitry Andric State.set(this, NewGEP, Part);
1565753f127fSDimitry Andric State.addMetadata(NewGEP, GEP);
1566753f127fSDimitry Andric }
1567753f127fSDimitry Andric }
1568753f127fSDimitry Andric }
1569753f127fSDimitry Andric
1570753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
157181ad6265SDimitry Andric void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
157281ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
157381ad6265SDimitry Andric O << Indent << "WIDEN-GEP ";
157406c3fb27SDimitry Andric O << (isPointerLoopInvariant() ? "Inv" : "Var");
157506c3fb27SDimitry Andric for (size_t I = 0; I < getNumOperands() - 1; ++I)
157606c3fb27SDimitry Andric O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
157781ad6265SDimitry Andric
157881ad6265SDimitry Andric O << " ";
157981ad6265SDimitry Andric printAsOperand(O, SlotTracker);
158081ad6265SDimitry Andric O << " = getelementptr";
158106c3fb27SDimitry Andric printFlags(O);
158281ad6265SDimitry Andric printOperands(O, SlotTracker);
158381ad6265SDimitry Andric }
1584753f127fSDimitry Andric #endif
158581ad6265SDimitry Andric
1586647cbc5dSDimitry Andric void VPVectorPointerRecipe::execute(VPTransformState &State) {
1587647cbc5dSDimitry Andric auto &Builder = State.Builder;
1588647cbc5dSDimitry Andric State.setDebugLocFrom(getDebugLoc());
1589647cbc5dSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
1590647cbc5dSDimitry Andric // Calculate the pointer for the specific unroll-part.
1591647cbc5dSDimitry Andric Value *PartPtr = nullptr;
1592647cbc5dSDimitry Andric // Use i32 for the gep index type when the value is constant,
1593647cbc5dSDimitry Andric // or query DataLayout for a more suitable index type otherwise.
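    // Note (descriptive comment): for scalable vectors the increment depends
    // on vscale, so the reverse case and parts other than 0 use the target's
    // index type from DataLayout; otherwise a constant i32 index is sufficient.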
1594647cbc5dSDimitry Andric const DataLayout &DL =
1595*0fca6ea1SDimitry Andric Builder.GetInsertBlock()->getDataLayout();
1596647cbc5dSDimitry Andric Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0)
1597647cbc5dSDimitry Andric ? DL.getIndexType(IndexedTy->getPointerTo())
1598647cbc5dSDimitry Andric : Builder.getInt32Ty();
1599647cbc5dSDimitry Andric Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
16001db9f3b2SDimitry Andric bool InBounds = isInBounds();
1601647cbc5dSDimitry Andric if (IsReverse) {
1602647cbc5dSDimitry Andric // If the address is consecutive but reversed, then the
1603647cbc5dSDimitry Andric // wide store needs to start at the last vector element.
1604647cbc5dSDimitry Andric // RunTimeVF = VScale * VF.getKnownMinValue()
1605647cbc5dSDimitry Andric // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
1606647cbc5dSDimitry Andric Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1607647cbc5dSDimitry Andric // NumElt = -Part * RunTimeVF
1608647cbc5dSDimitry Andric Value *NumElt = Builder.CreateMul(
1609647cbc5dSDimitry Andric ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
1610647cbc5dSDimitry Andric // LastLane = 1 - RunTimeVF
1611647cbc5dSDimitry Andric Value *LastLane =
1612647cbc5dSDimitry Andric Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1613647cbc5dSDimitry Andric PartPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1614647cbc5dSDimitry Andric PartPtr = Builder.CreateGEP(IndexedTy, PartPtr, LastLane, "", InBounds);
1615647cbc5dSDimitry Andric } else {
1616647cbc5dSDimitry Andric Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
1617647cbc5dSDimitry Andric PartPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
1618647cbc5dSDimitry Andric }
1619647cbc5dSDimitry Andric
1620*0fca6ea1SDimitry Andric State.set(this, PartPtr, Part, /*IsScalar*/ true);
1621647cbc5dSDimitry Andric }
1622647cbc5dSDimitry Andric }
1623647cbc5dSDimitry Andric
1624647cbc5dSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1625647cbc5dSDimitry Andric void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
1626647cbc5dSDimitry Andric VPSlotTracker &SlotTracker) const {
1627647cbc5dSDimitry Andric O << Indent;
1628647cbc5dSDimitry Andric printAsOperand(O, SlotTracker);
1629647cbc5dSDimitry Andric O << " = vector-pointer ";
1630647cbc5dSDimitry Andric if (IsReverse)
1631647cbc5dSDimitry Andric O << "(reverse) ";
1632647cbc5dSDimitry Andric
1633647cbc5dSDimitry Andric printOperands(O, SlotTracker);
1634647cbc5dSDimitry Andric }
1635647cbc5dSDimitry Andric #endif
1636647cbc5dSDimitry Andric
1637753f127fSDimitry Andric void VPBlendRecipe::execute(VPTransformState &State) {
16385f757f3fSDimitry Andric State.setDebugLocFrom(getDebugLoc());
1639753f127fSDimitry Andric // We know that all PHIs in non-header blocks are converted into
1640753f127fSDimitry Andric // selects, so we don't have to worry about the insertion order and we
1641753f127fSDimitry Andric // can just use the builder.
1642753f127fSDimitry Andric // At this point we generate the predication tree. There may be
1643753f127fSDimitry Andric // duplications since this is a simple recursive scan, but future
1644753f127fSDimitry Andric // optimizations will clean it up.
1645753f127fSDimitry Andric
1646753f127fSDimitry Andric unsigned NumIncoming = getNumIncomingValues();
1647753f127fSDimitry Andric
1648753f127fSDimitry Andric // Generate a sequence of selects of the form:
1649753f127fSDimitry Andric // SELECT(Mask3, In3,
1650753f127fSDimitry Andric // SELECT(Mask2, In2,
1651753f127fSDimitry Andric // SELECT(Mask1, In1,
1652753f127fSDimitry Andric // In0)))
1653753f127fSDimitry Andric // Note that Mask0 is never used: lanes for which no path reaches this phi
1654753f127fSDimitry Andric // are essentially undef and are taken from In0.
1655753f127fSDimitry Andric VectorParts Entry(State.UF);
1656*0fca6ea1SDimitry Andric bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
1657753f127fSDimitry Andric for (unsigned In = 0; In < NumIncoming; ++In) {
1658753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
1659753f127fSDimitry Andric // We might have single edge PHIs (blocks) - use an identity
1660753f127fSDimitry Andric // 'select' for the first PHI operand.
1661*0fca6ea1SDimitry Andric Value *In0 = State.get(getIncomingValue(In), Part, OnlyFirstLaneUsed);
1662753f127fSDimitry Andric if (In == 0)
1663753f127fSDimitry Andric Entry[Part] = In0; // Initialize with the first incoming value.
1664753f127fSDimitry Andric else {
1665753f127fSDimitry Andric // Select between the current value and the previous incoming edge
1666753f127fSDimitry Andric // based on the incoming mask.
1667*0fca6ea1SDimitry Andric Value *Cond = State.get(getMask(In), Part, OnlyFirstLaneUsed);
1668753f127fSDimitry Andric Entry[Part] =
1669753f127fSDimitry Andric State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
1670753f127fSDimitry Andric }
1671753f127fSDimitry Andric }
1672753f127fSDimitry Andric }
1673753f127fSDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part)
1674*0fca6ea1SDimitry Andric State.set(this, Entry[Part], Part, OnlyFirstLaneUsed);
1675753f127fSDimitry Andric }
1676753f127fSDimitry Andric
1677753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
167881ad6265SDimitry Andric void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
167981ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
168081ad6265SDimitry Andric O << Indent << "BLEND ";
16815f757f3fSDimitry Andric printAsOperand(O, SlotTracker);
168281ad6265SDimitry Andric O << " =";
168381ad6265SDimitry Andric if (getNumIncomingValues() == 1) {
168481ad6265SDimitry Andric // Not a User of any mask: not really blending, this is a
168581ad6265SDimitry Andric // single-predecessor phi.
168681ad6265SDimitry Andric O << " ";
168781ad6265SDimitry Andric getIncomingValue(0)->printAsOperand(O, SlotTracker);
168881ad6265SDimitry Andric } else {
168981ad6265SDimitry Andric for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
169081ad6265SDimitry Andric O << " ";
169181ad6265SDimitry Andric getIncomingValue(I)->printAsOperand(O, SlotTracker);
1692*0fca6ea1SDimitry Andric if (I == 0)
1693*0fca6ea1SDimitry Andric continue;
169481ad6265SDimitry Andric O << "/";
169581ad6265SDimitry Andric getMask(I)->printAsOperand(O, SlotTracker);
169681ad6265SDimitry Andric }
169781ad6265SDimitry Andric }
169881ad6265SDimitry Andric }
1699*0fca6ea1SDimitry Andric #endif
170081ad6265SDimitry Andric
1701*0fca6ea1SDimitry Andric void VPReductionRecipe::execute(VPTransformState &State) {
1702*0fca6ea1SDimitry Andric assert(!State.Instance && "Reduction being replicated.");
1703*0fca6ea1SDimitry Andric Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true);
1704*0fca6ea1SDimitry Andric RecurKind Kind = RdxDesc.getRecurrenceKind();
1705*0fca6ea1SDimitry Andric // Propagate the fast-math flags carried by the underlying instruction.
1706*0fca6ea1SDimitry Andric IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
1707*0fca6ea1SDimitry Andric State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
1708*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
1709*0fca6ea1SDimitry Andric Value *NewVecOp = State.get(getVecOp(), Part);
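    // If the reduction is conditional, masked-off lanes are replaced below
    // with the recurrence identity (e.g. zero for an integer add) so they do
    // not affect the reduced value.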
1710*0fca6ea1SDimitry Andric if (VPValue *Cond = getCondOp()) {
1711*0fca6ea1SDimitry Andric Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
1712*0fca6ea1SDimitry Andric VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
1713*0fca6ea1SDimitry Andric Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
1714*0fca6ea1SDimitry Andric Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
1715*0fca6ea1SDimitry Andric RdxDesc.getFastMathFlags());
1716*0fca6ea1SDimitry Andric if (State.VF.isVector()) {
1717*0fca6ea1SDimitry Andric Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
1718*0fca6ea1SDimitry Andric }
1719*0fca6ea1SDimitry Andric
1720*0fca6ea1SDimitry Andric Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
1721*0fca6ea1SDimitry Andric NewVecOp = Select;
1722*0fca6ea1SDimitry Andric }
1723*0fca6ea1SDimitry Andric Value *NewRed;
1724*0fca6ea1SDimitry Andric Value *NextInChain;
1725*0fca6ea1SDimitry Andric if (IsOrdered) {
1726*0fca6ea1SDimitry Andric if (State.VF.isVector())
1727*0fca6ea1SDimitry Andric NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
1728*0fca6ea1SDimitry Andric PrevInChain);
1729*0fca6ea1SDimitry Andric else
1730*0fca6ea1SDimitry Andric NewRed = State.Builder.CreateBinOp(
1731*0fca6ea1SDimitry Andric (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
1732*0fca6ea1SDimitry Andric NewVecOp);
1733*0fca6ea1SDimitry Andric PrevInChain = NewRed;
1734*0fca6ea1SDimitry Andric } else {
1735*0fca6ea1SDimitry Andric PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
1736*0fca6ea1SDimitry Andric NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
1737*0fca6ea1SDimitry Andric }
1738*0fca6ea1SDimitry Andric if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
1739*0fca6ea1SDimitry Andric NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
1740*0fca6ea1SDimitry Andric NewRed, PrevInChain);
1741*0fca6ea1SDimitry Andric } else if (IsOrdered)
1742*0fca6ea1SDimitry Andric NextInChain = NewRed;
1743*0fca6ea1SDimitry Andric else
1744*0fca6ea1SDimitry Andric NextInChain = State.Builder.CreateBinOp(
1745*0fca6ea1SDimitry Andric (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
1746*0fca6ea1SDimitry Andric State.set(this, NextInChain, Part, /*IsScalar*/ true);
1747*0fca6ea1SDimitry Andric }
1748*0fca6ea1SDimitry Andric }
1749*0fca6ea1SDimitry Andric
1750*0fca6ea1SDimitry Andric void VPReductionEVLRecipe::execute(VPTransformState &State) {
1751*0fca6ea1SDimitry Andric assert(!State.Instance && "Reduction being replicated.");
1752*0fca6ea1SDimitry Andric assert(State.UF == 1 &&
1753*0fca6ea1SDimitry Andric "Expected only UF == 1 when vectorizing with explicit vector length.");
1754*0fca6ea1SDimitry Andric
1755*0fca6ea1SDimitry Andric auto &Builder = State.Builder;
1756*0fca6ea1SDimitry Andric // Propagate the fast-math flags carried by the underlying instruction.
1757*0fca6ea1SDimitry Andric IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
1758*0fca6ea1SDimitry Andric const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
1759*0fca6ea1SDimitry Andric Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
1760*0fca6ea1SDimitry Andric
1761*0fca6ea1SDimitry Andric RecurKind Kind = RdxDesc.getRecurrenceKind();
1762*0fca6ea1SDimitry Andric Value *Prev = State.get(getChainOp(), 0, /*IsScalar*/ true);
1763*0fca6ea1SDimitry Andric Value *VecOp = State.get(getVecOp(), 0);
1764*0fca6ea1SDimitry Andric Value *EVL = State.get(getEVL(), VPIteration(0, 0));
1765*0fca6ea1SDimitry Andric
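  // The VectorBuilder emits vector-predicated (VP) intrinsics: the reduction
  // below only accumulates lanes that are enabled by the mask and lie below
  // EVL.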
1766*0fca6ea1SDimitry Andric VectorBuilder VBuilder(Builder);
1767*0fca6ea1SDimitry Andric VBuilder.setEVL(EVL);
1768*0fca6ea1SDimitry Andric Value *Mask;
1769*0fca6ea1SDimitry Andric // TODO: move the all-true mask generation into VectorBuilder.
1770*0fca6ea1SDimitry Andric if (VPValue *CondOp = getCondOp())
1771*0fca6ea1SDimitry Andric Mask = State.get(CondOp, 0);
1772*0fca6ea1SDimitry Andric else
1773*0fca6ea1SDimitry Andric Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
1774*0fca6ea1SDimitry Andric VBuilder.setMask(Mask);
1775*0fca6ea1SDimitry Andric
1776*0fca6ea1SDimitry Andric Value *NewRed;
1777*0fca6ea1SDimitry Andric if (isOrdered()) {
1778*0fca6ea1SDimitry Andric NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
1779*0fca6ea1SDimitry Andric } else {
1780*0fca6ea1SDimitry Andric NewRed = createSimpleTargetReduction(VBuilder, VecOp, RdxDesc);
1781*0fca6ea1SDimitry Andric if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
1782*0fca6ea1SDimitry Andric NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
1783*0fca6ea1SDimitry Andric else
1784*0fca6ea1SDimitry Andric NewRed = Builder.CreateBinOp(
1785*0fca6ea1SDimitry Andric (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, Prev);
1786*0fca6ea1SDimitry Andric }
1787*0fca6ea1SDimitry Andric State.set(this, NewRed, 0, /*IsScalar*/ true);
1788*0fca6ea1SDimitry Andric }
1789*0fca6ea1SDimitry Andric
1790*0fca6ea1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
179181ad6265SDimitry Andric void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
179281ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
179381ad6265SDimitry Andric O << Indent << "REDUCE ";
179481ad6265SDimitry Andric printAsOperand(O, SlotTracker);
179581ad6265SDimitry Andric O << " = ";
179681ad6265SDimitry Andric getChainOp()->printAsOperand(O, SlotTracker);
179781ad6265SDimitry Andric O << " +";
179881ad6265SDimitry Andric if (isa<FPMathOperator>(getUnderlyingInstr()))
179981ad6265SDimitry Andric O << getUnderlyingInstr()->getFastMathFlags();
18005f757f3fSDimitry Andric O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
180181ad6265SDimitry Andric getVecOp()->printAsOperand(O, SlotTracker);
1802*0fca6ea1SDimitry Andric if (isConditional()) {
1803*0fca6ea1SDimitry Andric O << ", ";
1804*0fca6ea1SDimitry Andric getCondOp()->printAsOperand(O, SlotTracker);
1805*0fca6ea1SDimitry Andric }
1806*0fca6ea1SDimitry Andric O << ")";
1807*0fca6ea1SDimitry Andric if (RdxDesc.IntermediateStore)
1808*0fca6ea1SDimitry Andric O << " (with final reduction value stored in invariant address sank "
1809*0fca6ea1SDimitry Andric "outside of loop)";
1810*0fca6ea1SDimitry Andric }
1811*0fca6ea1SDimitry Andric
1812*0fca6ea1SDimitry Andric void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1813*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const {
1814*0fca6ea1SDimitry Andric const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
1815*0fca6ea1SDimitry Andric O << Indent << "REDUCE ";
1816*0fca6ea1SDimitry Andric printAsOperand(O, SlotTracker);
1817*0fca6ea1SDimitry Andric O << " = ";
1818*0fca6ea1SDimitry Andric getChainOp()->printAsOperand(O, SlotTracker);
1819*0fca6ea1SDimitry Andric O << " +";
1820*0fca6ea1SDimitry Andric if (isa<FPMathOperator>(getUnderlyingInstr()))
1821*0fca6ea1SDimitry Andric O << getUnderlyingInstr()->getFastMathFlags();
1822*0fca6ea1SDimitry Andric O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
1823*0fca6ea1SDimitry Andric getVecOp()->printAsOperand(O, SlotTracker);
1824*0fca6ea1SDimitry Andric O << ", ";
1825*0fca6ea1SDimitry Andric getEVL()->printAsOperand(O, SlotTracker);
1826*0fca6ea1SDimitry Andric if (isConditional()) {
182781ad6265SDimitry Andric O << ", ";
182881ad6265SDimitry Andric getCondOp()->printAsOperand(O, SlotTracker);
182981ad6265SDimitry Andric }
183081ad6265SDimitry Andric O << ")";
18315f757f3fSDimitry Andric if (RdxDesc.IntermediateStore)
183281ad6265SDimitry Andric O << " (with final reduction value stored in invariant address sank "
183381ad6265SDimitry Andric "outside of loop)";
183481ad6265SDimitry Andric }
183506c3fb27SDimitry Andric #endif
183681ad6265SDimitry Andric
183706c3fb27SDimitry Andric bool VPReplicateRecipe::shouldPack() const {
183806c3fb27SDimitry Andric // Find if the recipe is used by a widened recipe via an intervening
183906c3fb27SDimitry Andric // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
184006c3fb27SDimitry Andric return any_of(users(), [](const VPUser *U) {
184106c3fb27SDimitry Andric if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
184206c3fb27SDimitry Andric return any_of(PredR->users(), [PredR](const VPUser *U) {
184306c3fb27SDimitry Andric return !U->usesScalars(PredR);
184406c3fb27SDimitry Andric });
184506c3fb27SDimitry Andric return false;
184606c3fb27SDimitry Andric });
184706c3fb27SDimitry Andric }
184806c3fb27SDimitry Andric
184906c3fb27SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
185081ad6265SDimitry Andric void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
185181ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
185281ad6265SDimitry Andric O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
185381ad6265SDimitry Andric
185481ad6265SDimitry Andric if (!getUnderlyingInstr()->getType()->isVoidTy()) {
185581ad6265SDimitry Andric printAsOperand(O, SlotTracker);
185681ad6265SDimitry Andric O << " = ";
185781ad6265SDimitry Andric }
185881ad6265SDimitry Andric if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
185906c3fb27SDimitry Andric O << "call";
186006c3fb27SDimitry Andric printFlags(O);
186106c3fb27SDimitry Andric O << "@" << CB->getCalledFunction()->getName() << "(";
186281ad6265SDimitry Andric interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
186381ad6265SDimitry Andric O, [&O, &SlotTracker](VPValue *Op) {
186481ad6265SDimitry Andric Op->printAsOperand(O, SlotTracker);
186581ad6265SDimitry Andric });
186681ad6265SDimitry Andric O << ")";
186781ad6265SDimitry Andric } else {
186806c3fb27SDimitry Andric O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
186906c3fb27SDimitry Andric printFlags(O);
187081ad6265SDimitry Andric printOperands(O, SlotTracker);
187181ad6265SDimitry Andric }
187281ad6265SDimitry Andric
187306c3fb27SDimitry Andric if (shouldPack())
187481ad6265SDimitry Andric O << " (S->V)";
187581ad6265SDimitry Andric }
1876753f127fSDimitry Andric #endif
187781ad6265SDimitry Andric
1878*0fca6ea1SDimitry Andric /// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1879*0fca6ea1SDimitry Andric /// if it is either defined outside the vector region or its operand is known to
1880*0fca6ea1SDimitry Andric /// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
1881*0fca6ea1SDimitry Andric /// TODO: Uniformity should be associated with a VPValue and there should be a
1882*0fca6ea1SDimitry Andric /// generic way to check.
1883*0fca6ea1SDimitry Andric static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {
1884*0fca6ea1SDimitry Andric return C->isDefinedOutsideVectorRegions() ||
1885*0fca6ea1SDimitry Andric isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
1886*0fca6ea1SDimitry Andric isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
1887*0fca6ea1SDimitry Andric }
1888*0fca6ea1SDimitry Andric
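/// Generate the scalar cast for unroll part \p Part. Only the first lane is
/// computed, since all users of this recipe only use that lane.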
1889*0fca6ea1SDimitry Andric Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
1890*0fca6ea1SDimitry Andric assert(vputils::onlyFirstLaneUsed(this) &&
1891*0fca6ea1SDimitry Andric "Codegen only implemented for first lane.");
1892*0fca6ea1SDimitry Andric switch (Opcode) {
1893*0fca6ea1SDimitry Andric case Instruction::SExt:
1894*0fca6ea1SDimitry Andric case Instruction::ZExt:
1895*0fca6ea1SDimitry Andric case Instruction::Trunc: {
1896*0fca6ea1SDimitry Andric // Note: SExt/ZExt not used yet.
1897*0fca6ea1SDimitry Andric Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
1898*0fca6ea1SDimitry Andric return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
1899*0fca6ea1SDimitry Andric }
1900*0fca6ea1SDimitry Andric default:
1901*0fca6ea1SDimitry Andric llvm_unreachable("opcode not implemented yet");
1902*0fca6ea1SDimitry Andric }
1903*0fca6ea1SDimitry Andric }
1904*0fca6ea1SDimitry Andric
1905*0fca6ea1SDimitry Andric void VPScalarCastRecipe ::execute(VPTransformState &State) {
1906*0fca6ea1SDimitry Andric bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1907*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part != State.UF; ++Part) {
1908*0fca6ea1SDimitry Andric Value *Res;
1909*0fca6ea1SDimitry Andric // Only generate a single instance if the recipe is uniform across UFs and
1910*0fca6ea1SDimitry Andric // VFs.
1911*0fca6ea1SDimitry Andric if (Part > 0 && IsUniformAcrossVFsAndUFs)
1912*0fca6ea1SDimitry Andric Res = State.get(this, VPIteration(0, 0));
1913*0fca6ea1SDimitry Andric else
1914*0fca6ea1SDimitry Andric Res = generate(State, Part);
1915*0fca6ea1SDimitry Andric State.set(this, Res, VPIteration(Part, 0));
1916*0fca6ea1SDimitry Andric }
1917*0fca6ea1SDimitry Andric }
1918*0fca6ea1SDimitry Andric
1919*0fca6ea1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1920*0fca6ea1SDimitry Andric void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
1921*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const {
1922*0fca6ea1SDimitry Andric O << Indent << "SCALAR-CAST ";
1923*0fca6ea1SDimitry Andric printAsOperand(O, SlotTracker);
1924*0fca6ea1SDimitry Andric O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1925*0fca6ea1SDimitry Andric printOperands(O, SlotTracker);
1926*0fca6ea1SDimitry Andric O << " to " << *ResultTy;
1927*0fca6ea1SDimitry Andric }
1928*0fca6ea1SDimitry Andric #endif
1929*0fca6ea1SDimitry Andric
1930753f127fSDimitry Andric void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
1931753f127fSDimitry Andric assert(State.Instance && "Branch on Mask works only on single instance.");
1932753f127fSDimitry Andric
1933753f127fSDimitry Andric unsigned Part = State.Instance->Part;
1934753f127fSDimitry Andric unsigned Lane = State.Instance->Lane.getKnownLane();
1935753f127fSDimitry Andric
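// This recipe executes once per replicated lane, so extract that lane's bit
// from the (vector) block-in mask to decide whether the predicated block is
// entered.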
1936753f127fSDimitry Andric Value *ConditionBit = nullptr;
1937753f127fSDimitry Andric VPValue *BlockInMask = getMask();
1938753f127fSDimitry Andric if (BlockInMask) {
1939753f127fSDimitry Andric ConditionBit = State.get(BlockInMask, Part);
1940753f127fSDimitry Andric if (ConditionBit->getType()->isVectorTy())
1941753f127fSDimitry Andric ConditionBit = State.Builder.CreateExtractElement(
1942753f127fSDimitry Andric ConditionBit, State.Builder.getInt32(Lane));
1943753f127fSDimitry Andric } else // Block in mask is all-one.
1944753f127fSDimitry Andric ConditionBit = State.Builder.getTrue();
1945753f127fSDimitry Andric
1946753f127fSDimitry Andric // Replace the temporary unreachable terminator with a new conditional branch,
1947753f127fSDimitry Andric // whose two destinations will be set later when they are created.
1948753f127fSDimitry Andric auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
1949753f127fSDimitry Andric assert(isa<UnreachableInst>(CurrentTerminator) &&
1950753f127fSDimitry Andric "Expected to replace unreachable terminator with conditional branch.");
1951753f127fSDimitry Andric auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
1952753f127fSDimitry Andric CondBr->setSuccessor(0, nullptr);
1953753f127fSDimitry Andric ReplaceInstWithInst(CurrentTerminator, CondBr);
1954753f127fSDimitry Andric }
1955753f127fSDimitry Andric
1956fcaf7f86SDimitry Andric void VPPredInstPHIRecipe::execute(VPTransformState &State) {
1957fcaf7f86SDimitry Andric assert(State.Instance && "Predicated instruction PHI works per instance.");
1958fcaf7f86SDimitry Andric Instruction *ScalarPredInst =
1959fcaf7f86SDimitry Andric cast<Instruction>(State.get(getOperand(0), *State.Instance));
1960fcaf7f86SDimitry Andric BasicBlock *PredicatedBB = ScalarPredInst->getParent();
1961fcaf7f86SDimitry Andric BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
1962fcaf7f86SDimitry Andric assert(PredicatingBB && "Predicated block has no single predecessor.");
1963fcaf7f86SDimitry Andric assert(isa<VPReplicateRecipe>(getOperand(0)) &&
1964fcaf7f86SDimitry Andric "operand must be VPReplicateRecipe");
1965fcaf7f86SDimitry Andric
1966fcaf7f86SDimitry Andric // By current pack/unpack logic we need to generate only a single phi node: if
1967fcaf7f86SDimitry Andric // a vector value for the predicated instruction exists at this point it means
1968fcaf7f86SDimitry Andric // the instruction has vector users only, and a phi for the vector value is
1969fcaf7f86SDimitry Andric // needed. In this case the recipe of the predicated instruction is marked to
1970fcaf7f86SDimitry Andric // also do that packing, thereby "hoisting" the insert-element sequence.
1971fcaf7f86SDimitry Andric // Otherwise, a phi node for the scalar value is needed.
1972fcaf7f86SDimitry Andric unsigned Part = State.Instance->Part;
1973fcaf7f86SDimitry Andric if (State.hasVectorValue(getOperand(0), Part)) {
1974fcaf7f86SDimitry Andric Value *VectorValue = State.get(getOperand(0), Part);
1975fcaf7f86SDimitry Andric InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
1976fcaf7f86SDimitry Andric PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
1977fcaf7f86SDimitry Andric VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
1978fcaf7f86SDimitry Andric VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
1979fcaf7f86SDimitry Andric if (State.hasVectorValue(this, Part))
1980fcaf7f86SDimitry Andric State.reset(this, VPhi, Part);
1981fcaf7f86SDimitry Andric else
1982fcaf7f86SDimitry Andric State.set(this, VPhi, Part);
1983fcaf7f86SDimitry Andric // NOTE: Currently we need to update the value of the operand, so the next
1984fcaf7f86SDimitry Andric // predicated iteration inserts its generated value in the correct vector.
1985fcaf7f86SDimitry Andric State.reset(getOperand(0), VPhi, Part);
1986fcaf7f86SDimitry Andric } else {
1987fcaf7f86SDimitry Andric Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
1988fcaf7f86SDimitry Andric PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
1989fcaf7f86SDimitry Andric Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1990fcaf7f86SDimitry Andric PredicatingBB);
1991fcaf7f86SDimitry Andric Phi->addIncoming(ScalarPredInst, PredicatedBB);
1992fcaf7f86SDimitry Andric if (State.hasScalarValue(this, *State.Instance))
1993fcaf7f86SDimitry Andric State.reset(this, Phi, *State.Instance);
1994fcaf7f86SDimitry Andric else
1995fcaf7f86SDimitry Andric State.set(this, Phi, *State.Instance);
1996fcaf7f86SDimitry Andric // NOTE: Currently we need to update the value of the operand, so the next
1997fcaf7f86SDimitry Andric // predicated iteration inserts its generated value in the correct vector.
1998fcaf7f86SDimitry Andric State.reset(getOperand(0), Phi, *State.Instance);
1999fcaf7f86SDimitry Andric }
2000fcaf7f86SDimitry Andric }
2001fcaf7f86SDimitry Andric
2002753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
200381ad6265SDimitry Andric void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
200481ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
200581ad6265SDimitry Andric O << Indent << "PHI-PREDICATED-INSTRUCTION ";
200681ad6265SDimitry Andric printAsOperand(O, SlotTracker);
200781ad6265SDimitry Andric O << " = ";
200881ad6265SDimitry Andric printOperands(O, SlotTracker);
200981ad6265SDimitry Andric }
201081ad6265SDimitry Andric
2011*0fca6ea1SDimitry Andric void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
201281ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
201381ad6265SDimitry Andric O << Indent << "WIDEN ";
2014*0fca6ea1SDimitry Andric printAsOperand(O, SlotTracker);
2015*0fca6ea1SDimitry Andric O << " = load ";
201681ad6265SDimitry Andric printOperands(O, SlotTracker);
201781ad6265SDimitry Andric }
2018*0fca6ea1SDimitry Andric
2019*0fca6ea1SDimitry Andric void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
2020*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const {
2021*0fca6ea1SDimitry Andric O << Indent << "WIDEN ";
2022*0fca6ea1SDimitry Andric printAsOperand(O, SlotTracker);
2023*0fca6ea1SDimitry Andric O << " = vp.load ";
2024*0fca6ea1SDimitry Andric printOperands(O, SlotTracker);
2025*0fca6ea1SDimitry Andric }
2026*0fca6ea1SDimitry Andric
2027*0fca6ea1SDimitry Andric void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
2028*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const {
2029*0fca6ea1SDimitry Andric O << Indent << "WIDEN store ";
2030*0fca6ea1SDimitry Andric printOperands(O, SlotTracker);
2031*0fca6ea1SDimitry Andric }
2032*0fca6ea1SDimitry Andric
2033*0fca6ea1SDimitry Andric void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent,
2034*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const {
2035*0fca6ea1SDimitry Andric O << Indent << "WIDEN vp.store ";
2036*0fca6ea1SDimitry Andric printOperands(O, SlotTracker);
2037*0fca6ea1SDimitry Andric }
2038*0fca6ea1SDimitry Andric #endif
2039*0fca6ea1SDimitry Andric
2040*0fca6ea1SDimitry Andric static Value *createBitOrPointerCast(IRBuilderBase &Builder, Value *V,
2041*0fca6ea1SDimitry Andric VectorType *DstVTy, const DataLayout &DL) {
2042*0fca6ea1SDimitry Andric // Verify that V is a vector type with same number of elements as DstVTy.
2043*0fca6ea1SDimitry Andric auto VF = DstVTy->getElementCount();
2044*0fca6ea1SDimitry Andric auto *SrcVecTy = cast<VectorType>(V->getType());
2045*0fca6ea1SDimitry Andric assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
2046*0fca6ea1SDimitry Andric Type *SrcElemTy = SrcVecTy->getElementType();
2047*0fca6ea1SDimitry Andric Type *DstElemTy = DstVTy->getElementType();
2048*0fca6ea1SDimitry Andric assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
2049*0fca6ea1SDimitry Andric "Vector elements must have same size");
2050*0fca6ea1SDimitry Andric
2051*0fca6ea1SDimitry Andric // Do a direct cast if element types are castable.
2052*0fca6ea1SDimitry Andric if (CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
2053*0fca6ea1SDimitry Andric return Builder.CreateBitOrPointerCast(V, DstVTy);
2054*0fca6ea1SDimitry Andric }
2055*0fca6ea1SDimitry Andric // V cannot be directly cast to the desired vector type.
2056*0fca6ea1SDimitry Andric // May happen when V is a floating point vector but DstVTy is a vector of
2057*0fca6ea1SDimitry Andric // pointers or vice-versa. Handle this using a two-step bitcast using an
2058*0fca6ea1SDimitry Andric // intermediate Integer type for the bitcast i.e. Ptr <-> Int <-> Float.
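// For example, with 64-bit pointers a <4 x ptr> source is first cast to
// <4 x i64> and the result is then bitcast to a <4 x double> destination.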
2059*0fca6ea1SDimitry Andric assert((DstElemTy->isPointerTy() != SrcElemTy->isPointerTy()) &&
2060*0fca6ea1SDimitry Andric "Only one type should be a pointer type");
2061*0fca6ea1SDimitry Andric assert((DstElemTy->isFloatingPointTy() != SrcElemTy->isFloatingPointTy()) &&
2062*0fca6ea1SDimitry Andric "Only one type should be a floating point type");
2063*0fca6ea1SDimitry Andric Type *IntTy =
2064*0fca6ea1SDimitry Andric IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
2065*0fca6ea1SDimitry Andric auto *VecIntTy = VectorType::get(IntTy, VF);
2066*0fca6ea1SDimitry Andric Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
2067*0fca6ea1SDimitry Andric return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
2068*0fca6ea1SDimitry Andric }
2069*0fca6ea1SDimitry Andric
2070*0fca6ea1SDimitry Andric /// Return a vector containing interleaved elements from multiple
2071*0fca6ea1SDimitry Andric /// smaller input vectors.
2072*0fca6ea1SDimitry Andric static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
2073*0fca6ea1SDimitry Andric const Twine &Name) {
2074*0fca6ea1SDimitry Andric unsigned Factor = Vals.size();
2075*0fca6ea1SDimitry Andric assert(Factor > 1 && "Tried to interleave invalid number of vectors");
2076*0fca6ea1SDimitry Andric
2077*0fca6ea1SDimitry Andric VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
2078*0fca6ea1SDimitry Andric #ifndef NDEBUG
2079*0fca6ea1SDimitry Andric for (Value *Val : Vals)
2080*0fca6ea1SDimitry Andric assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
2081*0fca6ea1SDimitry Andric #endif
2082*0fca6ea1SDimitry Andric
2083*0fca6ea1SDimitry Andric // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2084*0fca6ea1SDimitry Andric // must use intrinsics to interleave.
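// Only an interleave factor of 2 is expected here for scalable vectors,
// matching the llvm.vector.interleave2 intrinsic used below.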
2085*0fca6ea1SDimitry Andric if (VecTy->isScalableTy()) {
2086*0fca6ea1SDimitry Andric VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
2087*0fca6ea1SDimitry Andric return Builder.CreateIntrinsic(WideVecTy, Intrinsic::vector_interleave2,
2088*0fca6ea1SDimitry Andric Vals,
2089*0fca6ea1SDimitry Andric /*FMFSource=*/nullptr, Name);
2090*0fca6ea1SDimitry Andric }
2091*0fca6ea1SDimitry Andric
2092*0fca6ea1SDimitry Andric // Fixed length. Start by concatenating all vectors into a wide vector.
2093*0fca6ea1SDimitry Andric Value *WideVec = concatenateVectors(Builder, Vals);
2094*0fca6ea1SDimitry Andric
2095*0fca6ea1SDimitry Andric // Interleave the elements into the wide vector.
2096*0fca6ea1SDimitry Andric const unsigned NumElts = VecTy->getElementCount().getFixedValue();
2097*0fca6ea1SDimitry Andric return Builder.CreateShuffleVector(
2098*0fca6ea1SDimitry Andric WideVec, createInterleaveMask(NumElts, Factor), Name);
2099*0fca6ea1SDimitry Andric }
2100*0fca6ea1SDimitry Andric
2101*0fca6ea1SDimitry Andric // Try to vectorize the interleave group that \p Instr belongs to.
2102*0fca6ea1SDimitry Andric //
2103*0fca6ea1SDimitry Andric // E.g. Translate following interleaved load group (factor = 3):
2104*0fca6ea1SDimitry Andric // for (i = 0; i < N; i+=3) {
2105*0fca6ea1SDimitry Andric // R = Pic[i]; // Member of index 0
2106*0fca6ea1SDimitry Andric // G = Pic[i+1]; // Member of index 1
2107*0fca6ea1SDimitry Andric // B = Pic[i+2]; // Member of index 2
2108*0fca6ea1SDimitry Andric // ... // do something to R, G, B
2109*0fca6ea1SDimitry Andric // }
2110*0fca6ea1SDimitry Andric // To:
2111*0fca6ea1SDimitry Andric // %wide.vec = load <12 x i32> ; Read 4 tuples of R,G,B
2112*0fca6ea1SDimitry Andric // %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9> ; R elements
2113*0fca6ea1SDimitry Andric // %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10> ; G elements
2114*0fca6ea1SDimitry Andric // %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11> ; B elements
2115*0fca6ea1SDimitry Andric //
2116*0fca6ea1SDimitry Andric // Or translate following interleaved store group (factor = 3):
2117*0fca6ea1SDimitry Andric // for (i = 0; i < N; i+=3) {
2118*0fca6ea1SDimitry Andric // ... do something to R, G, B
2119*0fca6ea1SDimitry Andric // Pic[i] = R; // Member of index 0
2120*0fca6ea1SDimitry Andric // Pic[i+1] = G; // Member of index 1
2121*0fca6ea1SDimitry Andric // Pic[i+2] = B; // Member of index 2
2122*0fca6ea1SDimitry Andric // }
2123*0fca6ea1SDimitry Andric // To:
2124*0fca6ea1SDimitry Andric // %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>
2125*0fca6ea1SDimitry Andric // %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>
2126*0fca6ea1SDimitry Andric // %interleaved.vec = shuffle %R_G.vec, %B_U.vec,
2127*0fca6ea1SDimitry Andric // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
2128*0fca6ea1SDimitry Andric // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
2129*0fca6ea1SDimitry Andric void VPInterleaveRecipe::execute(VPTransformState &State) {
2130*0fca6ea1SDimitry Andric assert(!State.Instance && "Interleave group being replicated.");
2131*0fca6ea1SDimitry Andric const InterleaveGroup<Instruction> *Group = IG;
2132*0fca6ea1SDimitry Andric Instruction *Instr = Group->getInsertPos();
2133*0fca6ea1SDimitry Andric
2134*0fca6ea1SDimitry Andric // Prepare for the vector type of the interleaved load/store.
2135*0fca6ea1SDimitry Andric Type *ScalarTy = getLoadStoreType(Instr);
2136*0fca6ea1SDimitry Andric unsigned InterleaveFactor = Group->getFactor();
2137*0fca6ea1SDimitry Andric auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);
2138*0fca6ea1SDimitry Andric
2139*0fca6ea1SDimitry Andric // Prepare for the new pointers.
2140*0fca6ea1SDimitry Andric SmallVector<Value *, 2> AddrParts;
2141*0fca6ea1SDimitry Andric unsigned Index = Group->getIndex(Instr);
2142*0fca6ea1SDimitry Andric
2143*0fca6ea1SDimitry Andric // TODO: extend the masked interleaved-group support to reversed access.
2144*0fca6ea1SDimitry Andric VPValue *BlockInMask = getMask();
2145*0fca6ea1SDimitry Andric assert((!BlockInMask || !Group->isReverse()) &&
2146*0fca6ea1SDimitry Andric "Reversed masked interleave-group not supported.");
2147*0fca6ea1SDimitry Andric
2148*0fca6ea1SDimitry Andric Value *Idx;
2149*0fca6ea1SDimitry Andric // If the group is reverse, adjust the index to refer to the last vector lane
2150*0fca6ea1SDimitry Andric // instead of the first. We adjust the index from the first vector lane,
2151*0fca6ea1SDimitry Andric // rather than directly getting the pointer for lane VF - 1, because the
2152*0fca6ea1SDimitry Andric // pointer operand of the interleaved access is supposed to be uniform. For
2153*0fca6ea1SDimitry Andric // uniform instructions, we're only required to generate a value for the
2154*0fca6ea1SDimitry Andric // first vector lane in each unroll iteration.
2155*0fca6ea1SDimitry Andric if (Group->isReverse()) {
2156*0fca6ea1SDimitry Andric Value *RuntimeVF =
2157*0fca6ea1SDimitry Andric getRuntimeVF(State.Builder, State.Builder.getInt32Ty(), State.VF);
2158*0fca6ea1SDimitry Andric Idx = State.Builder.CreateSub(RuntimeVF, State.Builder.getInt32(1));
2159*0fca6ea1SDimitry Andric Idx = State.Builder.CreateMul(Idx,
2160*0fca6ea1SDimitry Andric State.Builder.getInt32(Group->getFactor()));
2161*0fca6ea1SDimitry Andric Idx = State.Builder.CreateAdd(Idx, State.Builder.getInt32(Index));
2162*0fca6ea1SDimitry Andric Idx = State.Builder.CreateNeg(Idx);
2163*0fca6ea1SDimitry Andric } else
2164*0fca6ea1SDimitry Andric Idx = State.Builder.getInt32(-Index);
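// In units of ScalarTy elements, the offset applied below is therefore
//   -((RuntimeVF - 1) * Factor + Index) for reversed groups, and
//   -Index                              otherwise;
// e.g. VF = 4, Factor = 3 and Index = 1 give an offset of -10 when reversed.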
2165*0fca6ea1SDimitry Andric
2166*0fca6ea1SDimitry Andric VPValue *Addr = getAddr();
2167*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part < State.UF; Part++) {
2168*0fca6ea1SDimitry Andric Value *AddrPart = State.get(Addr, VPIteration(Part, 0));
2169*0fca6ea1SDimitry Andric if (auto *I = dyn_cast<Instruction>(AddrPart))
2170*0fca6ea1SDimitry Andric State.setDebugLocFrom(I->getDebugLoc());
2171*0fca6ea1SDimitry Andric
2172*0fca6ea1SDimitry Andric // Note that the current instruction could be at any member index, so the
2173*0fca6ea1SDimitry Andric // address needs to be adjusted to that of the member at index 0.
2174*0fca6ea1SDimitry Andric //
2175*0fca6ea1SDimitry Andric // E.g. a = A[i+1]; // Member of index 1 (Current instruction)
2176*0fca6ea1SDimitry Andric // b = A[i]; // Member of index 0
2177*0fca6ea1SDimitry Andric // Current pointer is pointed to A[i+1], adjust it to A[i].
2178*0fca6ea1SDimitry Andric //
2179*0fca6ea1SDimitry Andric // E.g. A[i+1] = a; // Member of index 1
2180*0fca6ea1SDimitry Andric // A[i] = b; // Member of index 0
2181*0fca6ea1SDimitry Andric // A[i+2] = c; // Member of index 2 (Current instruction)
2182*0fca6ea1SDimitry Andric // Current pointer is pointed to A[i+2], adjust it to A[i].
2183*0fca6ea1SDimitry Andric
2184*0fca6ea1SDimitry Andric bool InBounds = false;
2185*0fca6ea1SDimitry Andric if (auto *gep = dyn_cast<GetElementPtrInst>(AddrPart->stripPointerCasts()))
2186*0fca6ea1SDimitry Andric InBounds = gep->isInBounds();
2187*0fca6ea1SDimitry Andric AddrPart = State.Builder.CreateGEP(ScalarTy, AddrPart, Idx, "", InBounds);
2188*0fca6ea1SDimitry Andric AddrParts.push_back(AddrPart);
2189*0fca6ea1SDimitry Andric }
2190*0fca6ea1SDimitry Andric
2191*0fca6ea1SDimitry Andric State.setDebugLocFrom(Instr->getDebugLoc());
2192*0fca6ea1SDimitry Andric Value *PoisonVec = PoisonValue::get(VecTy);
2193*0fca6ea1SDimitry Andric
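// Build the mask for a single unroll part: for scalable vectors the block
// mask is interleaved with itself (factor 2); for fixed-width vectors it is
// replicated per group member and combined with the gap mask, if any.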
2194*0fca6ea1SDimitry Andric auto CreateGroupMask = [&BlockInMask, &State, &InterleaveFactor](
2195*0fca6ea1SDimitry Andric unsigned Part, Value *MaskForGaps) -> Value * {
2196*0fca6ea1SDimitry Andric if (State.VF.isScalable()) {
2197*0fca6ea1SDimitry Andric assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
2198*0fca6ea1SDimitry Andric assert(InterleaveFactor == 2 &&
2199*0fca6ea1SDimitry Andric "Unsupported deinterleave factor for scalable vectors");
2200*0fca6ea1SDimitry Andric auto *BlockInMaskPart = State.get(BlockInMask, Part);
2201*0fca6ea1SDimitry Andric SmallVector<Value *, 2> Ops = {BlockInMaskPart, BlockInMaskPart};
2202*0fca6ea1SDimitry Andric auto *MaskTy = VectorType::get(State.Builder.getInt1Ty(),
2203*0fca6ea1SDimitry Andric State.VF.getKnownMinValue() * 2, true);
2204*0fca6ea1SDimitry Andric return State.Builder.CreateIntrinsic(
2205*0fca6ea1SDimitry Andric MaskTy, Intrinsic::vector_interleave2, Ops,
2206*0fca6ea1SDimitry Andric /*FMFSource=*/nullptr, "interleaved.mask");
2207*0fca6ea1SDimitry Andric }
2208*0fca6ea1SDimitry Andric
2209*0fca6ea1SDimitry Andric if (!BlockInMask)
2210*0fca6ea1SDimitry Andric return MaskForGaps;
2211*0fca6ea1SDimitry Andric
2212*0fca6ea1SDimitry Andric Value *BlockInMaskPart = State.get(BlockInMask, Part);
2213*0fca6ea1SDimitry Andric Value *ShuffledMask = State.Builder.CreateShuffleVector(
2214*0fca6ea1SDimitry Andric BlockInMaskPart,
2215*0fca6ea1SDimitry Andric createReplicatedMask(InterleaveFactor, State.VF.getKnownMinValue()),
2216*0fca6ea1SDimitry Andric "interleaved.mask");
2217*0fca6ea1SDimitry Andric return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
2218*0fca6ea1SDimitry Andric ShuffledMask, MaskForGaps)
2219*0fca6ea1SDimitry Andric : ShuffledMask;
2220*0fca6ea1SDimitry Andric };
2221*0fca6ea1SDimitry Andric
2222*0fca6ea1SDimitry Andric const DataLayout &DL = Instr->getDataLayout();
2223*0fca6ea1SDimitry Andric // Vectorize the interleaved load group.
2224*0fca6ea1SDimitry Andric if (isa<LoadInst>(Instr)) {
2225*0fca6ea1SDimitry Andric Value *MaskForGaps = nullptr;
2226*0fca6ea1SDimitry Andric if (NeedsMaskForGaps) {
2227*0fca6ea1SDimitry Andric MaskForGaps = createBitMaskForGaps(State.Builder,
2228*0fca6ea1SDimitry Andric State.VF.getKnownMinValue(), *Group);
2229*0fca6ea1SDimitry Andric assert(MaskForGaps && "Mask for Gaps is required but it is null");
2230*0fca6ea1SDimitry Andric }
2231*0fca6ea1SDimitry Andric
2232*0fca6ea1SDimitry Andric // For each unroll part, create a wide load for the group.
2233*0fca6ea1SDimitry Andric SmallVector<Value *, 2> NewLoads;
2234*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part < State.UF; Part++) {
2235*0fca6ea1SDimitry Andric Instruction *NewLoad;
2236*0fca6ea1SDimitry Andric if (BlockInMask || MaskForGaps) {
2237*0fca6ea1SDimitry Andric Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
2238*0fca6ea1SDimitry Andric NewLoad = State.Builder.CreateMaskedLoad(VecTy, AddrParts[Part],
2239*0fca6ea1SDimitry Andric Group->getAlign(), GroupMask,
2240*0fca6ea1SDimitry Andric PoisonVec, "wide.masked.vec");
2241*0fca6ea1SDimitry Andric } else
2242*0fca6ea1SDimitry Andric NewLoad = State.Builder.CreateAlignedLoad(
2243*0fca6ea1SDimitry Andric VecTy, AddrParts[Part], Group->getAlign(), "wide.vec");
2244*0fca6ea1SDimitry Andric Group->addMetadata(NewLoad);
2245*0fca6ea1SDimitry Andric NewLoads.push_back(NewLoad);
2246*0fca6ea1SDimitry Andric }
2247*0fca6ea1SDimitry Andric
2248*0fca6ea1SDimitry Andric ArrayRef<VPValue *> VPDefs = definedValues();
2249*0fca6ea1SDimitry Andric const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
2250*0fca6ea1SDimitry Andric if (VecTy->isScalableTy()) {
2251*0fca6ea1SDimitry Andric assert(InterleaveFactor == 2 &&
2252*0fca6ea1SDimitry Andric "Unsupported deinterleave factor for scalable vectors");
2253*0fca6ea1SDimitry Andric
2254*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part < State.UF; ++Part) {
2255*0fca6ea1SDimitry Andric // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2256*0fca6ea1SDimitry Andric // so must use intrinsics to deinterleave.
2257*0fca6ea1SDimitry Andric Value *DI = State.Builder.CreateIntrinsic(
2258*0fca6ea1SDimitry Andric Intrinsic::vector_deinterleave2, VecTy, NewLoads[Part],
2259*0fca6ea1SDimitry Andric /*FMFSource=*/nullptr, "strided.vec");
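// The deinterleave intrinsic returns a pair of vectors; pick out the one
// corresponding to each live member of the group below.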
2260*0fca6ea1SDimitry Andric unsigned J = 0;
2261*0fca6ea1SDimitry Andric for (unsigned I = 0; I < InterleaveFactor; ++I) {
2262*0fca6ea1SDimitry Andric Instruction *Member = Group->getMember(I);
2263*0fca6ea1SDimitry Andric
2264*0fca6ea1SDimitry Andric if (!Member)
2265*0fca6ea1SDimitry Andric continue;
2266*0fca6ea1SDimitry Andric
2267*0fca6ea1SDimitry Andric Value *StridedVec = State.Builder.CreateExtractValue(DI, I);
2268*0fca6ea1SDimitry Andric // If this member has a different type, cast the result to that type.
2269*0fca6ea1SDimitry Andric if (Member->getType() != ScalarTy) {
2270*0fca6ea1SDimitry Andric VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
2271*0fca6ea1SDimitry Andric StridedVec =
2272*0fca6ea1SDimitry Andric createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
2273*0fca6ea1SDimitry Andric }
2274*0fca6ea1SDimitry Andric
2275*0fca6ea1SDimitry Andric if (Group->isReverse())
2276*0fca6ea1SDimitry Andric StridedVec =
2277*0fca6ea1SDimitry Andric State.Builder.CreateVectorReverse(StridedVec, "reverse");
2278*0fca6ea1SDimitry Andric
2279*0fca6ea1SDimitry Andric State.set(VPDefs[J], StridedVec, Part);
2280*0fca6ea1SDimitry Andric ++J;
2281*0fca6ea1SDimitry Andric }
2282*0fca6ea1SDimitry Andric }
2283*0fca6ea1SDimitry Andric
2284*0fca6ea1SDimitry Andric return;
2285*0fca6ea1SDimitry Andric }
2286*0fca6ea1SDimitry Andric
2287*0fca6ea1SDimitry Andric // For each member in the group, shuffle out the appropriate data from the
2288*0fca6ea1SDimitry Andric // wide loads.
2289*0fca6ea1SDimitry Andric unsigned J = 0;
2290*0fca6ea1SDimitry Andric for (unsigned I = 0; I < InterleaveFactor; ++I) {
2291*0fca6ea1SDimitry Andric Instruction *Member = Group->getMember(I);
2292*0fca6ea1SDimitry Andric
2293*0fca6ea1SDimitry Andric // Skip the gaps in the group.
2294*0fca6ea1SDimitry Andric if (!Member)
2295*0fca6ea1SDimitry Andric continue;
2296*0fca6ea1SDimitry Andric
2297*0fca6ea1SDimitry Andric auto StrideMask =
2298*0fca6ea1SDimitry Andric createStrideMask(I, InterleaveFactor, State.VF.getKnownMinValue());
2299*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part < State.UF; Part++) {
2300*0fca6ea1SDimitry Andric Value *StridedVec = State.Builder.CreateShuffleVector(
2301*0fca6ea1SDimitry Andric NewLoads[Part], StrideMask, "strided.vec");
2302*0fca6ea1SDimitry Andric
2303*0fca6ea1SDimitry Andric // If this member has a different type, cast the result to that type.
2304*0fca6ea1SDimitry Andric if (Member->getType() != ScalarTy) {
2305*0fca6ea1SDimitry Andric assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
2306*0fca6ea1SDimitry Andric VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
2307*0fca6ea1SDimitry Andric StridedVec =
2308*0fca6ea1SDimitry Andric createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
2309*0fca6ea1SDimitry Andric }
2310*0fca6ea1SDimitry Andric
2311*0fca6ea1SDimitry Andric if (Group->isReverse())
2312*0fca6ea1SDimitry Andric StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");
2313*0fca6ea1SDimitry Andric
2314*0fca6ea1SDimitry Andric State.set(VPDefs[J], StridedVec, Part);
2315*0fca6ea1SDimitry Andric }
2316*0fca6ea1SDimitry Andric ++J;
2317*0fca6ea1SDimitry Andric }
2318*0fca6ea1SDimitry Andric return;
2319*0fca6ea1SDimitry Andric }
2320*0fca6ea1SDimitry Andric
2321*0fca6ea1SDimitry Andric // The sub vector type for current instruction.
2322*0fca6ea1SDimitry Andric auto *SubVT = VectorType::get(ScalarTy, State.VF);
2323*0fca6ea1SDimitry Andric
2324*0fca6ea1SDimitry Andric // Vectorize the interleaved store group.
2325*0fca6ea1SDimitry Andric Value *MaskForGaps =
2326*0fca6ea1SDimitry Andric createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
2327*0fca6ea1SDimitry Andric assert((!MaskForGaps || !State.VF.isScalable()) &&
2328*0fca6ea1SDimitry Andric "masking gaps for scalable vectors is not yet supported.");
2329*0fca6ea1SDimitry Andric ArrayRef<VPValue *> StoredValues = getStoredValues();
2330*0fca6ea1SDimitry Andric for (unsigned Part = 0; Part < State.UF; Part++) {
2331*0fca6ea1SDimitry Andric // Collect the stored vector from each member.
2332*0fca6ea1SDimitry Andric SmallVector<Value *, 4> StoredVecs;
2333*0fca6ea1SDimitry Andric unsigned StoredIdx = 0;
2334*0fca6ea1SDimitry Andric for (unsigned i = 0; i < InterleaveFactor; i++) {
2335*0fca6ea1SDimitry Andric assert((Group->getMember(i) || MaskForGaps) &&
2336*0fca6ea1SDimitry Andric "Fail to get a member from an interleaved store group");
2337*0fca6ea1SDimitry Andric Instruction *Member = Group->getMember(i);
2338*0fca6ea1SDimitry Andric
2339*0fca6ea1SDimitry Andric // Skip the gaps in the group.
2340*0fca6ea1SDimitry Andric if (!Member) {
2341*0fca6ea1SDimitry Andric Value *Undef = PoisonValue::get(SubVT);
2342*0fca6ea1SDimitry Andric StoredVecs.push_back(Undef);
2343*0fca6ea1SDimitry Andric continue;
2344*0fca6ea1SDimitry Andric }
2345*0fca6ea1SDimitry Andric
2346*0fca6ea1SDimitry Andric Value *StoredVec = State.get(StoredValues[StoredIdx], Part);
2347*0fca6ea1SDimitry Andric ++StoredIdx;
2348*0fca6ea1SDimitry Andric
2349*0fca6ea1SDimitry Andric if (Group->isReverse())
2350*0fca6ea1SDimitry Andric StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");
2351*0fca6ea1SDimitry Andric
2352*0fca6ea1SDimitry Andric // If this member has a different type, cast it to the unified type.
2353*0fca6ea1SDimitry Andric
2354*0fca6ea1SDimitry Andric if (StoredVec->getType() != SubVT)
2355*0fca6ea1SDimitry Andric StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);
2356*0fca6ea1SDimitry Andric
2357*0fca6ea1SDimitry Andric StoredVecs.push_back(StoredVec);
2358*0fca6ea1SDimitry Andric }
2359*0fca6ea1SDimitry Andric
2360*0fca6ea1SDimitry Andric // Interleave all the smaller vectors into one wider vector.
2361*0fca6ea1SDimitry Andric Value *IVec =
2362*0fca6ea1SDimitry Andric interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
2363*0fca6ea1SDimitry Andric Instruction *NewStoreInstr;
2364*0fca6ea1SDimitry Andric if (BlockInMask || MaskForGaps) {
2365*0fca6ea1SDimitry Andric Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
2366*0fca6ea1SDimitry Andric NewStoreInstr = State.Builder.CreateMaskedStore(
2367*0fca6ea1SDimitry Andric IVec, AddrParts[Part], Group->getAlign(), GroupMask);
2368*0fca6ea1SDimitry Andric } else
2369*0fca6ea1SDimitry Andric NewStoreInstr = State.Builder.CreateAlignedStore(IVec, AddrParts[Part],
2370*0fca6ea1SDimitry Andric Group->getAlign());
2371*0fca6ea1SDimitry Andric
2372*0fca6ea1SDimitry Andric Group->addMetadata(NewStoreInstr);
2373*0fca6ea1SDimitry Andric }
2374*0fca6ea1SDimitry Andric }
2375*0fca6ea1SDimitry Andric
2376*0fca6ea1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2377*0fca6ea1SDimitry Andric void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
2378*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const {
2379*0fca6ea1SDimitry Andric O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
2380*0fca6ea1SDimitry Andric IG->getInsertPos()->printAsOperand(O, false);
2381*0fca6ea1SDimitry Andric O << ", ";
2382*0fca6ea1SDimitry Andric getAddr()->printAsOperand(O, SlotTracker);
2383*0fca6ea1SDimitry Andric VPValue *Mask = getMask();
2384*0fca6ea1SDimitry Andric if (Mask) {
2385*0fca6ea1SDimitry Andric O << ", ";
2386*0fca6ea1SDimitry Andric Mask->printAsOperand(O, SlotTracker);
2387*0fca6ea1SDimitry Andric }
2388*0fca6ea1SDimitry Andric
2389*0fca6ea1SDimitry Andric unsigned OpIdx = 0;
2390*0fca6ea1SDimitry Andric for (unsigned i = 0; i < IG->getFactor(); ++i) {
2391*0fca6ea1SDimitry Andric if (!IG->getMember(i))
2392*0fca6ea1SDimitry Andric continue;
2393*0fca6ea1SDimitry Andric if (getNumStoreOperands() > 0) {
2394*0fca6ea1SDimitry Andric O << "\n" << Indent << " store ";
2395*0fca6ea1SDimitry Andric getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
2396*0fca6ea1SDimitry Andric O << " to index " << i;
2397*0fca6ea1SDimitry Andric } else {
2398*0fca6ea1SDimitry Andric O << "\n" << Indent << " ";
2399*0fca6ea1SDimitry Andric getVPValue(OpIdx)->printAsOperand(O, SlotTracker);
2400*0fca6ea1SDimitry Andric O << " = load from index " << i;
2401*0fca6ea1SDimitry Andric }
2402*0fca6ea1SDimitry Andric ++OpIdx;
2403*0fca6ea1SDimitry Andric }
2404*0fca6ea1SDimitry Andric }
240581ad6265SDimitry Andric #endif
240681ad6265SDimitry Andric
240781ad6265SDimitry Andric void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
240881ad6265SDimitry Andric Value *Start = getStartValue()->getLiveInIRValue();
24095f757f3fSDimitry Andric PHINode *EntryPart = PHINode::Create(Start->getType(), 2, "index");
24105f757f3fSDimitry Andric EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
241181ad6265SDimitry Andric
241281ad6265SDimitry Andric BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
241381ad6265SDimitry Andric EntryPart->addIncoming(Start, VectorPH);
24145f757f3fSDimitry Andric EntryPart->setDebugLoc(getDebugLoc());
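// The canonical IV is a single scalar phi shared by all unroll parts.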
241581ad6265SDimitry Andric for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
2416*0fca6ea1SDimitry Andric State.set(this, EntryPart, Part, /*IsScalar*/ true);
241781ad6265SDimitry Andric }
241881ad6265SDimitry Andric
241981ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
242081ad6265SDimitry Andric void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
242181ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
242281ad6265SDimitry Andric O << Indent << "EMIT ";
242381ad6265SDimitry Andric printAsOperand(O, SlotTracker);
242481ad6265SDimitry Andric O << " = CANONICAL-INDUCTION ";
24255f757f3fSDimitry Andric printOperands(O, SlotTracker);
242681ad6265SDimitry Andric }
242781ad6265SDimitry Andric #endif
242881ad6265SDimitry Andric
242906c3fb27SDimitry Andric bool VPCanonicalIVPHIRecipe::isCanonical(
2430*0fca6ea1SDimitry Andric InductionDescriptor::InductionKind Kind, VPValue *Start,
2431*0fca6ea1SDimitry Andric VPValue *Step) const {
2432*0fca6ea1SDimitry Andric // Must be an integer induction.
2433*0fca6ea1SDimitry Andric if (Kind != InductionDescriptor::IK_IntInduction)
2434bdd1243dSDimitry Andric return false;
243506c3fb27SDimitry Andric // Start must match the start value of this canonical induction.
243606c3fb27SDimitry Andric if (Start != getStartValue())
2437bdd1243dSDimitry Andric return false;
2438bdd1243dSDimitry Andric
243906c3fb27SDimitry Andric // If the step is defined by a recipe, it is not a ConstantInt.
244006c3fb27SDimitry Andric if (Step->getDefiningRecipe())
244106c3fb27SDimitry Andric return false;
244206c3fb27SDimitry Andric
244306c3fb27SDimitry Andric ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
244406c3fb27SDimitry Andric return StepC && StepC->isOne();
2445bdd1243dSDimitry Andric }
2446bdd1243dSDimitry Andric
2447*0fca6ea1SDimitry Andric bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
24486246ae0bSDimitry Andric return IsScalarAfterVectorization &&
2449*0fca6ea1SDimitry Andric (!IsScalable || vputils::onlyFirstLaneUsed(this));
245081ad6265SDimitry Andric }
245181ad6265SDimitry Andric
245281ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
245381ad6265SDimitry Andric void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
245481ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
245581ad6265SDimitry Andric O << Indent << "EMIT ";
245681ad6265SDimitry Andric printAsOperand(O, SlotTracker);
245781ad6265SDimitry Andric O << " = WIDEN-POINTER-INDUCTION ";
245881ad6265SDimitry Andric getStartValue()->printAsOperand(O, SlotTracker);
245981ad6265SDimitry Andric O << ", " << *IndDesc.getStep();
246081ad6265SDimitry Andric }
246181ad6265SDimitry Andric #endif
246281ad6265SDimitry Andric
246381ad6265SDimitry Andric void VPExpandSCEVRecipe::execute(VPTransformState &State) {
246481ad6265SDimitry Andric assert(!State.Instance && "cannot be used in per-lane");
2465*0fca6ea1SDimitry Andric const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
246681ad6265SDimitry Andric SCEVExpander Exp(SE, DL, "induction");
246781ad6265SDimitry Andric
246881ad6265SDimitry Andric Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
246981ad6265SDimitry Andric &*State.Builder.GetInsertPoint());
247006c3fb27SDimitry Andric assert(!State.ExpandedSCEVs.contains(Expr) &&
247106c3fb27SDimitry Andric "Same SCEV expanded multiple times");
247206c3fb27SDimitry Andric State.ExpandedSCEVs[Expr] = Res;
247381ad6265SDimitry Andric for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
247406c3fb27SDimitry Andric State.set(this, Res, {Part, 0});
247581ad6265SDimitry Andric }
247681ad6265SDimitry Andric
247781ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
247881ad6265SDimitry Andric void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
247981ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
248081ad6265SDimitry Andric O << Indent << "EMIT ";
248181ad6265SDimitry Andric getVPSingleValue()->printAsOperand(O, SlotTracker);
248281ad6265SDimitry Andric O << " = EXPAND SCEV " << *Expr;
248381ad6265SDimitry Andric }
248481ad6265SDimitry Andric #endif
248581ad6265SDimitry Andric
248681ad6265SDimitry Andric void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
2487*0fca6ea1SDimitry Andric Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
248881ad6265SDimitry Andric Type *STy = CanonicalIV->getType();
248981ad6265SDimitry Andric IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
249081ad6265SDimitry Andric ElementCount VF = State.VF;
249181ad6265SDimitry Andric Value *VStart = VF.isScalar()
249281ad6265SDimitry Andric ? CanonicalIV
249381ad6265SDimitry Andric : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
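// Per part, the widened IV equals broadcast(CanonicalIV) + Part * VF +
// <0, 1, ..., VF-1>.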
249481ad6265SDimitry Andric for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
249581ad6265SDimitry Andric Value *VStep = createStepForVF(Builder, STy, VF, Part);
249681ad6265SDimitry Andric if (VF.isVector()) {
249781ad6265SDimitry Andric VStep = Builder.CreateVectorSplat(VF, VStep);
249881ad6265SDimitry Andric VStep =
249981ad6265SDimitry Andric Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
250081ad6265SDimitry Andric }
250181ad6265SDimitry Andric Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
250281ad6265SDimitry Andric State.set(this, CanonicalVectorIV, Part);
250381ad6265SDimitry Andric }
250481ad6265SDimitry Andric }
250581ad6265SDimitry Andric
250681ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
250781ad6265SDimitry Andric void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
250881ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
250981ad6265SDimitry Andric O << Indent << "EMIT ";
251081ad6265SDimitry Andric printAsOperand(O, SlotTracker);
251181ad6265SDimitry Andric O << " = WIDEN-CANONICAL-INDUCTION ";
251281ad6265SDimitry Andric printOperands(O, SlotTracker);
251381ad6265SDimitry Andric }
251481ad6265SDimitry Andric #endif
251581ad6265SDimitry Andric
251681ad6265SDimitry Andric void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
251781ad6265SDimitry Andric auto &Builder = State.Builder;
251881ad6265SDimitry Andric // Create a vector from the initial value.
251981ad6265SDimitry Andric auto *VectorInit = getStartValue()->getLiveInIRValue();
252081ad6265SDimitry Andric
252181ad6265SDimitry Andric Type *VecTy = State.VF.isScalar()
252281ad6265SDimitry Andric ? VectorInit->getType()
252381ad6265SDimitry Andric : VectorType::get(VectorInit->getType(), State.VF);
252481ad6265SDimitry Andric
252581ad6265SDimitry Andric BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
252681ad6265SDimitry Andric if (State.VF.isVector()) {
252781ad6265SDimitry Andric auto *IdxTy = Builder.getInt32Ty();
252881ad6265SDimitry Andric auto *One = ConstantInt::get(IdxTy, 1);
252981ad6265SDimitry Andric IRBuilder<>::InsertPointGuard Guard(Builder);
253081ad6265SDimitry Andric Builder.SetInsertPoint(VectorPH->getTerminator());
253181ad6265SDimitry Andric auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
253281ad6265SDimitry Andric auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
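// Only the last lane of the start vector is meaningful to the recurrence;
// insert the scalar initial value there and leave the remaining lanes poison.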
253381ad6265SDimitry Andric VectorInit = Builder.CreateInsertElement(
253481ad6265SDimitry Andric PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
253581ad6265SDimitry Andric }
253681ad6265SDimitry Andric
253781ad6265SDimitry Andric // Create a phi node for the new recurrence.
25385f757f3fSDimitry Andric PHINode *EntryPart = PHINode::Create(VecTy, 2, "vector.recur");
25395f757f3fSDimitry Andric EntryPart->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
254081ad6265SDimitry Andric EntryPart->addIncoming(VectorInit, VectorPH);
254181ad6265SDimitry Andric State.set(this, EntryPart, 0);
254281ad6265SDimitry Andric }
254381ad6265SDimitry Andric
254481ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
254581ad6265SDimitry Andric void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
254681ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
254781ad6265SDimitry Andric O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
254881ad6265SDimitry Andric printAsOperand(O, SlotTracker);
254981ad6265SDimitry Andric O << " = phi ";
255081ad6265SDimitry Andric printOperands(O, SlotTracker);
255181ad6265SDimitry Andric }
255281ad6265SDimitry Andric #endif
255381ad6265SDimitry Andric
255481ad6265SDimitry Andric void VPReductionPHIRecipe::execute(VPTransformState &State) {
255581ad6265SDimitry Andric auto &Builder = State.Builder;
255681ad6265SDimitry Andric
25577a6dacacSDimitry Andric // Reductions do not have to start at zero. They can start with
25587a6dacacSDimitry Andric // any loop-invariant value.
25597a6dacacSDimitry Andric VPValue *StartVPV = getStartValue();
25607a6dacacSDimitry Andric Value *StartV = StartVPV->getLiveInIRValue();
25617a6dacacSDimitry Andric
256281ad6265SDimitry Andric // In order to support recurrences we need to be able to vectorize Phi nodes.
256381ad6265SDimitry Andric // Phi nodes have cycles, so we need to vectorize them in two stages. This is
256481ad6265SDimitry Andric // stage #1: We create a new vector PHI node with no incoming edges. We'll use
256581ad6265SDimitry Andric // this value when we vectorize all of the instructions that use the PHI.
256681ad6265SDimitry Andric bool ScalarPHI = State.VF.isScalar() || IsInLoop;
25677a6dacacSDimitry Andric Type *VecTy = ScalarPHI ? StartV->getType()
25687a6dacacSDimitry Andric : VectorType::get(StartV->getType(), State.VF);
256981ad6265SDimitry Andric
257081ad6265SDimitry Andric BasicBlock *HeaderBB = State.CFG.PrevBB;
257181ad6265SDimitry Andric assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
257281ad6265SDimitry Andric "recipe must be in the vector loop header");
257381ad6265SDimitry Andric unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
257481ad6265SDimitry Andric for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
25755f757f3fSDimitry Andric Instruction *EntryPart = PHINode::Create(VecTy, 2, "vec.phi");
25765f757f3fSDimitry Andric EntryPart->insertBefore(HeaderBB->getFirstInsertionPt());
2577*0fca6ea1SDimitry Andric State.set(this, EntryPart, Part, IsInLoop);
257881ad6265SDimitry Andric }
257981ad6265SDimitry Andric
258081ad6265SDimitry Andric BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
258181ad6265SDimitry Andric
258281ad6265SDimitry Andric Value *Iden = nullptr;
258381ad6265SDimitry Andric RecurKind RK = RdxDesc.getRecurrenceKind();
258481ad6265SDimitry Andric if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
25855f757f3fSDimitry Andric RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
25865f757f3fSDimitry Andric // MinMax and AnyOf reductions have the start value as their identity.
258781ad6265SDimitry Andric if (ScalarPHI) {
258881ad6265SDimitry Andric Iden = StartV;
258981ad6265SDimitry Andric } else {
259081ad6265SDimitry Andric IRBuilderBase::InsertPointGuard IPBuilder(Builder);
259181ad6265SDimitry Andric Builder.SetInsertPoint(VectorPH->getTerminator());
259281ad6265SDimitry Andric StartV = Iden =
259381ad6265SDimitry Andric Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
259481ad6265SDimitry Andric }
259581ad6265SDimitry Andric } else {
259681ad6265SDimitry Andric Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
259781ad6265SDimitry Andric RdxDesc.getFastMathFlags());
259881ad6265SDimitry Andric
259981ad6265SDimitry Andric if (!ScalarPHI) {
260081ad6265SDimitry Andric Iden = Builder.CreateVectorSplat(State.VF, Iden);
260181ad6265SDimitry Andric IRBuilderBase::InsertPointGuard IPBuilder(Builder);
260281ad6265SDimitry Andric Builder.SetInsertPoint(VectorPH->getTerminator());
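// Seed the vector start value with the identity in every lane and place the
// actual start value in lane 0.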
260381ad6265SDimitry Andric Constant *Zero = Builder.getInt32(0);
260481ad6265SDimitry Andric StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
260581ad6265SDimitry Andric }
260681ad6265SDimitry Andric }
260781ad6265SDimitry Andric
260881ad6265SDimitry Andric for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
2609*0fca6ea1SDimitry Andric Value *EntryPart = State.get(this, Part, IsInLoop);
261081ad6265SDimitry Andric // Make sure to add the reduction start value only to the
261181ad6265SDimitry Andric // first unroll part.
261281ad6265SDimitry Andric Value *StartVal = (Part == 0) ? StartV : Iden;
261381ad6265SDimitry Andric cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
261481ad6265SDimitry Andric }
261581ad6265SDimitry Andric }
261681ad6265SDimitry Andric
261781ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
261881ad6265SDimitry Andric void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
261981ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
262081ad6265SDimitry Andric O << Indent << "WIDEN-REDUCTION-PHI ";
262181ad6265SDimitry Andric
262281ad6265SDimitry Andric printAsOperand(O, SlotTracker);
262381ad6265SDimitry Andric O << " = phi ";
262481ad6265SDimitry Andric printOperands(O, SlotTracker);
262581ad6265SDimitry Andric }
262681ad6265SDimitry Andric #endif
262781ad6265SDimitry Andric
262881ad6265SDimitry Andric void VPWidenPHIRecipe::execute(VPTransformState &State) {
262981ad6265SDimitry Andric assert(EnableVPlanNativePath &&
263081ad6265SDimitry Andric "Non-native vplans are not expected to have VPWidenPHIRecipes.");
263181ad6265SDimitry Andric
26325f757f3fSDimitry Andric Value *Op0 = State.get(getOperand(0), 0);
263381ad6265SDimitry Andric Type *VecTy = Op0->getType();
263481ad6265SDimitry Andric Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
263581ad6265SDimitry Andric State.set(this, VecPhi, 0);
263681ad6265SDimitry Andric }
263781ad6265SDimitry Andric
263881ad6265SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
263981ad6265SDimitry Andric void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
264081ad6265SDimitry Andric VPSlotTracker &SlotTracker) const {
264181ad6265SDimitry Andric O << Indent << "WIDEN-PHI ";
264281ad6265SDimitry Andric
264381ad6265SDimitry Andric auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
264481ad6265SDimitry Andric // Unless all incoming values are modeled in VPlan print the original PHI
264581ad6265SDimitry Andric // directly.
264681ad6265SDimitry Andric // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
264781ad6265SDimitry Andric // values as VPValues.
264881ad6265SDimitry Andric if (getNumOperands() != OriginalPhi->getNumOperands()) {
264981ad6265SDimitry Andric O << VPlanIngredient(OriginalPhi);
265081ad6265SDimitry Andric return;
265181ad6265SDimitry Andric }
265281ad6265SDimitry Andric
265381ad6265SDimitry Andric printAsOperand(O, SlotTracker);
265481ad6265SDimitry Andric O << " = phi ";
265581ad6265SDimitry Andric printOperands(O, SlotTracker);
265681ad6265SDimitry Andric }
265781ad6265SDimitry Andric #endif
2658753f127fSDimitry Andric
2659753f127fSDimitry Andric // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2660753f127fSDimitry Andric // remove VPActiveLaneMaskPHIRecipe.
2661753f127fSDimitry Andric void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
2662753f127fSDimitry Andric BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2663753f127fSDimitry Andric for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
2664753f127fSDimitry Andric Value *StartMask = State.get(getOperand(0), Part);
2665753f127fSDimitry Andric PHINode *EntryPart =
2666753f127fSDimitry Andric State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
2667753f127fSDimitry Andric EntryPart->addIncoming(StartMask, VectorPH);
26685f757f3fSDimitry Andric EntryPart->setDebugLoc(getDebugLoc());
2669753f127fSDimitry Andric State.set(this, EntryPart, Part);
2670753f127fSDimitry Andric }
2671753f127fSDimitry Andric }
2672753f127fSDimitry Andric
2673753f127fSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2674753f127fSDimitry Andric void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2675753f127fSDimitry Andric VPSlotTracker &SlotTracker) const {
2676753f127fSDimitry Andric O << Indent << "ACTIVE-LANE-MASK-PHI ";
2677753f127fSDimitry Andric
2678753f127fSDimitry Andric printAsOperand(O, SlotTracker);
2679753f127fSDimitry Andric O << " = phi ";
2680753f127fSDimitry Andric printOperands(O, SlotTracker);
2681753f127fSDimitry Andric }
2682753f127fSDimitry Andric #endif
2683*0fca6ea1SDimitry Andric
2684*0fca6ea1SDimitry Andric void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
2685*0fca6ea1SDimitry Andric BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
2686*0fca6ea1SDimitry Andric assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization.");
2687*0fca6ea1SDimitry Andric Value *Start = State.get(getOperand(0), VPIteration(0, 0));
2688*0fca6ea1SDimitry Andric PHINode *EntryPart =
2689*0fca6ea1SDimitry Andric State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
2690*0fca6ea1SDimitry Andric EntryPart->addIncoming(Start, VectorPH);
2691*0fca6ea1SDimitry Andric EntryPart->setDebugLoc(getDebugLoc());
2692*0fca6ea1SDimitry Andric State.set(this, EntryPart, 0, /*IsScalar=*/true);
2693*0fca6ea1SDimitry Andric }
2694*0fca6ea1SDimitry Andric
2695*0fca6ea1SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2696*0fca6ea1SDimitry Andric void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
2697*0fca6ea1SDimitry Andric VPSlotTracker &SlotTracker) const {
2698*0fca6ea1SDimitry Andric O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI ";
2699*0fca6ea1SDimitry Andric
2700*0fca6ea1SDimitry Andric printAsOperand(O, SlotTracker);
2701*0fca6ea1SDimitry Andric O << " = phi ";
2702*0fca6ea1SDimitry Andric printOperands(O, SlotTracker);
2703*0fca6ea1SDimitry Andric }
2704*0fca6ea1SDimitry Andric #endif
2705