xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (revision 5e3190f700637fcfc1a52daeaa4a031fdd2557c7)
1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains implementations for different VPlan recipes.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "VPlan.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Analysis/IVDescriptors.h"
19 #include "llvm/IR/BasicBlock.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/Instruction.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/IR/Type.h"
24 #include "llvm/IR/Value.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
30 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
31 #include <cassert>
32 
33 using namespace llvm;
34 
35 using VectorParts = SmallVector<Value *, 2>;
36 
37 extern cl::opt<bool> EnableVPlanNativePath;
38 
39 #define LV_NAME "loop-vectorize"
40 #define DEBUG_TYPE LV_NAME
41 
42 bool VPRecipeBase::mayWriteToMemory() const {
43   switch (getVPDefID()) {
44   case VPWidenMemoryInstructionSC: {
45     return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
46   }
47   case VPReplicateSC:
48   case VPWidenCallSC:
49     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
50         ->mayWriteToMemory();
51   case VPBranchOnMaskSC:
52   case VPScalarIVStepsSC:
53     return false;
54   case VPWidenIntOrFpInductionSC:
55   case VPWidenCanonicalIVSC:
56   case VPWidenPHISC:
57   case VPBlendSC:
58   case VPWidenSC:
59   case VPWidenGEPSC:
60   case VPReductionSC:
61   case VPWidenSelectSC: {
62     const Instruction *I =
63         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
64     (void)I;
65     assert((!I || !I->mayWriteToMemory()) &&
66            "underlying instruction may write to memory");
67     return false;
68   }
69   default:
70     return true;
71   }
72 }
73 
74 bool VPRecipeBase::mayReadFromMemory() const {
75   switch (getVPDefID()) {
76   case VPWidenMemoryInstructionSC: {
77     return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
78   }
79   case VPReplicateSC:
80   case VPWidenCallSC:
81     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
82         ->mayReadFromMemory();
83   case VPBranchOnMaskSC:
84   case VPScalarIVStepsSC:
85     return false;
86   case VPWidenIntOrFpInductionSC:
87   case VPWidenCanonicalIVSC:
88   case VPWidenPHISC:
89   case VPBlendSC:
90   case VPWidenSC:
91   case VPWidenGEPSC:
92   case VPReductionSC:
93   case VPWidenSelectSC: {
94     const Instruction *I =
95         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
96     (void)I;
97     assert((!I || !I->mayReadFromMemory()) &&
98            "underlying instruction may read from memory");
99     return false;
100   }
101   default:
102     return true;
103   }
104 }
105 
106 bool VPRecipeBase::mayHaveSideEffects() const {
107   switch (getVPDefID()) {
108   case VPDerivedIVSC:
109   case VPPredInstPHISC:
110     return false;
111   case VPWidenIntOrFpInductionSC:
112   case VPWidenPointerInductionSC:
113   case VPWidenCanonicalIVSC:
114   case VPWidenPHISC:
115   case VPBlendSC:
116   case VPWidenSC:
117   case VPWidenGEPSC:
118   case VPReductionSC:
119   case VPWidenSelectSC:
120   case VPScalarIVStepsSC: {
121     const Instruction *I =
122         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
123     (void)I;
124     assert((!I || !I->mayHaveSideEffects()) &&
125            "underlying instruction has side-effects");
126     return false;
127   }
128   case VPReplicateSC: {
129     auto *R = cast<VPReplicateRecipe>(this);
130     return R->getUnderlyingInstr()->mayHaveSideEffects();
131   }
132   default:
133     return true;
134   }
135 }
136 
137 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
138   auto Lane = VPLane::getLastLaneForVF(State.VF);
139   VPValue *ExitValue = getOperand(0);
140   if (vputils::isUniformAfterVectorization(ExitValue))
141     Lane = VPLane::getFirstLane();
142   Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
143                    State.Builder.GetInsertBlock());
144 }
145 
146 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
147   assert(!Parent && "Recipe already in some VPBasicBlock");
148   assert(InsertPos->getParent() &&
149          "Insertion position not in any VPBasicBlock");
150   Parent = InsertPos->getParent();
151   Parent->getRecipeList().insert(InsertPos->getIterator(), this);
152 }
153 
154 void VPRecipeBase::insertBefore(VPBasicBlock &BB,
155                                 iplist<VPRecipeBase>::iterator I) {
156   assert(!Parent && "Recipe already in some VPBasicBlock");
157   assert(I == BB.end() || I->getParent() == &BB);
158   Parent = &BB;
159   BB.getRecipeList().insert(I, this);
160 }
161 
162 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
163   assert(!Parent && "Recipe already in some VPBasicBlock");
164   assert(InsertPos->getParent() &&
165          "Insertion position not in any VPBasicBlock");
166   Parent = InsertPos->getParent();
167   Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
168 }
169 
170 void VPRecipeBase::removeFromParent() {
171   assert(getParent() && "Recipe not in any VPBasicBlock");
172   getParent()->getRecipeList().remove(getIterator());
173   Parent = nullptr;
174 }
175 
176 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
177   assert(getParent() && "Recipe not in any VPBasicBlock");
178   return getParent()->getRecipeList().erase(getIterator());
179 }
180 
181 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
182   removeFromParent();
183   insertAfter(InsertPos);
184 }
185 
186 void VPRecipeBase::moveBefore(VPBasicBlock &BB,
187                               iplist<VPRecipeBase>::iterator I) {
188   removeFromParent();
189   insertBefore(BB, I);
190 }
191 
192 void VPInstruction::generateInstruction(VPTransformState &State,
193                                         unsigned Part) {
194   IRBuilderBase &Builder = State.Builder;
195   Builder.SetCurrentDebugLocation(DL);
196 
197   if (Instruction::isBinaryOp(getOpcode())) {
198     Value *A = State.get(getOperand(0), Part);
199     Value *B = State.get(getOperand(1), Part);
200     Value *V =
201         Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
202     State.set(this, V, Part);
203     return;
204   }
205 
206   switch (getOpcode()) {
207   case VPInstruction::Not: {
208     Value *A = State.get(getOperand(0), Part);
209     Value *V = Builder.CreateNot(A, Name);
210     State.set(this, V, Part);
211     break;
212   }
213   case VPInstruction::ICmpULE: {
214     Value *IV = State.get(getOperand(0), Part);
215     Value *TC = State.get(getOperand(1), Part);
216     Value *V = Builder.CreateICmpULE(IV, TC, Name);
217     State.set(this, V, Part);
218     break;
219   }
220   case Instruction::Select: {
221     Value *Cond = State.get(getOperand(0), Part);
222     Value *Op1 = State.get(getOperand(1), Part);
223     Value *Op2 = State.get(getOperand(2), Part);
224     Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
225     State.set(this, V, Part);
226     break;
227   }
228   case VPInstruction::ActiveLaneMask: {
229     // Get first lane of vector induction variable.
230     Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
231     // Get the original loop tripcount.
232     Value *ScalarTC = State.get(getOperand(1), Part);
233 
234     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
235     auto *PredTy = VectorType::get(Int1Ty, State.VF);
236     Instruction *Call = Builder.CreateIntrinsic(
237         Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
238         {VIVElem0, ScalarTC}, nullptr, Name);
239     State.set(this, Call, Part);
240     break;
241   }
242   case VPInstruction::FirstOrderRecurrenceSplice: {
243     // Generate code to combine the previous and current values in vector v3.
244     //
245     //   vector.ph:
246     //     v_init = vector(..., ..., ..., a[-1])
247     //     br vector.body
248     //
249     //   vector.body
250     //     i = phi [0, vector.ph], [i+4, vector.body]
251     //     v1 = phi [v_init, vector.ph], [v2, vector.body]
252     //     v2 = a[i, i+1, i+2, i+3];
253     //     v3 = vector(v1(3), v2(0, 1, 2))
254 
255     // For the first part, use the recurrence phi (v1), otherwise v2.
256     auto *V1 = State.get(getOperand(0), 0);
257     Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
258     if (!PartMinus1->getType()->isVectorTy()) {
259       State.set(this, PartMinus1, Part);
260     } else {
261       Value *V2 = State.get(getOperand(1), Part);
262       State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
263                 Part);
264     }
265     break;
266   }
267   case VPInstruction::CanonicalIVIncrement:
268   case VPInstruction::CanonicalIVIncrementNUW: {
269     Value *Next = nullptr;
270     if (Part == 0) {
271       bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
272       auto *Phi = State.get(getOperand(0), 0);
273       // The loop step is equal to the vectorization factor (num of SIMD
274       // elements) times the unroll factor (num of SIMD instructions).
275       Value *Step =
276           createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
277       Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
278     } else {
279       Next = State.get(this, 0);
280     }
281 
282     State.set(this, Next, Part);
283     break;
284   }
285 
286   case VPInstruction::CanonicalIVIncrementForPart:
287   case VPInstruction::CanonicalIVIncrementForPartNUW: {
288     bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW;
289     auto *IV = State.get(getOperand(0), VPIteration(0, 0));
290     if (Part == 0) {
291       State.set(this, IV, Part);
292       break;
293     }
294 
295     // The canonical IV is incremented by the vectorization factor (num of SIMD
296     // elements) times the unroll part.
297     Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
298     Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false);
299     State.set(this, Next, Part);
300     break;
301   }
302   case VPInstruction::BranchOnCond: {
303     if (Part != 0)
304       break;
305 
306     Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
307     VPRegionBlock *ParentRegion = getParent()->getParent();
308     VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
309 
310     // Replace the temporary unreachable terminator with a new conditional
311     // branch, hooking it up to backward destination for exiting blocks now and
312     // to forward destination(s) later when they are created.
313     BranchInst *CondBr =
314         Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
315 
316     if (getParent()->isExiting())
317       CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
318 
319     CondBr->setSuccessor(0, nullptr);
320     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
321     break;
322   }
323   case VPInstruction::BranchOnCount: {
324     if (Part != 0)
325       break;
326     // First create the compare.
327     Value *IV = State.get(getOperand(0), Part);
328     Value *TC = State.get(getOperand(1), Part);
329     Value *Cond = Builder.CreateICmpEQ(IV, TC);
330 
331     // Now create the branch.
332     auto *Plan = getParent()->getPlan();
333     VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
334     VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
335 
336     // Replace the temporary unreachable terminator with a new conditional
337     // branch, hooking it up to backward destination (the header) now and to the
338     // forward destination (the exit/middle block) later when it is created.
339     // Note that CreateCondBr expects a valid BB as first argument, so we need
340     // to set it to nullptr later.
341     BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
342                                               State.CFG.VPBB2IRBB[Header]);
343     CondBr->setSuccessor(0, nullptr);
344     Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
345     break;
346   }
347   default:
348     llvm_unreachable("Unsupported opcode for instruction");
349   }
350 }
351 
352 void VPInstruction::execute(VPTransformState &State) {
353   assert(!State.Instance && "VPInstruction executing an Instance");
354   IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
355   State.Builder.setFastMathFlags(FMF);
356   for (unsigned Part = 0; Part < State.UF; ++Part)
357     generateInstruction(State, Part);
358 }
359 
360 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
361 void VPInstruction::dump() const {
362   VPSlotTracker SlotTracker(getParent()->getPlan());
363   print(dbgs(), "", SlotTracker);
364 }
365 
366 void VPInstruction::print(raw_ostream &O, const Twine &Indent,
367                           VPSlotTracker &SlotTracker) const {
368   O << Indent << "EMIT ";
369 
370   if (hasResult()) {
371     printAsOperand(O, SlotTracker);
372     O << " = ";
373   }
374 
375   switch (getOpcode()) {
376   case VPInstruction::Not:
377     O << "not";
378     break;
379   case VPInstruction::ICmpULE:
380     O << "icmp ule";
381     break;
382   case VPInstruction::SLPLoad:
383     O << "combined load";
384     break;
385   case VPInstruction::SLPStore:
386     O << "combined store";
387     break;
388   case VPInstruction::ActiveLaneMask:
389     O << "active lane mask";
390     break;
391   case VPInstruction::FirstOrderRecurrenceSplice:
392     O << "first-order splice";
393     break;
394   case VPInstruction::CanonicalIVIncrement:
395     O << "VF * UF + ";
396     break;
397   case VPInstruction::CanonicalIVIncrementNUW:
398     O << "VF * UF +(nuw) ";
399     break;
400   case VPInstruction::BranchOnCond:
401     O << "branch-on-cond";
402     break;
403   case VPInstruction::CanonicalIVIncrementForPart:
404     O << "VF * Part + ";
405     break;
406   case VPInstruction::CanonicalIVIncrementForPartNUW:
407     O << "VF * Part +(nuw) ";
408     break;
409   case VPInstruction::BranchOnCount:
410     O << "branch-on-count ";
411     break;
412   default:
413     O << Instruction::getOpcodeName(getOpcode());
414   }
415 
416   O << FMF;
417 
418   for (const VPValue *Operand : operands()) {
419     O << " ";
420     Operand->printAsOperand(O, SlotTracker);
421   }
422 
423   if (DL) {
424     O << ", !dbg ";
425     DL.print(O);
426   }
427 }
428 #endif
429 
430 void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
431   // Make sure the VPInstruction is a floating-point operation.
432   assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
433           Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
434           Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
435           Opcode == Instruction::FCmp) &&
436          "this op can't take fast-math flags");
437   FMF = FMFNew;
438 }
439 
440 void VPWidenCallRecipe::execute(VPTransformState &State) {
441   auto &CI = *cast<CallInst>(getUnderlyingInstr());
442   assert(!isa<DbgInfoIntrinsic>(CI) &&
443          "DbgInfoIntrinsic should have been dropped during VPlan construction");
444   State.setDebugLocFromInst(&CI);
445 
446   SmallVector<Type *, 4> Tys;
447   for (Value *ArgOperand : CI.args())
448     Tys.push_back(
449         ToVectorTy(ArgOperand->getType(), State.VF.getKnownMinValue()));
450 
451   for (unsigned Part = 0; Part < State.UF; ++Part) {
452     SmallVector<Type *, 2> TysForDecl = {CI.getType()};
453     SmallVector<Value *, 4> Args;
454     for (const auto &I : enumerate(operands())) {
455       // Some intrinsics have a scalar argument - don't replace it with a
456       // vector.
457       Value *Arg;
458       if (VectorIntrinsicID == Intrinsic::not_intrinsic ||
459           !isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
460         Arg = State.get(I.value(), Part);
461       else
462         Arg = State.get(I.value(), VPIteration(0, 0));
463       if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
464         TysForDecl.push_back(Arg->getType());
465       Args.push_back(Arg);
466     }
467 
468     Function *VectorF;
469     if (VectorIntrinsicID != Intrinsic::not_intrinsic) {
470       // Use vector version of the intrinsic.
471       if (State.VF.isVector())
472         TysForDecl[0] =
473             VectorType::get(CI.getType()->getScalarType(), State.VF);
474       Module *M = State.Builder.GetInsertBlock()->getModule();
475       VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
476       assert(VectorF && "Can't retrieve vector intrinsic.");
477     } else {
478       // Use vector version of the function call.
479       const VFShape Shape = VFShape::get(CI, State.VF, false /*HasGlobalPred*/);
480 #ifndef NDEBUG
481       assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr &&
482              "Can't create vector function.");
483 #endif
484       VectorF = VFDatabase(CI).getVectorizedFunction(Shape);
485     }
486     SmallVector<OperandBundleDef, 1> OpBundles;
487     CI.getOperandBundlesAsDefs(OpBundles);
488     CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
489 
490     if (isa<FPMathOperator>(V))
491       V->copyFastMathFlags(&CI);
492 
493     State.set(this, V, Part);
494     State.addMetadata(V, &CI);
495   }
496 }
497 
498 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
499 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
500                               VPSlotTracker &SlotTracker) const {
501   O << Indent << "WIDEN-CALL ";
502 
503   auto *CI = cast<CallInst>(getUnderlyingInstr());
504   if (CI->getType()->isVoidTy())
505     O << "void ";
506   else {
507     printAsOperand(O, SlotTracker);
508     O << " = ";
509   }
510 
511   O << "call @" << CI->getCalledFunction()->getName() << "(";
512   printOperands(O, SlotTracker);
513   O << ")";
514 
515   if (VectorIntrinsicID)
516     O << " (using vector intrinsic)";
517   else
518     O << " (using library function)";
519 }
520 
521 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
522                                 VPSlotTracker &SlotTracker) const {
523   O << Indent << "WIDEN-SELECT ";
524   printAsOperand(O, SlotTracker);
525   O << " = select ";
526   getOperand(0)->printAsOperand(O, SlotTracker);
527   O << ", ";
528   getOperand(1)->printAsOperand(O, SlotTracker);
529   O << ", ";
530   getOperand(2)->printAsOperand(O, SlotTracker);
531   O << (InvariantCond ? " (condition is loop invariant)" : "");
532 }
533 #endif
534 
535 void VPWidenSelectRecipe::execute(VPTransformState &State) {
536   auto &I = *cast<SelectInst>(getUnderlyingInstr());
537   State.setDebugLocFromInst(&I);
538 
539   // The condition can be loop invariant but still defined inside the
540   // loop. This means that we can't just use the original 'cond' value.
541   // We have to take the 'vectorized' value and pick the first lane.
542   // Instcombine will make this a no-op.
543   auto *InvarCond =
544       InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
545 
546   for (unsigned Part = 0; Part < State.UF; ++Part) {
547     Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
548     Value *Op0 = State.get(getOperand(1), Part);
549     Value *Op1 = State.get(getOperand(2), Part);
550     Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
551     State.set(this, Sel, Part);
552     State.addMetadata(Sel, &I);
553   }
554 }
555 
556 void VPWidenRecipe::execute(VPTransformState &State) {
557   auto &I = *cast<Instruction>(getUnderlyingValue());
558   auto &Builder = State.Builder;
559   switch (I.getOpcode()) {
560   case Instruction::Call:
561   case Instruction::Br:
562   case Instruction::PHI:
563   case Instruction::GetElementPtr:
564   case Instruction::Select:
565     llvm_unreachable("This instruction is handled by a different recipe.");
566   case Instruction::UDiv:
567   case Instruction::SDiv:
568   case Instruction::SRem:
569   case Instruction::URem:
570   case Instruction::Add:
571   case Instruction::FAdd:
572   case Instruction::Sub:
573   case Instruction::FSub:
574   case Instruction::FNeg:
575   case Instruction::Mul:
576   case Instruction::FMul:
577   case Instruction::FDiv:
578   case Instruction::FRem:
579   case Instruction::Shl:
580   case Instruction::LShr:
581   case Instruction::AShr:
582   case Instruction::And:
583   case Instruction::Or:
584   case Instruction::Xor: {
585     // Just widen unops and binops.
586     State.setDebugLocFromInst(&I);
587 
588     for (unsigned Part = 0; Part < State.UF; ++Part) {
589       SmallVector<Value *, 2> Ops;
590       for (VPValue *VPOp : operands())
591         Ops.push_back(State.get(VPOp, Part));
592 
593       Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
594 
595       if (auto *VecOp = dyn_cast<Instruction>(V)) {
596         VecOp->copyIRFlags(&I);
597 
598         // If the instruction is vectorized and was in a basic block that needed
599         // predication, we can't propagate poison-generating flags (nuw/nsw,
600         // exact, etc.). The control flow has been linearized and the
601         // instruction is no longer guarded by the predicate, which could make
602         // the flag properties to no longer hold.
603         if (State.MayGeneratePoisonRecipes.contains(this))
604           VecOp->dropPoisonGeneratingFlags();
605       }
606 
607       // Use this vector value for all users of the original instruction.
608       State.set(this, V, Part);
609       State.addMetadata(V, &I);
610     }
611 
612     break;
613   }
614   case Instruction::Freeze: {
615     State.setDebugLocFromInst(&I);
616 
617     for (unsigned Part = 0; Part < State.UF; ++Part) {
618       Value *Op = State.get(getOperand(0), Part);
619 
620       Value *Freeze = Builder.CreateFreeze(Op);
621       State.set(this, Freeze, Part);
622     }
623     break;
624   }
625   case Instruction::ICmp:
626   case Instruction::FCmp: {
627     // Widen compares. Generate vector compares.
628     bool FCmp = (I.getOpcode() == Instruction::FCmp);
629     auto *Cmp = cast<CmpInst>(&I);
630     State.setDebugLocFromInst(Cmp);
631     for (unsigned Part = 0; Part < State.UF; ++Part) {
632       Value *A = State.get(getOperand(0), Part);
633       Value *B = State.get(getOperand(1), Part);
634       Value *C = nullptr;
635       if (FCmp) {
636         // Propagate fast math flags.
637         IRBuilder<>::FastMathFlagGuard FMFG(Builder);
638         Builder.setFastMathFlags(Cmp->getFastMathFlags());
639         C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
640       } else {
641         C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
642       }
643       State.set(this, C, Part);
644       State.addMetadata(C, &I);
645     }
646 
647     break;
648   }
649 
650   case Instruction::ZExt:
651   case Instruction::SExt:
652   case Instruction::FPToUI:
653   case Instruction::FPToSI:
654   case Instruction::FPExt:
655   case Instruction::PtrToInt:
656   case Instruction::IntToPtr:
657   case Instruction::SIToFP:
658   case Instruction::UIToFP:
659   case Instruction::Trunc:
660   case Instruction::FPTrunc:
661   case Instruction::BitCast: {
662     auto *CI = cast<CastInst>(&I);
663     State.setDebugLocFromInst(CI);
664 
665     /// Vectorize casts.
666     Type *DestTy = (State.VF.isScalar())
667                        ? CI->getType()
668                        : VectorType::get(CI->getType(), State.VF);
669 
670     for (unsigned Part = 0; Part < State.UF; ++Part) {
671       Value *A = State.get(getOperand(0), Part);
672       Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
673       State.set(this, Cast, Part);
674       State.addMetadata(Cast, &I);
675     }
676     break;
677   }
678   default:
679     // This instruction is not vectorized by simple widening.
680     LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
681     llvm_unreachable("Unhandled instruction!");
682   } // end of switch.
683 }
684 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
685 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
686                           VPSlotTracker &SlotTracker) const {
687   O << Indent << "WIDEN ";
688   printAsOperand(O, SlotTracker);
689   const Instruction *UI = getUnderlyingInstr();
690   O << " = " << UI->getOpcodeName() << " ";
691   if (auto *Cmp = dyn_cast<CmpInst>(UI))
692     O << CmpInst::getPredicateName(Cmp->getPredicate()) << " ";
693   printOperands(O, SlotTracker);
694 }
695 
696 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
697                                           VPSlotTracker &SlotTracker) const {
698   O << Indent << "WIDEN-INDUCTION";
699   if (getTruncInst()) {
700     O << "\\l\"";
701     O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
702     O << " +\n" << Indent << "\"  ";
703     getVPValue(0)->printAsOperand(O, SlotTracker);
704   } else
705     O << " " << VPlanIngredient(IV);
706 
707   O << ", ";
708   getStepValue()->printAsOperand(O, SlotTracker);
709 }
710 #endif
711 
712 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
713   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
714   auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
715   return StartC && StartC->isZero() && StepC && StepC->isOne();
716 }
717 
718 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
719 void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
720                               VPSlotTracker &SlotTracker) const {
721   O << Indent;
722   printAsOperand(O, SlotTracker);
723   O << Indent << "= DERIVED-IV ";
724   getStartValue()->printAsOperand(O, SlotTracker);
725   O << " + ";
726   getCanonicalIV()->printAsOperand(O, SlotTracker);
727   O << " * ";
728   getStepValue()->printAsOperand(O, SlotTracker);
729 
730   if (IndDesc.getStep()->getType() != ResultTy)
731     O << " (truncated to " << *ResultTy << ")";
732 }
733 #endif
734 
735 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
736 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
737                                   VPSlotTracker &SlotTracker) const {
738   O << Indent;
739   printAsOperand(O, SlotTracker);
740   O << Indent << "= SCALAR-STEPS ";
741   printOperands(O, SlotTracker);
742 }
743 #endif
744 
745 void VPWidenGEPRecipe::execute(VPTransformState &State) {
746   auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
747   // Construct a vector GEP by widening the operands of the scalar GEP as
748   // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
749   // results in a vector of pointers when at least one operand of the GEP
750   // is vector-typed. Thus, to keep the representation compact, we only use
751   // vector-typed operands for loop-varying values.
752 
753   if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
754     // If we are vectorizing, but the GEP has only loop-invariant operands,
755     // the GEP we build (by only using vector-typed operands for
756     // loop-varying values) would be a scalar pointer. Thus, to ensure we
757     // produce a vector of pointers, we need to either arbitrarily pick an
758     // operand to broadcast, or broadcast a clone of the original GEP.
759     // Here, we broadcast a clone of the original.
760     //
761     // TODO: If at some point we decide to scalarize instructions having
762     //       loop-invariant operands, this special case will no longer be
763     //       required. We would add the scalarization decision to
764     //       collectLoopScalars() and teach getVectorValue() to broadcast
765     //       the lane-zero scalar value.
766     auto *Clone = State.Builder.Insert(GEP->clone());
767     for (unsigned Part = 0; Part < State.UF; ++Part) {
768       Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
769       State.set(this, EntryPart, Part);
770       State.addMetadata(EntryPart, GEP);
771     }
772   } else {
773     // If the GEP has at least one loop-varying operand, we are sure to
774     // produce a vector of pointers. But if we are only unrolling, we want
775     // to produce a scalar GEP for each unroll part. Thus, the GEP we
776     // produce with the code below will be scalar (if VF == 1) or vector
777     // (otherwise). Note that for the unroll-only case, we still maintain
778     // values in the vector mapping with initVector, as we do for other
779     // instructions.
780     for (unsigned Part = 0; Part < State.UF; ++Part) {
781       // The pointer operand of the new GEP. If it's loop-invariant, we
782       // won't broadcast it.
783       auto *Ptr = IsPtrLoopInvariant
784                       ? State.get(getOperand(0), VPIteration(0, 0))
785                       : State.get(getOperand(0), Part);
786 
787       // Collect all the indices for the new GEP. If any index is
788       // loop-invariant, we won't broadcast it.
789       SmallVector<Value *, 4> Indices;
790       for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
791         VPValue *Operand = getOperand(I);
792         if (IsIndexLoopInvariant[I - 1])
793           Indices.push_back(State.get(Operand, VPIteration(0, 0)));
794         else
795           Indices.push_back(State.get(Operand, Part));
796       }
797 
798       // If the GEP instruction is vectorized and was in a basic block that
799       // needed predication, we can't propagate the poison-generating 'inbounds'
800       // flag. The control flow has been linearized and the GEP is no longer
801       // guarded by the predicate, which could make the 'inbounds' properties to
802       // no longer hold.
803       bool IsInBounds =
804           GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;
805 
806       // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
807       // but it should be a vector, otherwise.
808       auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
809                                              Indices, "", IsInBounds);
810       assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
811              "NewGEP is not a pointer vector");
812       State.set(this, NewGEP, Part);
813       State.addMetadata(NewGEP, GEP);
814     }
815   }
816 }
817 
818 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
819 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
820                              VPSlotTracker &SlotTracker) const {
821   O << Indent << "WIDEN-GEP ";
822   O << (IsPtrLoopInvariant ? "Inv" : "Var");
823   size_t IndicesNumber = IsIndexLoopInvariant.size();
824   for (size_t I = 0; I < IndicesNumber; ++I)
825     O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
826 
827   O << " ";
828   printAsOperand(O, SlotTracker);
829   O << " = getelementptr ";
830   printOperands(O, SlotTracker);
831 }
832 #endif
833 
834 void VPBlendRecipe::execute(VPTransformState &State) {
835   State.setDebugLocFromInst(Phi);
836   // We know that all PHIs in non-header blocks are converted into
837   // selects, so we don't have to worry about the insertion order and we
838   // can just use the builder.
839   // At this point we generate the predication tree. There may be
840   // duplications since this is a simple recursive scan, but future
841   // optimizations will clean it up.
842 
843   unsigned NumIncoming = getNumIncomingValues();
844 
845   // Generate a sequence of selects of the form:
846   // SELECT(Mask3, In3,
847   //        SELECT(Mask2, In2,
848   //               SELECT(Mask1, In1,
849   //                      In0)))
850   // Note that Mask0 is never used: lanes for which no path reaches this phi and
851   // are essentially undef are taken from In0.
852  VectorParts Entry(State.UF);
853   for (unsigned In = 0; In < NumIncoming; ++In) {
854     for (unsigned Part = 0; Part < State.UF; ++Part) {
855       // We might have single edge PHIs (blocks) - use an identity
856       // 'select' for the first PHI operand.
857       Value *In0 = State.get(getIncomingValue(In), Part);
858       if (In == 0)
859         Entry[Part] = In0; // Initialize with the first incoming value.
860       else {
861         // Select between the current value and the previous incoming edge
862         // based on the incoming mask.
863         Value *Cond = State.get(getMask(In), Part);
864         Entry[Part] =
865             State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
866       }
867     }
868   }
869   for (unsigned Part = 0; Part < State.UF; ++Part)
870     State.set(this, Entry[Part], Part);
871 }
872 
873 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
874 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
875                           VPSlotTracker &SlotTracker) const {
876   O << Indent << "BLEND ";
877   Phi->printAsOperand(O, false);
878   O << " =";
879   if (getNumIncomingValues() == 1) {
880     // Not a User of any mask: not really blending, this is a
881     // single-predecessor phi.
882     O << " ";
883     getIncomingValue(0)->printAsOperand(O, SlotTracker);
884   } else {
885     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
886       O << " ";
887       getIncomingValue(I)->printAsOperand(O, SlotTracker);
888       O << "/";
889       getMask(I)->printAsOperand(O, SlotTracker);
890     }
891   }
892 }
893 
894 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
895                               VPSlotTracker &SlotTracker) const {
896   O << Indent << "REDUCE ";
897   printAsOperand(O, SlotTracker);
898   O << " = ";
899   getChainOp()->printAsOperand(O, SlotTracker);
900   O << " +";
901   if (isa<FPMathOperator>(getUnderlyingInstr()))
902     O << getUnderlyingInstr()->getFastMathFlags();
903   O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
904   getVecOp()->printAsOperand(O, SlotTracker);
905   if (getCondOp()) {
906     O << ", ";
907     getCondOp()->printAsOperand(O, SlotTracker);
908   }
909   O << ")";
910   if (RdxDesc->IntermediateStore)
911     O << " (with final reduction value stored in invariant address sank "
912          "outside of loop)";
913 }
914 
915 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
916                               VPSlotTracker &SlotTracker) const {
917   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
918 
919   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
920     printAsOperand(O, SlotTracker);
921     O << " = ";
922   }
923   if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
924     O << "call @" << CB->getCalledFunction()->getName() << "(";
925     interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
926                     O, [&O, &SlotTracker](VPValue *Op) {
927                       Op->printAsOperand(O, SlotTracker);
928                     });
929     O << ")";
930   } else {
931     O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
932     printOperands(O, SlotTracker);
933   }
934 
935   if (AlsoPack)
936     O << " (S->V)";
937 }
938 #endif
939 
940 void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
941   assert(State.Instance && "Branch on Mask works only on single instance.");
942 
943   unsigned Part = State.Instance->Part;
944   unsigned Lane = State.Instance->Lane.getKnownLane();
945 
946   Value *ConditionBit = nullptr;
947   VPValue *BlockInMask = getMask();
948   if (BlockInMask) {
949     ConditionBit = State.get(BlockInMask, Part);
950     if (ConditionBit->getType()->isVectorTy())
951       ConditionBit = State.Builder.CreateExtractElement(
952           ConditionBit, State.Builder.getInt32(Lane));
953   } else // Block in mask is all-one.
954     ConditionBit = State.Builder.getTrue();
955 
956   // Replace the temporary unreachable terminator with a new conditional branch,
957   // whose two destinations will be set later when they are created.
958   auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
959   assert(isa<UnreachableInst>(CurrentTerminator) &&
960          "Expected to replace unreachable terminator with conditional branch.");
961   auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
962   CondBr->setSuccessor(0, nullptr);
963   ReplaceInstWithInst(CurrentTerminator, CondBr);
964 }
965 
966 void VPPredInstPHIRecipe::execute(VPTransformState &State) {
967   assert(State.Instance && "Predicated instruction PHI works per instance.");
968   Instruction *ScalarPredInst =
969       cast<Instruction>(State.get(getOperand(0), *State.Instance));
970   BasicBlock *PredicatedBB = ScalarPredInst->getParent();
971   BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
972   assert(PredicatingBB && "Predicated block has no single predecessor.");
973   assert(isa<VPReplicateRecipe>(getOperand(0)) &&
974          "operand must be VPReplicateRecipe");
975 
976   // By current pack/unpack logic we need to generate only a single phi node: if
977   // a vector value for the predicated instruction exists at this point it means
978   // the instruction has vector users only, and a phi for the vector value is
979   // needed. In this case the recipe of the predicated instruction is marked to
980   // also do that packing, thereby "hoisting" the insert-element sequence.
981   // Otherwise, a phi node for the scalar value is needed.
982   unsigned Part = State.Instance->Part;
983   if (State.hasVectorValue(getOperand(0), Part)) {
984     Value *VectorValue = State.get(getOperand(0), Part);
985     InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
986     PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
987     VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
988     VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
989     if (State.hasVectorValue(this, Part))
990       State.reset(this, VPhi, Part);
991     else
992       State.set(this, VPhi, Part);
993     // NOTE: Currently we need to update the value of the operand, so the next
994     // predicated iteration inserts its generated value in the correct vector.
995     State.reset(getOperand(0), VPhi, Part);
996   } else {
997     Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
998     PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
999     Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
1000                      PredicatingBB);
1001     Phi->addIncoming(ScalarPredInst, PredicatedBB);
1002     if (State.hasScalarValue(this, *State.Instance))
1003       State.reset(this, Phi, *State.Instance);
1004     else
1005       State.set(this, Phi, *State.Instance);
1006     // NOTE: Currently we need to update the value of the operand, so the next
1007     // predicated iteration inserts its generated value in the correct vector.
1008     State.reset(getOperand(0), Phi, *State.Instance);
1009   }
1010 }
1011 
1012 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1013 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1014                                 VPSlotTracker &SlotTracker) const {
1015   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
1016   printAsOperand(O, SlotTracker);
1017   O << " = ";
1018   printOperands(O, SlotTracker);
1019 }
1020 
1021 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
1022                                            VPSlotTracker &SlotTracker) const {
1023   O << Indent << "WIDEN ";
1024 
1025   if (!isStore()) {
1026     getVPSingleValue()->printAsOperand(O, SlotTracker);
1027     O << " = ";
1028   }
1029   O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
1030 
1031   printOperands(O, SlotTracker);
1032 }
1033 #endif
1034 
1035 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
1036   Value *Start = getStartValue()->getLiveInIRValue();
1037   PHINode *EntryPart = PHINode::Create(
1038       Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
1039 
1040   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1041   EntryPart->addIncoming(Start, VectorPH);
1042   EntryPart->setDebugLoc(DL);
1043   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1044     State.set(this, EntryPart, Part);
1045 }
1046 
1047 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1048 void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1049                                    VPSlotTracker &SlotTracker) const {
1050   O << Indent << "EMIT ";
1051   printAsOperand(O, SlotTracker);
1052   O << " = CANONICAL-INDUCTION";
1053 }
1054 #endif
1055 
1056 bool VPCanonicalIVPHIRecipe::isCanonical(const InductionDescriptor &ID,
1057                                          Type *Ty) const {
1058   if (Ty != getScalarType())
1059     return false;
1060   // The start value of ID must match the start value of this canonical
1061   // induction.
1062   if (getStartValue()->getLiveInIRValue() != ID.getStartValue())
1063     return false;
1064 
1065   ConstantInt *Step = ID.getConstIntStepValue();
1066   // ID must also be incremented by one. IK_IntInduction always increment the
1067   // induction by Step, but the binary op may not be set.
1068   return ID.getKind() == InductionDescriptor::IK_IntInduction && Step &&
1069          Step->isOne();
1070 }
1071 
1072 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
1073   return IsScalarAfterVectorization &&
1074          (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
1075 }
1076 
1077 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1078 void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
1079                                           VPSlotTracker &SlotTracker) const {
1080   O << Indent << "EMIT ";
1081   printAsOperand(O, SlotTracker);
1082   O << " = WIDEN-POINTER-INDUCTION ";
1083   getStartValue()->printAsOperand(O, SlotTracker);
1084   O << ", " << *IndDesc.getStep();
1085 }
1086 #endif
1087 
1088 void VPExpandSCEVRecipe::execute(VPTransformState &State) {
1089   assert(!State.Instance && "cannot be used in per-lane");
1090   const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
1091   SCEVExpander Exp(SE, DL, "induction");
1092 
1093   Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
1094                                  &*State.Builder.GetInsertPoint());
1095 
1096   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
1097     State.set(this, Res, Part);
1098 }
1099 
1100 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1101 void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
1102                                VPSlotTracker &SlotTracker) const {
1103   O << Indent << "EMIT ";
1104   getVPSingleValue()->printAsOperand(O, SlotTracker);
1105   O << " = EXPAND SCEV " << *Expr;
1106 }
1107 #endif
1108 
1109 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
1110   Value *CanonicalIV = State.get(getOperand(0), 0);
1111   Type *STy = CanonicalIV->getType();
1112   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
1113   ElementCount VF = State.VF;
1114   Value *VStart = VF.isScalar()
1115                       ? CanonicalIV
1116                       : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
1117   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1118     Value *VStep = createStepForVF(Builder, STy, VF, Part);
1119     if (VF.isVector()) {
1120       VStep = Builder.CreateVectorSplat(VF, VStep);
1121       VStep =
1122           Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
1123     }
1124     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
1125     State.set(this, CanonicalVectorIV, Part);
1126   }
1127 }
1128 
1129 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1130 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
1131                                      VPSlotTracker &SlotTracker) const {
1132   O << Indent << "EMIT ";
1133   printAsOperand(O, SlotTracker);
1134   O << " = WIDEN-CANONICAL-INDUCTION ";
1135   printOperands(O, SlotTracker);
1136 }
1137 #endif
1138 
1139 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
1140   auto &Builder = State.Builder;
1141   // Create a vector from the initial value.
1142   auto *VectorInit = getStartValue()->getLiveInIRValue();
1143 
1144   Type *VecTy = State.VF.isScalar()
1145                     ? VectorInit->getType()
1146                     : VectorType::get(VectorInit->getType(), State.VF);
1147 
1148   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1149   if (State.VF.isVector()) {
1150     auto *IdxTy = Builder.getInt32Ty();
1151     auto *One = ConstantInt::get(IdxTy, 1);
1152     IRBuilder<>::InsertPointGuard Guard(Builder);
1153     Builder.SetInsertPoint(VectorPH->getTerminator());
1154     auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
1155     auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
1156     VectorInit = Builder.CreateInsertElement(
1157         PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
1158   }
1159 
1160   // Create a phi node for the new recurrence.
1161   PHINode *EntryPart = PHINode::Create(
1162       VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
1163   EntryPart->addIncoming(VectorInit, VectorPH);
1164   State.set(this, EntryPart, 0);
1165 }
1166 
1167 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1168 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
1169                                             VPSlotTracker &SlotTracker) const {
1170   O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
1171   printAsOperand(O, SlotTracker);
1172   O << " = phi ";
1173   printOperands(O, SlotTracker);
1174 }
1175 #endif
1176 
1177 void VPReductionPHIRecipe::execute(VPTransformState &State) {
1178   PHINode *PN = cast<PHINode>(getUnderlyingValue());
1179   auto &Builder = State.Builder;
1180 
1181   // In order to support recurrences we need to be able to vectorize Phi nodes.
1182   // Phi nodes have cycles, so we need to vectorize them in two stages. This is
1183   // stage #1: We create a new vector PHI node with no incoming edges. We'll use
1184   // this value when we vectorize all of the instructions that use the PHI.
1185   bool ScalarPHI = State.VF.isScalar() || IsInLoop;
1186   Type *VecTy =
1187       ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
1188 
1189   BasicBlock *HeaderBB = State.CFG.PrevBB;
1190   assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
1191          "recipe must be in the vector loop header");
1192   unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
1193   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1194     Value *EntryPart =
1195         PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
1196     State.set(this, EntryPart, Part);
1197   }
1198 
1199   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1200 
1201   // Reductions do not have to start at zero. They can start with
1202   // any loop invariant values.
1203   VPValue *StartVPV = getStartValue();
1204   Value *StartV = StartVPV->getLiveInIRValue();
1205 
1206   Value *Iden = nullptr;
1207   RecurKind RK = RdxDesc.getRecurrenceKind();
1208   if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
1209       RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
1210     // MinMax reduction have the start value as their identify.
1211     if (ScalarPHI) {
1212       Iden = StartV;
1213     } else {
1214       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1215       Builder.SetInsertPoint(VectorPH->getTerminator());
1216       StartV = Iden =
1217           Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
1218     }
1219   } else {
1220     Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
1221                                          RdxDesc.getFastMathFlags());
1222 
1223     if (!ScalarPHI) {
1224       Iden = Builder.CreateVectorSplat(State.VF, Iden);
1225       IRBuilderBase::InsertPointGuard IPBuilder(Builder);
1226       Builder.SetInsertPoint(VectorPH->getTerminator());
1227       Constant *Zero = Builder.getInt32(0);
1228       StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
1229     }
1230   }
1231 
1232   for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1233     Value *EntryPart = State.get(this, Part);
1234     // Make sure to add the reduction start value only to the
1235     // first unroll part.
1236     Value *StartVal = (Part == 0) ? StartV : Iden;
1237     cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
1238   }
1239 }
1240 
1241 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1242 void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1243                                  VPSlotTracker &SlotTracker) const {
1244   O << Indent << "WIDEN-REDUCTION-PHI ";
1245 
1246   printAsOperand(O, SlotTracker);
1247   O << " = phi ";
1248   printOperands(O, SlotTracker);
1249 }
1250 #endif
1251 
1252 void VPWidenPHIRecipe::execute(VPTransformState &State) {
1253   assert(EnableVPlanNativePath &&
1254          "Non-native vplans are not expected to have VPWidenPHIRecipes.");
1255 
1256   // Currently we enter here in the VPlan-native path for non-induction
1257   // PHIs where all control flow is uniform. We simply widen these PHIs.
1258   // Create a vector phi with no operands - the vector phi operands will be
1259   // set at the end of vector code generation.
1260   VPBasicBlock *Parent = getParent();
1261   VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
1262   unsigned StartIdx = 0;
1263   // For phis in header blocks of loop regions, use the index of the value
1264   // coming from the preheader.
1265   if (LoopRegion->getEntryBasicBlock() == Parent) {
1266     for (unsigned I = 0; I < getNumOperands(); ++I) {
1267       if (getIncomingBlock(I) ==
1268           LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
1269         StartIdx = I;
1270     }
1271   }
1272   Value *Op0 = State.get(getOperand(StartIdx), 0);
1273   Type *VecTy = Op0->getType();
1274   Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
1275   State.set(this, VecPhi, 0);
1276 }
1277 
1278 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1279 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1280                              VPSlotTracker &SlotTracker) const {
1281   O << Indent << "WIDEN-PHI ";
1282 
1283   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
1284   // Unless all incoming values are modeled in VPlan  print the original PHI
1285   // directly.
1286   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
1287   // values as VPValues.
1288   if (getNumOperands() != OriginalPhi->getNumOperands()) {
1289     O << VPlanIngredient(OriginalPhi);
1290     return;
1291   }
1292 
1293   printAsOperand(O, SlotTracker);
1294   O << " = phi ";
1295   printOperands(O, SlotTracker);
1296 }
1297 #endif
1298 
1299 // TODO: It would be good to use the existing VPWidenPHIRecipe instead and
1300 // remove VPActiveLaneMaskPHIRecipe.
1301 void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
1302   BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
1303   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
1304     Value *StartMask = State.get(getOperand(0), Part);
1305     PHINode *EntryPart =
1306         State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
1307     EntryPart->addIncoming(StartMask, VectorPH);
1308     EntryPart->setDebugLoc(DL);
1309     State.set(this, EntryPart, Part);
1310   }
1311 }
1312 
1313 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1314 void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
1315                                       VPSlotTracker &SlotTracker) const {
1316   O << Indent << "ACTIVE-LANE-MASK-PHI ";
1317 
1318   printAsOperand(O, SlotTracker);
1319   O << " = phi ";
1320   printOperands(O, SlotTracker);
1321 }
1322 #endif
1323