xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This is the LLVM vectorization plan. It represents a candidate for
11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
12 /// before generating LLVM-IR.
13 /// The vectorizer uses vectorization plans to estimate the costs of potential
14 /// candidates and if profitable to execute the desired plan, generating vector
15 /// LLVM-IR code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "VPlan.h"
20 #include "LoopVectorizationPlanner.h"
21 #include "VPlanCFG.h"
22 #include "VPlanDominatorTree.h"
23 #include "VPlanHelpers.h"
24 #include "VPlanPatternMatch.h"
25 #include "VPlanTransforms.h"
26 #include "VPlanUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/ADT/Twine.h"
32 #include "llvm/Analysis/DomTreeUpdater.h"
33 #include "llvm/Analysis/LoopInfo.h"
34 #include "llvm/IR/BasicBlock.h"
35 #include "llvm/IR/CFG.h"
36 #include "llvm/IR/IRBuilder.h"
37 #include "llvm/IR/Instruction.h"
38 #include "llvm/IR/Instructions.h"
39 #include "llvm/IR/Type.h"
40 #include "llvm/IR/Value.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Debug.h"
44 #include "llvm/Support/GraphWriter.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
47 #include "llvm/Transforms/Utils/LoopVersioning.h"
48 #include <cassert>
49 #include <string>
50 
51 using namespace llvm;
52 using namespace llvm::VPlanPatternMatch;
53 
54 namespace llvm {
55 extern cl::opt<bool> EnableVPlanNativePath;
56 }
57 
58 extern cl::opt<unsigned> ForceTargetInstructionCost;
59 
60 static cl::opt<bool> PrintVPlansInDotFormat(
61     "vplan-print-in-dot-format", cl::Hidden,
62     cl::desc("Use dot format instead of plain text when dumping VPlans"));
63 
64 #define DEBUG_TYPE "loop-vectorize"
65 
66 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
67 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPRecipeBase &R) {
68   const VPBasicBlock *Parent = R.getParent();
69   VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr);
70   R.print(OS, "", SlotTracker);
71   return OS;
72 }
73 #endif
74 
75 Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
76                                 const ElementCount &VF) const {
77   switch (LaneKind) {
78   case VPLane::Kind::ScalableLast:
79     // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
80     return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
81                              Builder.getInt32(VF.getKnownMinValue() - Lane));
82   case VPLane::Kind::First:
83     return Builder.getInt32(Lane);
84   }
85   llvm_unreachable("Unknown lane kind");
86 }
87 
88 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
89     : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
90   if (Def)
91     Def->addDefinedValue(this);
92 }
93 
94 VPValue::~VPValue() {
95   assert(Users.empty() && "trying to delete a VPValue with remaining users");
96   if (Def)
97     Def->removeDefinedValue(this);
98 }
99 
100 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
101 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
102   if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
103     R->print(OS, "", SlotTracker);
104   else
105     printAsOperand(OS, SlotTracker);
106 }
107 
108 void VPValue::dump() const {
109   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
110   VPSlotTracker SlotTracker(
111       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
112   print(dbgs(), SlotTracker);
113   dbgs() << "\n";
114 }
115 
116 void VPDef::dump() const {
117   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
118   VPSlotTracker SlotTracker(
119       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
120   print(dbgs(), "", SlotTracker);
121   dbgs() << "\n";
122 }
123 #endif
124 
125 VPRecipeBase *VPValue::getDefiningRecipe() {
126   return cast_or_null<VPRecipeBase>(Def);
127 }
128 
129 const VPRecipeBase *VPValue::getDefiningRecipe() const {
130   return cast_or_null<VPRecipeBase>(Def);
131 }
132 
133 // Get the top-most entry block of \p Start. This is the entry block of the
134 // containing VPlan. This function is templated to support both const and non-const blocks
135 template <typename T> static T *getPlanEntry(T *Start) {
136   T *Next = Start;
137   T *Current = Start;
138   while ((Next = Next->getParent()))
139     Current = Next;
140 
141   SmallSetVector<T *, 8> WorkList;
142   WorkList.insert(Current);
143 
144   for (unsigned i = 0; i < WorkList.size(); i++) {
145     T *Current = WorkList[i];
146     if (Current->getNumPredecessors() == 0)
147       return Current;
148     auto &Predecessors = Current->getPredecessors();
149     WorkList.insert_range(Predecessors);
150   }
151 
152   llvm_unreachable("VPlan without any entry node without predecessors");
153 }
154 
155 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
156 
157 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
158 
159 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
160 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
161   const VPBlockBase *Block = this;
162   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
163     Block = Region->getEntry();
164   return cast<VPBasicBlock>(Block);
165 }
166 
167 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
168   VPBlockBase *Block = this;
169   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
170     Block = Region->getEntry();
171   return cast<VPBasicBlock>(Block);
172 }
173 
174 void VPBlockBase::setPlan(VPlan *ParentPlan) {
175   assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
176   Plan = ParentPlan;
177 }
178 
179 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
180 const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const {
181   const VPBlockBase *Block = this;
182   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
183     Block = Region->getExiting();
184   return cast<VPBasicBlock>(Block);
185 }
186 
187 VPBasicBlock *VPBlockBase::getExitingBasicBlock() {
188   VPBlockBase *Block = this;
189   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
190     Block = Region->getExiting();
191   return cast<VPBasicBlock>(Block);
192 }
193 
194 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
195   if (!Successors.empty() || !Parent)
196     return this;
197   assert(Parent->getExiting() == this &&
198          "Block w/o successors not the exiting block of its parent.");
199   return Parent->getEnclosingBlockWithSuccessors();
200 }
201 
202 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
203   if (!Predecessors.empty() || !Parent)
204     return this;
205   assert(Parent->getEntry() == this &&
206          "Block w/o predecessors not the entry of its parent.");
207   return Parent->getEnclosingBlockWithPredecessors();
208 }
209 
210 bool VPBlockUtils::isHeader(const VPBlockBase *VPB,
211                             const VPDominatorTree &VPDT) {
212   auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
213   if (!VPBB)
214     return false;
215 
216   // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
217   // VPBB as its entry, i.e., free of predecessors.
218   if (auto *R = VPBB->getParent())
219     return !R->isReplicator() && VPBB->getNumPredecessors() == 0;
220 
221   // A header dominates its second predecessor (the latch), with the other
222   // predecessor being the preheader
223   return VPB->getPredecessors().size() == 2 &&
224          VPDT.dominates(VPB, VPB->getPredecessors()[1]);
225 }
226 
227 bool VPBlockUtils::isLatch(const VPBlockBase *VPB,
228                            const VPDominatorTree &VPDT) {
229   // A latch has a header as its second successor, with its other successor
230   // leaving the loop. A preheader OTOH has a header as its first (and only)
231   // successor.
232   return VPB->getNumSuccessors() == 2 &&
233          VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT);
234 }
235 
236 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
237   iterator It = begin();
238   while (It != end() && It->isPhi())
239     It++;
240   return It;
241 }
242 
243 VPTransformState::VPTransformState(const TargetTransformInfo *TTI,
244                                    ElementCount VF, LoopInfo *LI,
245                                    DominatorTree *DT, AssumptionCache *AC,
246                                    IRBuilderBase &Builder, VPlan *Plan,
247                                    Loop *CurrentParentLoop, Type *CanonicalIVTy)
248     : TTI(TTI), VF(VF), CFG(DT), LI(LI), AC(AC), Builder(Builder), Plan(Plan),
249       CurrentParentLoop(CurrentParentLoop), TypeAnalysis(CanonicalIVTy),
250       VPDT(*Plan) {}
251 
252 Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) {
253   if (Def->isLiveIn())
254     return Def->getLiveInIRValue();
255 
256   if (hasScalarValue(Def, Lane))
257     return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
258 
259   if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) &&
260       hasScalarValue(Def, VPLane::getFirstLane())) {
261     return Data.VPV2Scalars[Def][0];
262   }
263 
264   // Look through BuildVector to avoid redundant extracts.
265   // TODO: Remove once replicate regions are unrolled explicitly.
266   if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
267     auto *BuildVector = cast<VPInstruction>(Def);
268     return get(BuildVector->getOperand(Lane.getKnownLane()), true);
269   }
270 
271   assert(hasVectorValue(Def));
272   auto *VecPart = Data.VPV2Vector[Def];
273   if (!VecPart->getType()->isVectorTy()) {
274     assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
275     return VecPart;
276   }
277   // TODO: Cache created scalar values.
278   Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
279   auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
280   // set(Def, Extract, Instance);
281   return Extract;
282 }
283 
284 Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
285   if (NeedsScalar) {
286     assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
287             !vputils::onlyFirstLaneUsed(Def) ||
288             (hasScalarValue(Def, VPLane(0)) &&
289              Data.VPV2Scalars[Def].size() == 1)) &&
290            "Trying to access a single scalar per part but has multiple scalars "
291            "per part.");
292     return get(Def, VPLane(0));
293   }
294 
295   // If Values have been set for this Def return the one relevant for \p Part.
296   if (hasVectorValue(Def))
297     return Data.VPV2Vector[Def];
298 
299   auto GetBroadcastInstrs = [this, Def](Value *V) {
300     bool SafeToHoist =
301         !Def->hasDefiningRecipe() ||
302         VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(),
303                                Plan->getVectorPreheader());
304 
305     if (VF.isScalar())
306       return V;
307     // Place the code for broadcasting invariant variables in the new preheader.
308     IRBuilder<>::InsertPointGuard Guard(Builder);
309     if (SafeToHoist) {
310       BasicBlock *LoopVectorPreHeader =
311           CFG.VPBB2IRBB[Plan->getVectorPreheader()];
312       if (LoopVectorPreHeader)
313         Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
314     }
315 
316     // Place the code for broadcasting invariant variables in the new preheader.
317     // Broadcast the scalar into all locations in the vector.
318     Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
319 
320     return Shuf;
321   };
322 
323   if (!hasScalarValue(Def, {0})) {
324     assert(Def->isLiveIn() && "expected a live-in");
325     Value *IRV = Def->getLiveInIRValue();
326     Value *B = GetBroadcastInstrs(IRV);
327     set(Def, B);
328     return B;
329   }
330 
331   Value *ScalarValue = get(Def, VPLane(0));
332   // If we aren't vectorizing, we can just copy the scalar map values over
333   // to the vector map.
334   if (VF.isScalar()) {
335     set(Def, ScalarValue);
336     return ScalarValue;
337   }
338 
339   bool IsSingleScalar = vputils::isSingleScalar(Def);
340 
341   VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
342   // Check if there is a scalar value for the selected lane.
343   if (!hasScalarValue(Def, LastLane)) {
344     // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
345     // VPExpandSCEVRecipes can also be a single scalar.
346     assert((isa<VPWidenIntOrFpInductionRecipe, VPScalarIVStepsRecipe,
347                 VPExpandSCEVRecipe>(Def->getDefiningRecipe())) &&
348            "unexpected recipe found to be invariant");
349     IsSingleScalar = true;
350     LastLane = 0;
351   }
352 
353   auto *LastInst = cast<Instruction>(get(Def, LastLane));
354   // Set the insert point after the last scalarized instruction or after the
355   // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
356   // will directly follow the scalar definitions.
357   auto OldIP = Builder.saveIP();
358   auto NewIP = isa<PHINode>(LastInst)
359                    ? LastInst->getParent()->getFirstNonPHIIt()
360                    : std::next(BasicBlock::iterator(LastInst));
361   Builder.SetInsertPoint(&*NewIP);
362 
363   // However, if we are vectorizing, we need to construct the vector values.
364   // If the value is known to be uniform after vectorization, we can just
365   // broadcast the scalar value corresponding to lane zero. Otherwise, we
366   // construct the vector values using insertelement instructions. Since the
367   // resulting vectors are stored in State, we will only generate the
368   // insertelements once.
369   Value *VectorValue = nullptr;
370   if (IsSingleScalar) {
371     VectorValue = GetBroadcastInstrs(ScalarValue);
372     set(Def, VectorValue);
373   } else {
374     assert(!VF.isScalable() && "VF is assumed to be non scalable.");
375     // Initialize packing with insertelements to start from poison.
376     VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
377     for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
378       VectorValue = packScalarIntoVectorizedValue(Def, VectorValue, Lane);
379     set(Def, VectorValue);
380   }
381   Builder.restoreIP(OldIP);
382   return VectorValue;
383 }
384 
385 void VPTransformState::setDebugLocFrom(DebugLoc DL) {
386   const DILocation *DIL = DL;
387   // When a FSDiscriminator is enabled, we don't need to add the multiply
388   // factors to the discriminators.
389   if (DIL &&
390       Builder.GetInsertBlock()
391           ->getParent()
392           ->shouldEmitDebugInfoForProfiling() &&
393       !EnableFSDiscriminator) {
394     // FIXME: For scalable vectors, assume vscale=1.
395     unsigned UF = Plan->getUF();
396     auto NewDIL =
397         DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
398     if (NewDIL)
399       Builder.SetCurrentDebugLocation(*NewDIL);
400     else
401       LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
402                         << DIL->getFilename() << " Line: " << DIL->getLine());
403   } else
404     Builder.SetCurrentDebugLocation(DL);
405 }
406 
407 Value *VPTransformState::packScalarIntoVectorizedValue(const VPValue *Def,
408                                                        Value *WideValue,
409                                                        const VPLane &Lane) {
410   Value *ScalarInst = get(Def, Lane);
411   Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF);
412   if (auto *StructTy = dyn_cast<StructType>(WideValue->getType())) {
413     // We must handle each element of a vectorized struct type.
414     for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) {
415       Value *ScalarValue = Builder.CreateExtractValue(ScalarInst, I);
416       Value *VectorValue = Builder.CreateExtractValue(WideValue, I);
417       VectorValue =
418           Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
419       WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I);
420     }
421   } else {
422     WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
423   }
424   return WideValue;
425 }
426 
427 BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
428   auto &CFG = State.CFG;
429   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
430   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
431   BasicBlock *PrevBB = CFG.PrevBB;
432   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
433                                          PrevBB->getParent(), CFG.ExitBB);
434   LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
435 
436   return NewBB;
437 }
438 
439 void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
440   auto &CFG = State.CFG;
441   BasicBlock *NewBB = CFG.VPBB2IRBB[this];
442 
443   // Register NewBB in its loop. In innermost loops its the same for all
444   // BB's.
445   Loop *ParentLoop = State.CurrentParentLoop;
446   // If this block has a sole successor that is an exit block or is an exit
447   // block itself then it needs adding to the same parent loop as the exit
448   // block.
449   VPBlockBase *SuccOrExitVPB = getSingleSuccessor();
450   SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this;
451   if (State.Plan->isExitBlock(SuccOrExitVPB)) {
452     ParentLoop = State.LI->getLoopFor(
453         cast<VPIRBasicBlock>(SuccOrExitVPB)->getIRBasicBlock());
454   }
455 
456   if (ParentLoop && !State.LI->getLoopFor(NewBB))
457     ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
458 
459   SmallVector<VPBlockBase *> Preds;
460   if (VPBlockUtils::isHeader(this, State.VPDT)) {
461     // There's no block for the latch yet, connect to the preheader only.
462     Preds = {getPredecessors()[0]};
463   } else {
464     Preds = to_vector(getPredecessors());
465   }
466 
467   // Hook up the new basic block to its predecessors.
468   for (VPBlockBase *PredVPBlock : Preds) {
469     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
470     auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
471     assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
472            "Predecessor basic-block not found building successor.");
473     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
474     auto *PredBBTerminator = PredBB->getTerminator();
475     LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
476 
477     auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
478     if (isa<UnreachableInst>(PredBBTerminator)) {
479       assert(PredVPSuccessors.size() == 1 &&
480              "Predecessor ending w/o branch must have single successor.");
481       DebugLoc DL = PredBBTerminator->getDebugLoc();
482       PredBBTerminator->eraseFromParent();
483       auto *Br = BranchInst::Create(NewBB, PredBB);
484       Br->setDebugLoc(DL);
485     } else if (TermBr && !TermBr->isConditional()) {
486       TermBr->setSuccessor(0, NewBB);
487     } else {
488       // Set each forward successor here when it is created, excluding
489       // backedges. A backward successor is set when the branch is created.
490       // Branches to VPIRBasicBlocks must have the same successors in VPlan as
491       // in the original IR, except when the predecessor is the entry block.
492       // This enables including SCEV and memory runtime check blocks in VPlan.
493       // TODO: Remove exception by modeling the terminator of entry block using
494       // BranchOnCond.
495       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
496       assert((TermBr && (!TermBr->getSuccessor(idx) ||
497                          (isa<VPIRBasicBlock>(this) &&
498                           (TermBr->getSuccessor(idx) == NewBB ||
499                            PredVPBlock == getPlan()->getEntry())))) &&
500              "Trying to reset an existing successor block.");
501       TermBr->setSuccessor(idx, NewBB);
502     }
503     CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
504   }
505 }
506 
507 void VPIRBasicBlock::execute(VPTransformState *State) {
508   assert(getHierarchicalSuccessors().size() <= 2 &&
509          "VPIRBasicBlock can have at most two successors at the moment!");
510   State->Builder.SetInsertPoint(IRBB->getTerminator());
511   State->CFG.PrevBB = IRBB;
512   State->CFG.VPBB2IRBB[this] = IRBB;
513   executeRecipes(State, IRBB);
514   // Create a branch instruction to terminate IRBB if one was not created yet
515   // and is needed.
516   if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
517     auto *Br = State->Builder.CreateBr(IRBB);
518     Br->setOperand(0, nullptr);
519     IRBB->getTerminator()->eraseFromParent();
520   } else {
521     assert(
522         (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
523         "other blocks must be terminated by a branch");
524   }
525 
526   connectToPredecessors(*State);
527 }
528 
529 VPIRBasicBlock *VPIRBasicBlock::clone() {
530   auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
531   for (VPRecipeBase &R : Recipes)
532     NewBlock->appendRecipe(R.clone());
533   return NewBlock;
534 }
535 
536 void VPBasicBlock::execute(VPTransformState *State) {
537   bool Replica = bool(State->Lane);
538   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
539 
540   if (VPBlockUtils::isHeader(this, State->VPDT)) {
541     // Create and register the new vector loop.
542     Loop *PrevParentLoop = State->CurrentParentLoop;
543     State->CurrentParentLoop = State->LI->AllocateLoop();
544 
545     // Insert the new loop into the loop nest and register the new basic blocks
546     // before calling any utilities such as SCEV that require valid LoopInfo.
547     if (PrevParentLoop)
548       PrevParentLoop->addChildLoop(State->CurrentParentLoop);
549     else
550       State->LI->addTopLevelLoop(State->CurrentParentLoop);
551   }
552 
553   auto IsReplicateRegion = [](VPBlockBase *BB) {
554     auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
555     assert((!R || R->isReplicator()) &&
556            "only replicate region blocks should remain");
557     return R;
558   };
559   // 1. Create an IR basic block.
560   if ((Replica && this == getParent()->getEntry()) ||
561       IsReplicateRegion(getSingleHierarchicalPredecessor())) {
562     // Reuse the previous basic block if the current VPBB is either
563     //  * the entry to a replicate region, or
564     //  * the exit of a replicate region.
565     State->CFG.VPBB2IRBB[this] = NewBB;
566   } else {
567     NewBB = createEmptyBasicBlock(*State);
568 
569     State->Builder.SetInsertPoint(NewBB);
570     // Temporarily terminate with unreachable until CFG is rewired.
571     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
572     State->Builder.SetInsertPoint(Terminator);
573 
574     State->CFG.PrevBB = NewBB;
575     State->CFG.VPBB2IRBB[this] = NewBB;
576     connectToPredecessors(*State);
577   }
578 
579   // 2. Fill the IR basic block with IR instructions.
580   executeRecipes(State, NewBB);
581 
582   // If this block is a latch, update CurrentParentLoop.
583   if (VPBlockUtils::isLatch(this, State->VPDT))
584     State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
585 }
586 
587 VPBasicBlock *VPBasicBlock::clone() {
588   auto *NewBlock = getPlan()->createVPBasicBlock(getName());
589   for (VPRecipeBase &R : *this)
590     NewBlock->appendRecipe(R.clone());
591   return NewBlock;
592 }
593 
594 void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) {
595   LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName()
596                     << " in BB: " << BB->getName() << '\n');
597 
598   State->CFG.PrevVPBB = this;
599 
600   for (VPRecipeBase &Recipe : Recipes) {
601     State->setDebugLocFrom(Recipe.getDebugLoc());
602     Recipe.execute(*State);
603   }
604 
605   LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB);
606 }
607 
608 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
609   assert((SplitAt == end() || SplitAt->getParent() == this) &&
610          "can only split at a position in the same block");
611 
612   // Create new empty block after the block to split.
613   auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
614   VPBlockUtils::insertBlockAfter(SplitBlock, this);
615 
616   // Finally, move the recipes starting at SplitAt to new block.
617   for (VPRecipeBase &ToMove :
618        make_early_inc_range(make_range(SplitAt, this->end())))
619     ToMove.moveBefore(*SplitBlock, SplitBlock->end());
620 
621   return SplitBlock;
622 }
623 
624 /// Return the enclosing loop region for region \p P. The templated version is
625 /// used to support both const and non-const block arguments.
626 template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
627   if (P && P->isReplicator()) {
628     P = P->getParent();
629     // Multiple loop regions can be nested, but replicate regions can only be
630     // nested inside a loop region or must be outside any other region.
631     assert((!P || !P->isReplicator()) && "unexpected nested replicate regions");
632   }
633   return P;
634 }
635 
636 VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
637   return getEnclosingLoopRegionForRegion(getParent());
638 }
639 
640 const VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() const {
641   return getEnclosingLoopRegionForRegion(getParent());
642 }
643 
644 static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
645   if (VPBB->empty()) {
646     assert(
647         VPBB->getNumSuccessors() < 2 &&
648         "block with multiple successors doesn't have a recipe as terminator");
649     return false;
650   }
651 
652   const VPRecipeBase *R = &VPBB->back();
653   bool IsSwitch = isa<VPInstruction>(R) &&
654                   cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
655   bool IsCondBranch = isa<VPBranchOnMaskRecipe>(R) ||
656                       match(R, m_BranchOnCond(m_VPValue())) ||
657                       match(R, m_BranchOnCount(m_VPValue(), m_VPValue()));
658   (void)IsCondBranch;
659   (void)IsSwitch;
660   if (VPBB->getNumSuccessors() == 2 ||
661       (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
662     assert((IsCondBranch || IsSwitch) &&
663            "block with multiple successors not terminated by "
664            "conditional branch nor switch recipe");
665 
666     return true;
667   }
668 
669   if (VPBB->getNumSuccessors() > 2) {
670     assert(IsSwitch && "block with more than 2 successors not terminated by "
671                        "a switch recipe");
672     return true;
673   }
674 
675   assert(
676       !IsCondBranch &&
677       "block with 0 or 1 successors terminated by conditional branch recipe");
678   return false;
679 }
680 
681 VPRecipeBase *VPBasicBlock::getTerminator() {
682   if (hasConditionalTerminator(this))
683     return &back();
684   return nullptr;
685 }
686 
687 const VPRecipeBase *VPBasicBlock::getTerminator() const {
688   if (hasConditionalTerminator(this))
689     return &back();
690   return nullptr;
691 }
692 
693 bool VPBasicBlock::isExiting() const {
694   return getParent() && getParent()->getExitingBasicBlock() == this;
695 }
696 
697 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
698 void VPBlockBase::print(raw_ostream &O) const {
699   VPSlotTracker SlotTracker(getPlan());
700   print(O, "", SlotTracker);
701 }
702 
703 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
704   if (getSuccessors().empty()) {
705     O << Indent << "No successors\n";
706   } else {
707     O << Indent << "Successor(s): ";
708     ListSeparator LS;
709     for (auto *Succ : getSuccessors())
710       O << LS << Succ->getName();
711     O << '\n';
712   }
713 }
714 
715 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
716                          VPSlotTracker &SlotTracker) const {
717   O << Indent << getName() << ":\n";
718 
719   auto RecipeIndent = Indent + "  ";
720   for (const VPRecipeBase &Recipe : *this) {
721     Recipe.print(O, RecipeIndent, SlotTracker);
722     O << '\n';
723   }
724 
725   printSuccessors(O, Indent);
726 }
727 #endif
728 
729 static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
730 
731 // Clone the CFG for all nodes reachable from \p Entry, this includes cloning
732 // the blocks and their recipes. Operands of cloned recipes will NOT be updated.
733 // Remapping of operands must be done separately. Returns a pair with the new
734 // entry and exiting blocks of the cloned region. If \p Entry isn't part of a
735 // region, return nullptr for the exiting block.
736 static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
737   DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
738   VPBlockBase *Exiting = nullptr;
739   bool InRegion = Entry->getParent();
740   // First, clone blocks reachable from Entry.
741   for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
742     VPBlockBase *NewBB = BB->clone();
743     Old2NewVPBlocks[BB] = NewBB;
744     if (InRegion && BB->getNumSuccessors() == 0) {
745       assert(!Exiting && "Multiple exiting blocks?");
746       Exiting = BB;
747     }
748   }
749   assert((!InRegion || Exiting) && "regions must have a single exiting block");
750 
751   // Second, update the predecessors & successors of the cloned blocks.
752   for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
753     VPBlockBase *NewBB = Old2NewVPBlocks[BB];
754     SmallVector<VPBlockBase *> NewPreds;
755     for (VPBlockBase *Pred : BB->getPredecessors()) {
756       NewPreds.push_back(Old2NewVPBlocks[Pred]);
757     }
758     NewBB->setPredecessors(NewPreds);
759     SmallVector<VPBlockBase *> NewSuccs;
760     for (VPBlockBase *Succ : BB->successors()) {
761       NewSuccs.push_back(Old2NewVPBlocks[Succ]);
762     }
763     NewBB->setSuccessors(NewSuccs);
764   }
765 
766 #if !defined(NDEBUG)
767   // Verify that the order of predecessors and successors matches in the cloned
768   // version.
769   for (const auto &[OldBB, NewBB] :
770        zip(vp_depth_first_shallow(Entry),
771            vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
772     for (const auto &[OldPred, NewPred] :
773          zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
774       assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
775 
776     for (const auto &[OldSucc, NewSucc] :
777          zip(OldBB->successors(), NewBB->successors()))
778       assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
779   }
780 #endif
781 
782   return std::make_pair(Old2NewVPBlocks[Entry],
783                         Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
784 }
785 
786 VPRegionBlock *VPRegionBlock::clone() {
787   const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
788   auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
789                                                    getName(), isReplicator());
790   for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
791     Block->setParent(NewRegion);
792   return NewRegion;
793 }
794 
795 void VPRegionBlock::execute(VPTransformState *State) {
796   assert(isReplicator() &&
797          "Loop regions should have been lowered to plain CFG");
798   assert(!State->Lane && "Replicating a Region with non-null instance.");
799   assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
800 
801   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
802       Entry);
803   State->Lane = VPLane(0);
804   for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
805     State->Lane = VPLane(Lane, VPLane::Kind::First);
806     // Visit the VPBlocks connected to \p this, starting from it.
807     for (VPBlockBase *Block : RPOT) {
808       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
809       Block->execute(State);
810     }
811   }
812 
813   // Exit replicating mode.
814   State->Lane.reset();
815 }
816 
817 InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
818   InstructionCost Cost = 0;
819   for (VPRecipeBase &R : Recipes)
820     Cost += R.cost(VF, Ctx);
821   return Cost;
822 }
823 
824 const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
825   const VPBlockBase *Pred = nullptr;
826   if (getNumPredecessors() > 0) {
827     Pred = getPredecessors()[Idx];
828   } else {
829     auto *Region = getParent();
830     assert(Region && !Region->isReplicator() && Region->getEntry() == this &&
831            "must be in the entry block of a non-replicate region");
832     assert(Idx < 2 && Region->getNumPredecessors() == 1 &&
833            "loop region has a single predecessor (preheader), its entry block "
834            "has 2 incoming blocks");
835 
836     // Idx ==  0 selects the predecessor of the region, Idx == 1 selects the
837     // region itself whose exiting block feeds the phi across the backedge.
838     Pred = Idx == 0 ? Region->getSinglePredecessor() : Region;
839   }
840   return Pred->getExitingBasicBlock();
841 }
842 
843 InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
844   if (!isReplicator()) {
845     InstructionCost Cost = 0;
846     for (VPBlockBase *Block : vp_depth_first_shallow(getEntry()))
847       Cost += Block->cost(VF, Ctx);
848     InstructionCost BackedgeCost =
849         ForceTargetInstructionCost.getNumOccurrences()
850             ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences())
851             : Ctx.TTI.getCFInstrCost(Instruction::Br, Ctx.CostKind);
852     LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
853                       << ": vector loop backedge\n");
854     Cost += BackedgeCost;
855     return Cost;
856   }
857 
858   // Compute the cost of a replicate region. Replicating isn't supported for
859   // scalable vectors, return an invalid cost for them.
860   // TODO: Discard scalable VPlans with replicate recipes earlier after
861   // construction.
862   if (VF.isScalable())
863     return InstructionCost::getInvalid();
864 
865   // First compute the cost of the conditionally executed recipes, followed by
866   // account for the branching cost, except if the mask is a header mask or
867   // uniform condition.
868   using namespace llvm::VPlanPatternMatch;
869   VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]);
870   InstructionCost ThenCost = Then->cost(VF, Ctx);
871 
872   // For the scalar case, we may not always execute the original predicated
873   // block, Thus, scale the block's cost by the probability of executing it.
874   if (VF.isScalar())
875     return ThenCost / getPredBlockCostDivisor(Ctx.CostKind);
876 
877   return ThenCost;
878 }
879 
880 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
881 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
882                           VPSlotTracker &SlotTracker) const {
883   O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
884   auto NewIndent = Indent + "  ";
885   for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
886     O << '\n';
887     BlockBase->print(O, NewIndent, SlotTracker);
888   }
889   O << Indent << "}\n";
890 
891   printSuccessors(O, Indent);
892 }
893 #endif
894 
895 void VPRegionBlock::dissolveToCFGLoop() {
896   auto *Header = cast<VPBasicBlock>(getEntry());
897   if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
898     assert(this == getPlan()->getVectorLoopRegion() &&
899            "Canonical IV must be in the entry of the top-level loop region");
900     auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
901         {CanIV->getStartValue(), CanIV->getBackedgeValue()},
902         CanIV->getDebugLoc(), "index");
903     CanIV->replaceAllUsesWith(ScalarR);
904     CanIV->eraseFromParent();
905   }
906 
907   VPBlockBase *Preheader = getSinglePredecessor();
908   auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
909   VPBlockBase *Middle = getSingleSuccessor();
910   VPBlockUtils::disconnectBlocks(Preheader, this);
911   VPBlockUtils::disconnectBlocks(this, Middle);
912 
913   for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
914     VPB->setParent(getParent());
915 
916   VPBlockUtils::connectBlocks(Preheader, Header);
917   VPBlockUtils::connectBlocks(ExitingLatch, Middle);
918   VPBlockUtils::connectBlocks(ExitingLatch, Header);
919 }
920 
921 VPlan::VPlan(Loop *L) {
922   setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
923   ScalarHeader = createVPIRBasicBlock(L->getHeader());
924 
925   SmallVector<BasicBlock *> IRExitBlocks;
926   L->getUniqueExitBlocks(IRExitBlocks);
927   for (BasicBlock *EB : IRExitBlocks)
928     ExitBlocks.push_back(createVPIRBasicBlock(EB));
929 }
930 
931 VPlan::~VPlan() {
932   VPValue DummyValue;
933 
934   for (auto *VPB : CreatedBlocks) {
935     if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
936       // Replace all operands of recipes and all VPValues defined in VPBB with
937       // DummyValue so the block can be deleted.
938       for (VPRecipeBase &R : *VPBB) {
939         for (auto *Def : R.definedValues())
940           Def->replaceAllUsesWith(&DummyValue);
941 
942         for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
943           R.setOperand(I, &DummyValue);
944       }
945     }
946     delete VPB;
947   }
948   for (VPValue *VPV : getLiveIns())
949     delete VPV;
950   if (BackedgeTakenCount)
951     delete BackedgeTakenCount;
952 }
953 
954 void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
955                              VPTransformState &State) {
956   Type *TCTy = TripCountV->getType();
957   // Check if the backedge taken count is needed, and if so build it.
958   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
959     IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
960     auto *TCMO = Builder.CreateSub(TripCountV, ConstantInt::get(TCTy, 1),
961                                    "trip.count.minus.1");
962     BackedgeTakenCount->setUnderlyingValue(TCMO);
963   }
964 
965   VectorTripCount.setUnderlyingValue(VectorTripCountV);
966 
967   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
968   // FIXME: Model VF * UF computation completely in VPlan.
969   unsigned UF = getUF();
970   if (VF.getNumUsers()) {
971     Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
972     VF.setUnderlyingValue(RuntimeVF);
973     VFxUF.setUnderlyingValue(
974         UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF))
975                : RuntimeVF);
976   } else {
977     VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF));
978   }
979 }
980 
981 VPIRBasicBlock *VPlan::getExitBlock(BasicBlock *IRBB) const {
982   auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
983     return VPIRBB->getIRBasicBlock() == IRBB;
984   });
985   assert(Iter != getExitBlocks().end() && "no exit block found");
986   return *Iter;
987 }
988 
989 bool VPlan::isExitBlock(VPBlockBase *VPBB) {
990   return is_contained(ExitBlocks, VPBB);
991 }
992 
993 /// Generate the code inside the preheader and body of the vectorized loop.
994 /// Assumes a single pre-header basic-block was created for this. Introduce
995 /// additional basic-blocks as needed, and fill them all.
996 void VPlan::execute(VPTransformState *State) {
997   // Initialize CFG state.
998   State->CFG.PrevVPBB = nullptr;
999   State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
1000 
1001   // Update VPDominatorTree since VPBasicBlock may be removed after State was
1002   // constructed.
1003   State->VPDT.recalculate(*this);
1004 
1005   // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
1006   BasicBlock *VectorPreHeader = State->CFG.PrevBB;
1007   cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
1008   State->CFG.DTU.applyUpdates(
1009       {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
1010 
1011   LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
1012                     << ", UF=" << getUF() << '\n');
1013   setName("Final VPlan");
1014   LLVM_DEBUG(dump());
1015 
1016   // Disconnect scalar preheader and scalar header, as the dominator tree edge
1017   // will be updated as part of VPlan execution. This allows keeping the DTU
1018   // logic generic during VPlan execution.
1019   BasicBlock *ScalarPh = State->CFG.ExitBB;
1020   State->CFG.DTU.applyUpdates(
1021       {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
1022 
1023   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
1024       Entry);
1025   // Generate code for the VPlan, in parts of the vector skeleton, loop body and
1026   // successor blocks including the middle, exit and scalar preheader blocks.
1027   for (VPBlockBase *Block : RPOT)
1028     Block->execute(State);
1029 
1030   State->CFG.DTU.flush();
1031 
1032   VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
1033   if (!Header)
1034     return;
1035 
1036   auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
1037   BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1038 
1039   // Fix the latch value of canonical, reduction and first-order recurrences
1040   // phis in the vector loop.
1041   for (VPRecipeBase &R : Header->phis()) {
1042     // Skip phi-like recipes that generate their backedege values themselves.
1043     if (isa<VPWidenPHIRecipe>(&R))
1044       continue;
1045 
1046     if (auto *WidenPhi = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
1047       assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1048              "recipe generating only scalars should have been replaced");
1049       auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1050       PHINode *Phi = cast<PHINode>(GEP->getPointerOperand());
1051 
1052       Phi->setIncomingBlock(1, VectorLatchBB);
1053 
1054       // Move the last step to the end of the latch block. This ensures
1055       // consistent placement of all induction updates.
1056       Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1057       Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
1058       continue;
1059     }
1060 
1061     auto *PhiR = cast<VPSingleDefRecipe>(&R);
1062     // VPInstructions currently model scalar Phis only.
1063     bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1064                        (isa<VPReductionPHIRecipe>(PhiR) &&
1065                         cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1066 
1067     Value *Phi = State->get(PhiR, NeedsScalar);
1068     // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1069     // not.
1070     Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1071     cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1072   }
1073 }
1074 
1075 InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
1076   // For now only return the cost of the vector loop region, ignoring any other
1077   // blocks, like the preheader or middle blocks.
1078   InstructionCost Cost = getVectorLoopRegion()->cost(VF, Ctx);
1079 
1080   // If any instructions in the middle block are invalid return invalid.
1081   // TODO: Remove once no VPlans with VF == vscale x 1 and first-order recurrences are created.
1082   if (!getMiddleBlock()->cost(VF, Ctx).isValid())
1083     return InstructionCost::getInvalid();
1084 
1085   return Cost;
1086 }
1087 
1088 VPRegionBlock *VPlan::getVectorLoopRegion() {
1089   // TODO: Cache if possible.
1090   for (VPBlockBase *B : vp_depth_first_shallow(getEntry()))
1091     if (auto *R = dyn_cast<VPRegionBlock>(B))
1092       return R->isReplicator() ? nullptr : R;
1093   return nullptr;
1094 }
1095 
1096 const VPRegionBlock *VPlan::getVectorLoopRegion() const {
1097   for (const VPBlockBase *B : vp_depth_first_shallow(getEntry()))
1098     if (auto *R = dyn_cast<VPRegionBlock>(B))
1099       return R->isReplicator() ? nullptr : R;
1100   return nullptr;
1101 }
1102 
1103 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1104 void VPlan::printLiveIns(raw_ostream &O) const {
1105   VPSlotTracker SlotTracker(this);
1106 
1107   if (VF.getNumUsers() > 0) {
1108     O << "\nLive-in ";
1109     VF.printAsOperand(O, SlotTracker);
1110     O << " = VF";
1111   }
1112 
1113   if (VFxUF.getNumUsers() > 0) {
1114     O << "\nLive-in ";
1115     VFxUF.printAsOperand(O, SlotTracker);
1116     O << " = VF * UF";
1117   }
1118 
1119   if (VectorTripCount.getNumUsers() > 0) {
1120     O << "\nLive-in ";
1121     VectorTripCount.printAsOperand(O, SlotTracker);
1122     O << " = vector-trip-count";
1123   }
1124 
1125   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1126     O << "\nLive-in ";
1127     BackedgeTakenCount->printAsOperand(O, SlotTracker);
1128     O << " = backedge-taken count";
1129   }
1130 
1131   O << "\n";
1132   if (TripCount) {
1133     if (TripCount->isLiveIn())
1134       O << "Live-in ";
1135     TripCount->printAsOperand(O, SlotTracker);
1136     O << " = original trip-count";
1137     O << "\n";
1138   }
1139 }
1140 
1141 LLVM_DUMP_METHOD
1142 void VPlan::print(raw_ostream &O) const {
1143   VPSlotTracker SlotTracker(this);
1144 
1145   O << "VPlan '" << getName() << "' {";
1146 
1147   printLiveIns(O);
1148 
1149   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>>
1150       RPOT(getEntry());
1151   for (const VPBlockBase *Block : RPOT) {
1152     O << '\n';
1153     Block->print(O, "", SlotTracker);
1154   }
1155 
1156   O << "}\n";
1157 }
1158 
1159 std::string VPlan::getName() const {
1160   std::string Out;
1161   raw_string_ostream RSO(Out);
1162   RSO << Name << " for ";
1163   if (!VFs.empty()) {
1164     RSO << "VF={" << VFs[0];
1165     for (ElementCount VF : drop_begin(VFs))
1166       RSO << "," << VF;
1167     RSO << "},";
1168   }
1169 
1170   if (UFs.empty()) {
1171     RSO << "UF>=1";
1172   } else {
1173     RSO << "UF={" << UFs[0];
1174     for (unsigned UF : drop_begin(UFs))
1175       RSO << "," << UF;
1176     RSO << "}";
1177   }
1178 
1179   return Out;
1180 }
1181 
1182 LLVM_DUMP_METHOD
1183 void VPlan::printDOT(raw_ostream &O) const {
1184   VPlanPrinter Printer(O, *this);
1185   Printer.dump();
1186 }
1187 
1188 LLVM_DUMP_METHOD
1189 void VPlan::dump() const { print(dbgs()); }
1190 #endif
1191 
1192 static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1193                           DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1194   // Update the operands of all cloned recipes starting at NewEntry. This
1195   // traverses all reachable blocks. This is done in two steps, to handle cycles
1196   // in PHI recipes.
1197   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1198       OldDeepRPOT(Entry);
1199   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
1200       NewDeepRPOT(NewEntry);
1201   // First, collect all mappings from old to new VPValues defined by cloned
1202   // recipes.
1203   for (const auto &[OldBB, NewBB] :
1204        zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT),
1205            VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) {
1206     assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1207            "blocks must have the same number of recipes");
1208     for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1209       assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1210              "recipes must have the same number of operands");
1211       assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1212              "recipes must define the same number of operands");
1213       for (const auto &[OldV, NewV] :
1214            zip(OldR.definedValues(), NewR.definedValues()))
1215         Old2NewVPValues[OldV] = NewV;
1216     }
1217   }
1218 
1219   // Update all operands to use cloned VPValues.
1220   for (VPBasicBlock *NewBB :
1221        VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) {
1222     for (VPRecipeBase &NewR : *NewBB)
1223       for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1224         VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1225         NewR.setOperand(I, NewOp);
1226       }
1227   }
1228 }
1229 
1230 VPlan *VPlan::duplicate() {
1231   unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1232   // Clone blocks.
1233   const auto &[NewEntry, __] = cloneFrom(Entry);
1234 
1235   BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1236   VPIRBasicBlock *NewScalarHeader = nullptr;
1237   if (getScalarHeader()->getNumPredecessors() == 0) {
1238     NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
1239   } else {
1240     NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
1241         vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
1242           auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1243           return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1244         }));
1245   }
1246   // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1247   auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1248   DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1249   for (VPValue *OldLiveIn : getLiveIns()) {
1250     Old2NewVPValues[OldLiveIn] =
1251         NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
1252   }
1253   Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1254   Old2NewVPValues[&VF] = &NewPlan->VF;
1255   Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1256   if (BackedgeTakenCount) {
1257     NewPlan->BackedgeTakenCount = new VPValue();
1258     Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1259   }
1260   if (TripCount && TripCount->isLiveIn())
1261     Old2NewVPValues[TripCount] =
1262         NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue());
1263   // else NewTripCount will be created and inserted into Old2NewVPValues when
1264   // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1265 
1266   remapOperands(Entry, NewEntry, Old2NewVPValues);
1267 
1268   // Initialize remaining fields of cloned VPlan.
1269   NewPlan->VFs = VFs;
1270   NewPlan->UFs = UFs;
1271   // TODO: Adjust names.
1272   NewPlan->Name = Name;
1273   if (TripCount) {
1274     assert(Old2NewVPValues.contains(TripCount) &&
1275            "TripCount must have been added to Old2NewVPValues");
1276     NewPlan->TripCount = Old2NewVPValues[TripCount];
1277   }
1278 
1279   // Transfer all cloned blocks (the second half of all current blocks) from
1280   // current to new VPlan.
1281   unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1282   for (unsigned I :
1283        seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1284     NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
1285   CreatedBlocks.truncate(NumBlocksBeforeCloning);
1286 
1287   // Update ExitBlocks of the new plan.
1288   for (VPBlockBase *VPB : NewPlan->CreatedBlocks) {
1289     if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(VPB) &&
1290         VPB != NewScalarHeader)
1291       NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
1292   }
1293 
1294   return NewPlan;
1295 }
1296 
1297 VPIRBasicBlock *VPlan::createEmptyVPIRBasicBlock(BasicBlock *IRBB) {
1298   auto *VPIRBB = new VPIRBasicBlock(IRBB);
1299   CreatedBlocks.push_back(VPIRBB);
1300   return VPIRBB;
1301 }
1302 
1303 VPIRBasicBlock *VPlan::createVPIRBasicBlock(BasicBlock *IRBB) {
1304   auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1305   for (Instruction &I :
1306        make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
1307     VPIRBB->appendRecipe(VPIRInstruction::create(I));
1308   return VPIRBB;
1309 }
1310 
1311 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1312 
1313 Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1314   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1315          Twine(getOrCreateBID(Block));
1316 }
1317 
1318 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1319   const std::string &Name = Block->getName();
1320   if (!Name.empty())
1321     return Name;
1322   return "VPB" + Twine(getOrCreateBID(Block));
1323 }
1324 
1325 void VPlanPrinter::dump() {
1326   Depth = 1;
1327   bumpIndent(0);
1328   OS << "digraph VPlan {\n";
1329   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1330   if (!Plan.getName().empty())
1331     OS << "\\n" << DOT::EscapeString(Plan.getName());
1332 
1333   {
1334     // Print live-ins.
1335   std::string Str;
1336   raw_string_ostream SS(Str);
1337   Plan.printLiveIns(SS);
1338   SmallVector<StringRef, 0> Lines;
1339   StringRef(Str).rtrim('\n').split(Lines, "\n");
1340   for (auto Line : Lines)
1341     OS << DOT::EscapeString(Line.str()) << "\\n";
1342   }
1343 
1344   OS << "\"]\n";
1345   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1346   OS << "edge [fontname=Courier, fontsize=30]\n";
1347   OS << "compound=true\n";
1348 
1349   for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1350     dumpBlock(Block);
1351 
1352   OS << "}\n";
1353 }
1354 
1355 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1356   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
1357     dumpBasicBlock(BasicBlock);
1358   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1359     dumpRegion(Region);
1360   else
1361     llvm_unreachable("Unsupported kind of VPBlock.");
1362 }
1363 
1364 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1365                             bool Hidden, const Twine &Label) {
1366   // Due to "dot" we print an edge between two regions as an edge between the
1367   // exiting basic block and the entry basic of the respective regions.
1368   const VPBlockBase *Tail = From->getExitingBasicBlock();
1369   const VPBlockBase *Head = To->getEntryBasicBlock();
1370   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1371   OS << " [ label=\"" << Label << '\"';
1372   if (Tail != From)
1373     OS << " ltail=" << getUID(From);
1374   if (Head != To)
1375     OS << " lhead=" << getUID(To);
1376   if (Hidden)
1377     OS << "; splines=none";
1378   OS << "]\n";
1379 }
1380 
1381 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1382   auto &Successors = Block->getSuccessors();
1383   if (Successors.size() == 1)
1384     drawEdge(Block, Successors.front(), false, "");
1385   else if (Successors.size() == 2) {
1386     drawEdge(Block, Successors.front(), false, "T");
1387     drawEdge(Block, Successors.back(), false, "F");
1388   } else {
1389     unsigned SuccessorNumber = 0;
1390     for (auto *Successor : Successors)
1391       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1392   }
1393 }
1394 
1395 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1396   // Implement dot-formatted dump by performing plain-text dump into the
1397   // temporary storage followed by some post-processing.
1398   OS << Indent << getUID(BasicBlock) << " [label =\n";
1399   bumpIndent(1);
1400   std::string Str;
1401   raw_string_ostream SS(Str);
1402   // Use no indentation as we need to wrap the lines into quotes ourselves.
1403   BasicBlock->print(SS, "", SlotTracker);
1404 
1405   // We need to process each line of the output separately, so split
1406   // single-string plain-text dump.
1407   SmallVector<StringRef, 0> Lines;
1408   StringRef(Str).rtrim('\n').split(Lines, "\n");
1409 
1410   auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1411     OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1412   };
1413 
1414   // Don't need the "+" after the last line.
1415   for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1416     EmitLine(Line, " +\n");
1417   EmitLine(Lines.back(), "\n");
1418 
1419   bumpIndent(-1);
1420   OS << Indent << "]\n";
1421 
1422   dumpEdges(BasicBlock);
1423 }
1424 
1425 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1426   OS << Indent << "subgraph " << getUID(Region) << " {\n";
1427   bumpIndent(1);
1428   OS << Indent << "fontname=Courier\n"
1429      << Indent << "label=\""
1430      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1431      << DOT::EscapeString(Region->getName()) << "\"\n";
1432   // Dump the blocks of the region.
1433   assert(Region->getEntry() && "Region contains no inner blocks.");
1434   for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1435     dumpBlock(Block);
1436   bumpIndent(-1);
1437   OS << Indent << "}\n";
1438   dumpEdges(Region);
1439 }
1440 
1441 #endif
1442 
1443 /// Returns true if there is a vector loop region and \p VPV is defined in a
1444 /// loop region.
1445 static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1446   const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1447   return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1448                   DefR->getParent()->getEnclosingLoopRegion());
1449 }
1450 
1451 bool VPValue::isDefinedOutsideLoopRegions() const {
1452   return !isDefinedInsideLoopRegions(this);
1453 }
1454 void VPValue::replaceAllUsesWith(VPValue *New) {
1455   replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1456 }
1457 
1458 void VPValue::replaceUsesWithIf(
1459     VPValue *New,
1460     llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1461   // Note that this early exit is required for correctness; the implementation
1462   // below relies on the number of users for this VPValue to decrease, which
1463   // isn't the case if this == New.
1464   if (this == New)
1465     return;
1466 
1467   for (unsigned J = 0; J < getNumUsers();) {
1468     VPUser *User = Users[J];
1469     bool RemovedUser = false;
1470     for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1471       if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1472         continue;
1473 
1474       RemovedUser = true;
1475       User->setOperand(I, New);
1476     }
1477     // If a user got removed after updating the current user, the next user to
1478     // update will be moved to the current position, so we only need to
1479     // increment the index if the number of users did not change.
1480     if (!RemovedUser)
1481       J++;
1482   }
1483 }
1484 
1485 void VPUser::replaceUsesOfWith(VPValue *From, VPValue *To) {
1486   for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
1487     if (getOperand(Idx) == From)
1488       setOperand(Idx, To);
1489   }
1490 }
1491 
1492 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1493 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
1494   OS << Tracker.getOrCreateName(this);
1495 }
1496 
1497 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1498   interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1499     Op->printAsOperand(O, SlotTracker);
1500   });
1501 }
1502 #endif
1503 
1504 void VPSlotTracker::assignName(const VPValue *V) {
1505   assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1506   auto *UV = V->getUnderlyingValue();
1507   auto *VPI = dyn_cast_or_null<VPInstruction>(V->getDefiningRecipe());
1508   if (!UV && !(VPI && !VPI->getName().empty())) {
1509     VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1510     NextSlot++;
1511     return;
1512   }
1513 
1514   // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1515   // appending ".Number" to the name if there are multiple uses.
1516   std::string Name;
1517   if (UV)
1518     Name = getName(UV);
1519   else
1520     Name = VPI->getName();
1521 
1522   assert(!Name.empty() && "Name cannot be empty.");
1523   StringRef Prefix = UV ? "ir<" : "vp<%";
1524   std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1525 
1526   // First assign the base name for V.
1527   const auto &[A, _] = VPValue2Name.insert({V, BaseName});
1528   // Integer or FP constants with different types will result in he same string
1529   // due to stripping types.
1530   if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV))
1531     return;
1532 
1533   // If it is already used by C > 0 other VPValues, increase the version counter
1534   // C and use it for V.
1535   const auto &[C, UseInserted] = BaseName2Version.insert({BaseName, 0});
1536   if (!UseInserted) {
1537     C->second++;
1538     A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1539   }
1540 }
1541 
1542 void VPSlotTracker::assignNames(const VPlan &Plan) {
1543   if (Plan.VF.getNumUsers() > 0)
1544     assignName(&Plan.VF);
1545   if (Plan.VFxUF.getNumUsers() > 0)
1546     assignName(&Plan.VFxUF);
1547   assignName(&Plan.VectorTripCount);
1548   if (Plan.BackedgeTakenCount)
1549     assignName(Plan.BackedgeTakenCount);
1550   for (VPValue *LI : Plan.getLiveIns())
1551     assignName(LI);
1552 
1553   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1554       RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1555   for (const VPBasicBlock *VPBB :
1556        VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1557     assignNames(VPBB);
1558 }
1559 
1560 void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1561   for (const VPRecipeBase &Recipe : *VPBB)
1562     for (VPValue *Def : Recipe.definedValues())
1563       assignName(Def);
1564 }
1565 
1566 std::string VPSlotTracker::getName(const Value *V) {
1567   std::string Name;
1568   raw_string_ostream S(Name);
1569   if (V->hasName() || !isa<Instruction>(V)) {
1570     V->printAsOperand(S, false);
1571     return Name;
1572   }
1573 
1574   if (!MST) {
1575     // Lazily create the ModuleSlotTracker when we first hit an unnamed
1576     // instruction.
1577     auto *I = cast<Instruction>(V);
1578     // This check is required to support unit tests with incomplete IR.
1579     if (I->getParent()) {
1580       MST = std::make_unique<ModuleSlotTracker>(I->getModule());
1581       MST->incorporateFunction(*I->getFunction());
1582     } else {
1583       MST = std::make_unique<ModuleSlotTracker>(nullptr);
1584     }
1585   }
1586   V->printAsOperand(S, false, *MST);
1587   return Name;
1588 }
1589 
1590 std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1591   std::string Name = VPValue2Name.lookup(V);
1592   if (!Name.empty())
1593     return Name;
1594 
1595   // If no name was assigned, no VPlan was provided when creating the slot
1596   // tracker or it is not reachable from the provided VPlan. This can happen,
1597   // e.g. when trying to print a recipe that has not been inserted into a VPlan
1598   // in a debugger.
1599   // TODO: Update VPSlotTracker constructor to assign names to recipes &
1600   // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1601   // here.
1602   const VPRecipeBase *DefR = V->getDefiningRecipe();
1603   (void)DefR;
1604   assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1605          "VPValue defined by a recipe in a VPlan?");
1606 
1607   // Use the underlying value's name, if there is one.
1608   if (auto *UV = V->getUnderlyingValue()) {
1609     std::string Name;
1610     raw_string_ostream S(Name);
1611     UV->printAsOperand(S, false);
1612     return (Twine("ir<") + Name + ">").str();
1613   }
1614 
1615   return "<badref>";
1616 }
1617 
1618 bool LoopVectorizationPlanner::getDecisionAndClampRange(
1619     const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1620   assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1621   bool PredicateAtRangeStart = Predicate(Range.Start);
1622 
1623   for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1624     if (Predicate(TmpVF) != PredicateAtRangeStart) {
1625       Range.End = TmpVF;
1626       break;
1627     }
1628 
1629   return PredicateAtRangeStart;
1630 }
1631 
1632 /// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF,
1633 /// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range
1634 /// of VF's starting at a given VF and extending it as much as possible. Each
1635 /// vectorization decision can potentially shorten this sub-range during
1636 /// buildVPlan().
1637 void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
1638                                            ElementCount MaxVF) {
1639   auto MaxVFTimes2 = MaxVF * 2;
1640   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
1641     VFRange SubRange = {VF, MaxVFTimes2};
1642     if (auto Plan = tryToBuildVPlan(SubRange)) {
1643       VPlanTransforms::optimize(*Plan);
1644       // Update the name of the latch of the top-level vector loop region region
1645       // after optimizations which includes block folding.
1646       Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
1647       VPlans.push_back(std::move(Plan));
1648     }
1649     VF = SubRange.End;
1650   }
1651 }
1652 
1653 VPlan &LoopVectorizationPlanner::getPlanFor(ElementCount VF) const {
1654   assert(count_if(VPlans,
1655                   [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1656              1 &&
1657          "Multiple VPlans for VF.");
1658 
1659   for (const VPlanPtr &Plan : VPlans) {
1660     if (Plan->hasVF(VF))
1661       return *Plan.get();
1662   }
1663   llvm_unreachable("No plan found!");
1664 }
1665 
1666 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1667 void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
1668   if (VPlans.empty()) {
1669     O << "LV: No VPlans built.\n";
1670     return;
1671   }
1672   for (const auto &Plan : VPlans)
1673     if (PrintVPlansInDotFormat)
1674       Plan->printDOT(O);
1675     else
1676       Plan->print(O);
1677 }
1678 #endif
1679 
1680 TargetTransformInfo::OperandValueInfo
1681 VPCostContext::getOperandInfo(VPValue *V) const {
1682   if (!V->isLiveIn())
1683     return {};
1684 
1685   return TTI::getOperandInfo(V->getLiveInIRValue());
1686 }
1687