xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp (revision e32fecd0c2c3ee37c47ee100f169e7eb0282a873)
1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This is the LLVM vectorization plan. It represents a candidate for
11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
12 /// before generating LLVM-IR.
13 /// The vectorizer uses vectorization plans to estimate the costs of potential
14 /// candidates and if profitable to execute the desired plan, generating vector
15 /// LLVM-IR code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "VPlan.h"
20 #include "VPlanDominatorTree.h"
21 #include "llvm/ADT/DepthFirstIterator.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/Twine.h"
26 #include "llvm/Analysis/LoopInfo.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/CFG.h"
29 #include "llvm/IR/IRBuilder.h"
30 #include "llvm/IR/Instruction.h"
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Value.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/GenericDomTreeConstruction.h"
38 #include "llvm/Support/GraphWriter.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
41 #include "llvm/Transforms/Utils/LoopVersioning.h"
42 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
43 #include <cassert>
44 #include <string>
45 #include <vector>
46 
47 using namespace llvm;
48 extern cl::opt<bool> EnableVPlanNativePath;
49 
50 #define DEBUG_TYPE "vplan"
51 
52 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
53 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
54   const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
55   VPSlotTracker SlotTracker(
56       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
57   V.print(OS, SlotTracker);
58   return OS;
59 }
60 #endif
61 
62 Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
63                                 const ElementCount &VF) const {
64   switch (LaneKind) {
65   case VPLane::Kind::ScalableLast:
66     // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
67     return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
68                              Builder.getInt32(VF.getKnownMinValue() - Lane));
69   case VPLane::Kind::First:
70     return Builder.getInt32(Lane);
71   }
72   llvm_unreachable("Unknown lane kind");
73 }
74 
75 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
76     : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
77   if (Def)
78     Def->addDefinedValue(this);
79 }
80 
81 VPValue::~VPValue() {
82   assert(Users.empty() && "trying to delete a VPValue with remaining users");
83   if (Def)
84     Def->removeDefinedValue(this);
85 }
86 
87 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
88 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
89   if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
90     R->print(OS, "", SlotTracker);
91   else
92     printAsOperand(OS, SlotTracker);
93 }
94 
95 void VPValue::dump() const {
96   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
97   VPSlotTracker SlotTracker(
98       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
99   print(dbgs(), SlotTracker);
100   dbgs() << "\n";
101 }
102 
103 void VPDef::dump() const {
104   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
105   VPSlotTracker SlotTracker(
106       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
107   print(dbgs(), "", SlotTracker);
108   dbgs() << "\n";
109 }
110 #endif
111 
112 // Get the top-most entry block of \p Start. This is the entry block of the
113 // containing VPlan. This function is templated to support both const and non-const blocks
114 template <typename T> static T *getPlanEntry(T *Start) {
115   T *Next = Start;
116   T *Current = Start;
117   while ((Next = Next->getParent()))
118     Current = Next;
119 
120   SmallSetVector<T *, 8> WorkList;
121   WorkList.insert(Current);
122 
123   for (unsigned i = 0; i < WorkList.size(); i++) {
124     T *Current = WorkList[i];
125     if (Current->getNumPredecessors() == 0)
126       return Current;
127     auto &Predecessors = Current->getPredecessors();
128     WorkList.insert(Predecessors.begin(), Predecessors.end());
129   }
130 
131   llvm_unreachable("VPlan without any entry node without predecessors");
132 }
133 
134 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
135 
136 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
137 
138 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
139 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
140   const VPBlockBase *Block = this;
141   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
142     Block = Region->getEntry();
143   return cast<VPBasicBlock>(Block);
144 }
145 
146 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
147   VPBlockBase *Block = this;
148   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
149     Block = Region->getEntry();
150   return cast<VPBasicBlock>(Block);
151 }
152 
153 void VPBlockBase::setPlan(VPlan *ParentPlan) {
154   assert(ParentPlan->getEntry() == this &&
155          "Can only set plan on its entry block.");
156   Plan = ParentPlan;
157 }
158 
159 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
160 const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const {
161   const VPBlockBase *Block = this;
162   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
163     Block = Region->getExiting();
164   return cast<VPBasicBlock>(Block);
165 }
166 
167 VPBasicBlock *VPBlockBase::getExitingBasicBlock() {
168   VPBlockBase *Block = this;
169   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
170     Block = Region->getExiting();
171   return cast<VPBasicBlock>(Block);
172 }
173 
174 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
175   if (!Successors.empty() || !Parent)
176     return this;
177   assert(Parent->getExiting() == this &&
178          "Block w/o successors not the exiting block of its parent.");
179   return Parent->getEnclosingBlockWithSuccessors();
180 }
181 
182 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
183   if (!Predecessors.empty() || !Parent)
184     return this;
185   assert(Parent->getEntry() == this &&
186          "Block w/o predecessors not the entry of its parent.");
187   return Parent->getEnclosingBlockWithPredecessors();
188 }
189 
190 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
191   SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry));
192 
193   for (VPBlockBase *Block : Blocks)
194     delete Block;
195 }
196 
197 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
198   iterator It = begin();
199   while (It != end() && It->isPhi())
200     It++;
201   return It;
202 }
203 
204 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
205   if (!Def->getDef())
206     return Def->getLiveInIRValue();
207 
208   if (hasScalarValue(Def, Instance)) {
209     return Data
210         .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
211   }
212 
213   assert(hasVectorValue(Def, Instance.Part));
214   auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
215   if (!VecPart->getType()->isVectorTy()) {
216     assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
217     return VecPart;
218   }
219   // TODO: Cache created scalar values.
220   Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
221   auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
222   // set(Def, Extract, Instance);
223   return Extract;
224 }
225 BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
226   VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
227   return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
228 }
229 
230 void VPTransformState::addNewMetadata(Instruction *To,
231                                       const Instruction *Orig) {
232   // If the loop was versioned with memchecks, add the corresponding no-alias
233   // metadata.
234   if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
235     LVer->annotateInstWithNoAlias(To, Orig);
236 }
237 
238 void VPTransformState::addMetadata(Instruction *To, Instruction *From) {
239   propagateMetadata(To, From);
240   addNewMetadata(To, From);
241 }
242 
243 void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) {
244   for (Value *V : To) {
245     if (Instruction *I = dyn_cast<Instruction>(V))
246       addMetadata(I, From);
247   }
248 }
249 
250 void VPTransformState::setDebugLocFromInst(const Value *V) {
251   const Instruction *Inst = dyn_cast<Instruction>(V);
252   if (!Inst) {
253     Builder.SetCurrentDebugLocation(DebugLoc());
254     return;
255   }
256 
257   const DILocation *DIL = Inst->getDebugLoc();
258   // When a FSDiscriminator is enabled, we don't need to add the multiply
259   // factors to the discriminators.
260   if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
261       !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
262     // FIXME: For scalable vectors, assume vscale=1.
263     auto NewDIL =
264         DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
265     if (NewDIL)
266       Builder.SetCurrentDebugLocation(*NewDIL);
267     else
268       LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
269                         << DIL->getFilename() << " Line: " << DIL->getLine());
270   } else
271     Builder.SetCurrentDebugLocation(DIL);
272 }
273 
274 BasicBlock *
275 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
276   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
277   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
278   BasicBlock *PrevBB = CFG.PrevBB;
279   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
280                                          PrevBB->getParent(), CFG.ExitBB);
281   LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
282 
283   // Hook up the new basic block to its predecessors.
284   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
285     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
286     auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
287     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
288 
289     assert(PredBB && "Predecessor basic-block not found building successor.");
290     auto *PredBBTerminator = PredBB->getTerminator();
291     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
292 
293     auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
294     if (isa<UnreachableInst>(PredBBTerminator)) {
295       assert(PredVPSuccessors.size() == 1 &&
296              "Predecessor ending w/o branch must have single successor.");
297       DebugLoc DL = PredBBTerminator->getDebugLoc();
298       PredBBTerminator->eraseFromParent();
299       auto *Br = BranchInst::Create(NewBB, PredBB);
300       Br->setDebugLoc(DL);
301     } else if (TermBr && !TermBr->isConditional()) {
302       TermBr->setSuccessor(0, NewBB);
303     } else {
304       // Set each forward successor here when it is created, excluding
305       // backedges. A backward successor is set when the branch is created.
306       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
307       assert(!TermBr->getSuccessor(idx) &&
308              "Trying to reset an existing successor block.");
309       TermBr->setSuccessor(idx, NewBB);
310     }
311   }
312   return NewBB;
313 }
314 
315 void VPBasicBlock::execute(VPTransformState *State) {
316   bool Replica = State->Instance && !State->Instance->isFirstIteration();
317   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
318   VPBlockBase *SingleHPred = nullptr;
319   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
320 
321   auto IsLoopRegion = [](VPBlockBase *BB) {
322     auto *R = dyn_cast<VPRegionBlock>(BB);
323     return R && !R->isReplicator();
324   };
325 
326   // 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
327   if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
328     // ExitBB can be re-used for the exit block of the Plan.
329     NewBB = State->CFG.ExitBB;
330     State->CFG.PrevBB = NewBB;
331 
332     // Update the branch instruction in the predecessor to branch to ExitBB.
333     VPBlockBase *PredVPB = getSingleHierarchicalPredecessor();
334     VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock();
335     assert(PredVPB->getSingleSuccessor() == this &&
336            "predecessor must have the current block as only successor");
337     BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB];
338     // The Exit block of a loop is always set to be successor 0 of the Exiting
339     // block.
340     cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
341   } else if (PrevVPBB && /* A */
342              !((SingleHPred = getSingleHierarchicalPredecessor()) &&
343                SingleHPred->getExitingBasicBlock() == PrevVPBB &&
344                PrevVPBB->getSingleHierarchicalSuccessor() &&
345                (SingleHPred->getParent() == getEnclosingLoopRegion() &&
346                 !IsLoopRegion(SingleHPred))) &&         /* B */
347              !(Replica && getPredecessors().empty())) { /* C */
348     // The last IR basic block is reused, as an optimization, in three cases:
349     // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
350     // B. when the current VPBB has a single (hierarchical) predecessor which
351     //    is PrevVPBB and the latter has a single (hierarchical) successor which
352     //    both are in the same non-replicator region; and
353     // C. when the current VPBB is an entry of a region replica - where PrevVPBB
354     //    is the exiting VPBB of this region from a previous instance, or the
355     //    predecessor of this region.
356 
357     NewBB = createEmptyBasicBlock(State->CFG);
358     State->Builder.SetInsertPoint(NewBB);
359     // Temporarily terminate with unreachable until CFG is rewired.
360     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
361     // Register NewBB in its loop. In innermost loops its the same for all
362     // BB's.
363     if (State->CurrentVectorLoop)
364       State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
365     State->Builder.SetInsertPoint(Terminator);
366     State->CFG.PrevBB = NewBB;
367   }
368 
369   // 2. Fill the IR basic block with IR instructions.
370   LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
371                     << " in BB:" << NewBB->getName() << '\n');
372 
373   State->CFG.VPBB2IRBB[this] = NewBB;
374   State->CFG.PrevVPBB = this;
375 
376   for (VPRecipeBase &Recipe : Recipes)
377     Recipe.execute(*State);
378 
379   LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
380 }
381 
382 void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
383   for (VPRecipeBase &R : Recipes) {
384     for (auto *Def : R.definedValues())
385       Def->replaceAllUsesWith(NewValue);
386 
387     for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
388       R.setOperand(I, NewValue);
389   }
390 }
391 
392 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
393   assert((SplitAt == end() || SplitAt->getParent() == this) &&
394          "can only split at a position in the same block");
395 
396   SmallVector<VPBlockBase *, 2> Succs(successors());
397   // First, disconnect the current block from its successors.
398   for (VPBlockBase *Succ : Succs)
399     VPBlockUtils::disconnectBlocks(this, Succ);
400 
401   // Create new empty block after the block to split.
402   auto *SplitBlock = new VPBasicBlock(getName() + ".split");
403   VPBlockUtils::insertBlockAfter(SplitBlock, this);
404 
405   // Add successors for block to split to new block.
406   for (VPBlockBase *Succ : Succs)
407     VPBlockUtils::connectBlocks(SplitBlock, Succ);
408 
409   // Finally, move the recipes starting at SplitAt to new block.
410   for (VPRecipeBase &ToMove :
411        make_early_inc_range(make_range(SplitAt, this->end())))
412     ToMove.moveBefore(*SplitBlock, SplitBlock->end());
413 
414   return SplitBlock;
415 }
416 
417 VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
418   VPRegionBlock *P = getParent();
419   if (P && P->isReplicator()) {
420     P = P->getParent();
421     assert(!cast<VPRegionBlock>(P)->isReplicator() &&
422            "unexpected nested replicate regions");
423   }
424   return P;
425 }
426 
427 static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
428   if (VPBB->empty()) {
429     assert(
430         VPBB->getNumSuccessors() < 2 &&
431         "block with multiple successors doesn't have a recipe as terminator");
432     return false;
433   }
434 
435   const VPRecipeBase *R = &VPBB->back();
436   auto *VPI = dyn_cast<VPInstruction>(R);
437   bool IsCondBranch =
438       isa<VPBranchOnMaskRecipe>(R) ||
439       (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond ||
440                VPI->getOpcode() == VPInstruction::BranchOnCount));
441   (void)IsCondBranch;
442 
443   if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) {
444     assert(IsCondBranch && "block with multiple successors not terminated by "
445                            "conditional branch recipe");
446 
447     return true;
448   }
449 
450   assert(
451       !IsCondBranch &&
452       "block with 0 or 1 successors terminated by conditional branch recipe");
453   return false;
454 }
455 
456 VPRecipeBase *VPBasicBlock::getTerminator() {
457   if (hasConditionalTerminator(this))
458     return &back();
459   return nullptr;
460 }
461 
462 const VPRecipeBase *VPBasicBlock::getTerminator() const {
463   if (hasConditionalTerminator(this))
464     return &back();
465   return nullptr;
466 }
467 
468 bool VPBasicBlock::isExiting() const {
469   return getParent()->getExitingBasicBlock() == this;
470 }
471 
472 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
473 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
474   if (getSuccessors().empty()) {
475     O << Indent << "No successors\n";
476   } else {
477     O << Indent << "Successor(s): ";
478     ListSeparator LS;
479     for (auto *Succ : getSuccessors())
480       O << LS << Succ->getName();
481     O << '\n';
482   }
483 }
484 
485 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
486                          VPSlotTracker &SlotTracker) const {
487   O << Indent << getName() << ":\n";
488 
489   auto RecipeIndent = Indent + "  ";
490   for (const VPRecipeBase &Recipe : *this) {
491     Recipe.print(O, RecipeIndent, SlotTracker);
492     O << '\n';
493   }
494 
495   printSuccessors(O, Indent);
496 }
497 #endif
498 
499 void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
500   for (VPBlockBase *Block : depth_first(Entry))
501     // Drop all references in VPBasicBlocks and replace all uses with
502     // DummyValue.
503     Block->dropAllReferences(NewValue);
504 }
505 
506 void VPRegionBlock::execute(VPTransformState *State) {
507   ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
508 
509   if (!isReplicator()) {
510     // Create and register the new vector loop.
511     Loop *PrevLoop = State->CurrentVectorLoop;
512     State->CurrentVectorLoop = State->LI->AllocateLoop();
513     BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
514     Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
515 
516     // Insert the new loop into the loop nest and register the new basic blocks
517     // before calling any utilities such as SCEV that require valid LoopInfo.
518     if (ParentLoop)
519       ParentLoop->addChildLoop(State->CurrentVectorLoop);
520     else
521       State->LI->addTopLevelLoop(State->CurrentVectorLoop);
522 
523     // Visit the VPBlocks connected to "this", starting from it.
524     for (VPBlockBase *Block : RPOT) {
525       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
526       Block->execute(State);
527     }
528 
529     State->CurrentVectorLoop = PrevLoop;
530     return;
531   }
532 
533   assert(!State->Instance && "Replicating a Region with non-null instance.");
534 
535   // Enter replicating mode.
536   State->Instance = VPIteration(0, 0);
537 
538   for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
539     State->Instance->Part = Part;
540     assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
541     for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
542          ++Lane) {
543       State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
544       // Visit the VPBlocks connected to \p this, starting from it.
545       for (VPBlockBase *Block : RPOT) {
546         LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
547         Block->execute(State);
548       }
549     }
550   }
551 
552   // Exit replicating mode.
553   State->Instance.reset();
554 }
555 
556 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
557 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
558                           VPSlotTracker &SlotTracker) const {
559   O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
560   auto NewIndent = Indent + "  ";
561   for (auto *BlockBase : depth_first(Entry)) {
562     O << '\n';
563     BlockBase->print(O, NewIndent, SlotTracker);
564   }
565   O << Indent << "}\n";
566 
567   printSuccessors(O, Indent);
568 }
569 #endif
570 
571 VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() {
572   VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
573   for (VPRecipeBase &R : Header->phis()) {
574     if (isa<VPActiveLaneMaskPHIRecipe>(&R))
575       return cast<VPActiveLaneMaskPHIRecipe>(&R);
576   }
577   return nullptr;
578 }
579 
580 static bool canSimplifyBranchOnCond(VPInstruction *Term) {
581   VPInstruction *Not = dyn_cast<VPInstruction>(Term->getOperand(0));
582   if (!Not || Not->getOpcode() != VPInstruction::Not)
583     return false;
584 
585   VPInstruction *ALM = dyn_cast<VPInstruction>(Not->getOperand(0));
586   return ALM && ALM->getOpcode() == VPInstruction::ActiveLaneMask;
587 }
588 
589 void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
590                              Value *CanonicalIVStartValue,
591                              VPTransformState &State,
592                              bool IsEpilogueVectorization) {
593 
594   VPBasicBlock *ExitingVPBB = getVectorLoopRegion()->getExitingBasicBlock();
595   auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back());
596   // Try to simplify the branch condition if TC <= VF * UF when preparing to
597   // execute the plan for the main vector loop. We only do this if the
598   // terminator is:
599   //  1. BranchOnCount, or
600   //  2. BranchOnCond where the input is Not(ActiveLaneMask).
601   if (!IsEpilogueVectorization && Term && isa<ConstantInt>(TripCountV) &&
602       (Term->getOpcode() == VPInstruction::BranchOnCount ||
603        (Term->getOpcode() == VPInstruction::BranchOnCond &&
604         canSimplifyBranchOnCond(Term)))) {
605     ConstantInt *C = cast<ConstantInt>(TripCountV);
606     uint64_t TCVal = C->getZExtValue();
607     if (TCVal && TCVal <= State.VF.getKnownMinValue() * State.UF) {
608       auto *BOC =
609           new VPInstruction(VPInstruction::BranchOnCond,
610                             {getOrAddExternalDef(State.Builder.getTrue())});
611       Term->eraseFromParent();
612       ExitingVPBB->appendRecipe(BOC);
613       // TODO: Further simplifications are possible
614       //      1. Replace inductions with constants.
615       //      2. Replace vector loop region with VPBasicBlock.
616     }
617   }
618 
619   // Check if the trip count is needed, and if so build it.
620   if (TripCount && TripCount->getNumUsers()) {
621     for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
622       State.set(TripCount, TripCountV, Part);
623   }
624 
625   // Check if the backedge taken count is needed, and if so build it.
626   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
627     IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
628     auto *TCMO = Builder.CreateSub(TripCountV,
629                                    ConstantInt::get(TripCountV->getType(), 1),
630                                    "trip.count.minus.1");
631     auto VF = State.VF;
632     Value *VTCMO =
633         VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
634     for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
635       State.set(BackedgeTakenCount, VTCMO, Part);
636   }
637 
638   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
639     State.set(&VectorTripCount, VectorTripCountV, Part);
640 
641   // When vectorizing the epilogue loop, the canonical induction start value
642   // needs to be changed from zero to the value after the main vector loop.
643   if (CanonicalIVStartValue) {
644     VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue);
645     auto *IV = getCanonicalIV();
646     assert(all_of(IV->users(),
647                   [](const VPUser *U) {
648                     if (isa<VPScalarIVStepsRecipe>(U))
649                       return true;
650                     auto *VPI = cast<VPInstruction>(U);
651                     return VPI->getOpcode() ==
652                                VPInstruction::CanonicalIVIncrement ||
653                            VPI->getOpcode() ==
654                                VPInstruction::CanonicalIVIncrementNUW;
655                   }) &&
656            "the canonical IV should only be used by its increments or "
657            "ScalarIVSteps when "
658            "resetting the start value");
659     IV->setOperand(0, VPV);
660   }
661 }
662 
663 /// Generate the code inside the preheader and body of the vectorized loop.
664 /// Assumes a single pre-header basic-block was created for this. Introduce
665 /// additional basic-blocks as needed, and fill them all.
666 void VPlan::execute(VPTransformState *State) {
667   // Set the reverse mapping from VPValues to Values for code generation.
668   for (auto &Entry : Value2VPValue)
669     State->VPValue2Value[Entry.second] = Entry.first;
670 
671   // Initialize CFG state.
672   State->CFG.PrevVPBB = nullptr;
673   State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
674   BasicBlock *VectorPreHeader = State->CFG.PrevBB;
675   State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
676 
677   // Generate code in the loop pre-header and body.
678   for (VPBlockBase *Block : depth_first(Entry))
679     Block->execute(State);
680 
681   VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
682   BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
683 
684   // Fix the latch value of canonical, reduction and first-order recurrences
685   // phis in the vector loop.
686   VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
687   for (VPRecipeBase &R : Header->phis()) {
688     // Skip phi-like recipes that generate their backedege values themselves.
689     if (isa<VPWidenPHIRecipe>(&R))
690       continue;
691 
692     if (isa<VPWidenPointerInductionRecipe>(&R) ||
693         isa<VPWidenIntOrFpInductionRecipe>(&R)) {
694       PHINode *Phi = nullptr;
695       if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
696         Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
697       } else {
698         auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
699         // TODO: Split off the case that all users of a pointer phi are scalar
700         // from the VPWidenPointerInductionRecipe.
701         if (WidenPhi->onlyScalarsGenerated(State->VF))
702           continue;
703 
704         auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
705         Phi = cast<PHINode>(GEP->getPointerOperand());
706       }
707 
708       Phi->setIncomingBlock(1, VectorLatchBB);
709 
710       // Move the last step to the end of the latch block. This ensures
711       // consistent placement of all induction updates.
712       Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
713       Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
714       continue;
715     }
716 
717     auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
718     // For  canonical IV, first-order recurrences and in-order reduction phis,
719     // only a single part is generated, which provides the last part from the
720     // previous iteration. For non-ordered reductions all UF parts are
721     // generated.
722     bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
723                             isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
724                             (isa<VPReductionPHIRecipe>(PhiR) &&
725                              cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
726     unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
727 
728     for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
729       Value *Phi = State->get(PhiR, Part);
730       Value *Val = State->get(PhiR->getBackedgeValue(),
731                               SinglePartNeeded ? State->UF - 1 : Part);
732       cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
733     }
734   }
735 
736   // We do not attempt to preserve DT for outer loop vectorization currently.
737   if (!EnableVPlanNativePath) {
738     BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
739     State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
740     updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
741                         State->CFG.ExitBB);
742   }
743 }
744 
745 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
746 LLVM_DUMP_METHOD
747 void VPlan::print(raw_ostream &O) const {
748   VPSlotTracker SlotTracker(this);
749 
750   O << "VPlan '" << Name << "' {";
751 
752   if (VectorTripCount.getNumUsers() > 0) {
753     O << "\nLive-in ";
754     VectorTripCount.printAsOperand(O, SlotTracker);
755     O << " = vector-trip-count\n";
756   }
757 
758   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
759     O << "\nLive-in ";
760     BackedgeTakenCount->printAsOperand(O, SlotTracker);
761     O << " = backedge-taken count\n";
762   }
763 
764   for (const VPBlockBase *Block : depth_first(getEntry())) {
765     O << '\n';
766     Block->print(O, "", SlotTracker);
767   }
768 
769   if (!LiveOuts.empty())
770     O << "\n";
771   for (auto &KV : LiveOuts) {
772     O << "Live-out ";
773     KV.second->getPhi()->printAsOperand(O);
774     O << " = ";
775     KV.second->getOperand(0)->printAsOperand(O, SlotTracker);
776     O << "\n";
777   }
778 
779   O << "}\n";
780 }
781 
782 LLVM_DUMP_METHOD
783 void VPlan::printDOT(raw_ostream &O) const {
784   VPlanPrinter Printer(O, *this);
785   Printer.dump();
786 }
787 
788 LLVM_DUMP_METHOD
789 void VPlan::dump() const { print(dbgs()); }
790 #endif
791 
792 void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
793   assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
794   LiveOuts.insert({PN, new VPLiveOut(PN, V)});
795 }
796 
797 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
798                                 BasicBlock *LoopLatchBB,
799                                 BasicBlock *LoopExitBB) {
800   // The vector body may be more than a single basic-block by this point.
801   // Update the dominator tree information inside the vector body by propagating
802   // it from header to latch, expecting only triangular control-flow, if any.
803   BasicBlock *PostDomSucc = nullptr;
804   for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
805     // Get the list of successors of this block.
806     std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
807     assert(Succs.size() <= 2 &&
808            "Basic block in vector loop has more than 2 successors.");
809     PostDomSucc = Succs[0];
810     if (Succs.size() == 1) {
811       assert(PostDomSucc->getSinglePredecessor() &&
812              "PostDom successor has more than one predecessor.");
813       DT->addNewBlock(PostDomSucc, BB);
814       continue;
815     }
816     BasicBlock *InterimSucc = Succs[1];
817     if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
818       PostDomSucc = Succs[1];
819       InterimSucc = Succs[0];
820     }
821     assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
822            "One successor of a basic block does not lead to the other.");
823     assert(InterimSucc->getSinglePredecessor() &&
824            "Interim successor has more than one predecessor.");
825     assert(PostDomSucc->hasNPredecessors(2) &&
826            "PostDom successor has more than two predecessors.");
827     DT->addNewBlock(InterimSucc, BB);
828     DT->addNewBlock(PostDomSucc, BB);
829   }
830   // Latch block is a new dominator for the loop exit.
831   DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
832   assert(DT->verify(DominatorTree::VerificationLevel::Fast));
833 }
834 
835 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
836 
837 Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
838   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
839          Twine(getOrCreateBID(Block));
840 }
841 
842 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
843   const std::string &Name = Block->getName();
844   if (!Name.empty())
845     return Name;
846   return "VPB" + Twine(getOrCreateBID(Block));
847 }
848 
849 void VPlanPrinter::dump() {
850   Depth = 1;
851   bumpIndent(0);
852   OS << "digraph VPlan {\n";
853   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
854   if (!Plan.getName().empty())
855     OS << "\\n" << DOT::EscapeString(Plan.getName());
856   if (Plan.BackedgeTakenCount) {
857     OS << ", where:\\n";
858     Plan.BackedgeTakenCount->print(OS, SlotTracker);
859     OS << " := BackedgeTakenCount";
860   }
861   OS << "\"]\n";
862   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
863   OS << "edge [fontname=Courier, fontsize=30]\n";
864   OS << "compound=true\n";
865 
866   for (const VPBlockBase *Block : depth_first(Plan.getEntry()))
867     dumpBlock(Block);
868 
869   OS << "}\n";
870 }
871 
872 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
873   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
874     dumpBasicBlock(BasicBlock);
875   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
876     dumpRegion(Region);
877   else
878     llvm_unreachable("Unsupported kind of VPBlock.");
879 }
880 
881 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
882                             bool Hidden, const Twine &Label) {
883   // Due to "dot" we print an edge between two regions as an edge between the
884   // exiting basic block and the entry basic of the respective regions.
885   const VPBlockBase *Tail = From->getExitingBasicBlock();
886   const VPBlockBase *Head = To->getEntryBasicBlock();
887   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
888   OS << " [ label=\"" << Label << '\"';
889   if (Tail != From)
890     OS << " ltail=" << getUID(From);
891   if (Head != To)
892     OS << " lhead=" << getUID(To);
893   if (Hidden)
894     OS << "; splines=none";
895   OS << "]\n";
896 }
897 
898 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
899   auto &Successors = Block->getSuccessors();
900   if (Successors.size() == 1)
901     drawEdge(Block, Successors.front(), false, "");
902   else if (Successors.size() == 2) {
903     drawEdge(Block, Successors.front(), false, "T");
904     drawEdge(Block, Successors.back(), false, "F");
905   } else {
906     unsigned SuccessorNumber = 0;
907     for (auto *Successor : Successors)
908       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
909   }
910 }
911 
912 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
913   // Implement dot-formatted dump by performing plain-text dump into the
914   // temporary storage followed by some post-processing.
915   OS << Indent << getUID(BasicBlock) << " [label =\n";
916   bumpIndent(1);
917   std::string Str;
918   raw_string_ostream SS(Str);
919   // Use no indentation as we need to wrap the lines into quotes ourselves.
920   BasicBlock->print(SS, "", SlotTracker);
921 
922   // We need to process each line of the output separately, so split
923   // single-string plain-text dump.
924   SmallVector<StringRef, 0> Lines;
925   StringRef(Str).rtrim('\n').split(Lines, "\n");
926 
927   auto EmitLine = [&](StringRef Line, StringRef Suffix) {
928     OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
929   };
930 
931   // Don't need the "+" after the last line.
932   for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
933     EmitLine(Line, " +\n");
934   EmitLine(Lines.back(), "\n");
935 
936   bumpIndent(-1);
937   OS << Indent << "]\n";
938 
939   dumpEdges(BasicBlock);
940 }
941 
942 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
943   OS << Indent << "subgraph " << getUID(Region) << " {\n";
944   bumpIndent(1);
945   OS << Indent << "fontname=Courier\n"
946      << Indent << "label=\""
947      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
948      << DOT::EscapeString(Region->getName()) << "\"\n";
949   // Dump the blocks of the region.
950   assert(Region->getEntry() && "Region contains no inner blocks.");
951   for (const VPBlockBase *Block : depth_first(Region->getEntry()))
952     dumpBlock(Block);
953   bumpIndent(-1);
954   OS << Indent << "}\n";
955   dumpEdges(Region);
956 }
957 
958 void VPlanIngredient::print(raw_ostream &O) const {
959   if (auto *Inst = dyn_cast<Instruction>(V)) {
960     if (!Inst->getType()->isVoidTy()) {
961       Inst->printAsOperand(O, false);
962       O << " = ";
963     }
964     O << Inst->getOpcodeName() << " ";
965     unsigned E = Inst->getNumOperands();
966     if (E > 0) {
967       Inst->getOperand(0)->printAsOperand(O, false);
968       for (unsigned I = 1; I < E; ++I)
969         Inst->getOperand(I)->printAsOperand(O << ", ", false);
970     }
971   } else // !Inst
972     V->printAsOperand(O, false);
973 }
974 
975 #endif
976 
977 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
978 
979 void VPValue::replaceAllUsesWith(VPValue *New) {
980   for (unsigned J = 0; J < getNumUsers();) {
981     VPUser *User = Users[J];
982     unsigned NumUsers = getNumUsers();
983     for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
984       if (User->getOperand(I) == this)
985         User->setOperand(I, New);
986     // If a user got removed after updating the current user, the next user to
987     // update will be moved to the current position, so we only need to
988     // increment the index if the number of users did not change.
989     if (NumUsers == getNumUsers())
990       J++;
991   }
992 }
993 
994 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
995 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
996   if (const Value *UV = getUnderlyingValue()) {
997     OS << "ir<";
998     UV->printAsOperand(OS, false);
999     OS << ">";
1000     return;
1001   }
1002 
1003   unsigned Slot = Tracker.getSlot(this);
1004   if (Slot == unsigned(-1))
1005     OS << "<badref>";
1006   else
1007     OS << "vp<%" << Tracker.getSlot(this) << ">";
1008 }
1009 
1010 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1011   interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1012     Op->printAsOperand(O, SlotTracker);
1013   });
1014 }
1015 #endif
1016 
1017 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1018                                           Old2NewTy &Old2New,
1019                                           InterleavedAccessInfo &IAI) {
1020   ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
1021   for (VPBlockBase *Base : RPOT) {
1022     visitBlock(Base, Old2New, IAI);
1023   }
1024 }
1025 
1026 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1027                                          InterleavedAccessInfo &IAI) {
1028   if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1029     for (VPRecipeBase &VPI : *VPBB) {
1030       if (isa<VPHeaderPHIRecipe>(&VPI))
1031         continue;
1032       assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1033       auto *VPInst = cast<VPInstruction>(&VPI);
1034 
1035       auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
1036       if (!Inst)
1037         continue;
1038       auto *IG = IAI.getInterleaveGroup(Inst);
1039       if (!IG)
1040         continue;
1041 
1042       auto NewIGIter = Old2New.find(IG);
1043       if (NewIGIter == Old2New.end())
1044         Old2New[IG] = new InterleaveGroup<VPInstruction>(
1045             IG->getFactor(), IG->isReverse(), IG->getAlign());
1046 
1047       if (Inst == IG->getInsertPos())
1048         Old2New[IG]->setInsertPos(VPInst);
1049 
1050       InterleaveGroupMap[VPInst] = Old2New[IG];
1051       InterleaveGroupMap[VPInst]->insertMember(
1052           VPInst, IG->getIndex(Inst),
1053           Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1054                                 : IG->getFactor()));
1055     }
1056   } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1057     visitRegion(Region, Old2New, IAI);
1058   else
1059     llvm_unreachable("Unsupported kind of VPBlock.");
1060 }
1061 
1062 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
1063                                                  InterleavedAccessInfo &IAI) {
1064   Old2NewTy Old2New;
1065   visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
1066 }
1067 
1068 void VPSlotTracker::assignSlot(const VPValue *V) {
1069   assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
1070   Slots[V] = NextSlot++;
1071 }
1072 
1073 void VPSlotTracker::assignSlots(const VPlan &Plan) {
1074 
1075   for (const auto &P : Plan.VPExternalDefs)
1076     assignSlot(P.second);
1077 
1078   assignSlot(&Plan.VectorTripCount);
1079   if (Plan.BackedgeTakenCount)
1080     assignSlot(Plan.BackedgeTakenCount);
1081 
1082   ReversePostOrderTraversal<
1083       VPBlockRecursiveTraversalWrapper<const VPBlockBase *>>
1084       RPOT(VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(
1085           Plan.getEntry()));
1086   for (const VPBasicBlock *VPBB :
1087        VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1088     for (const VPRecipeBase &Recipe : *VPBB)
1089       for (VPValue *Def : Recipe.definedValues())
1090         assignSlot(Def);
1091 }
1092 
1093 bool vputils::onlyFirstLaneUsed(VPValue *Def) {
1094   return all_of(Def->users(),
1095                 [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
1096 }
1097 
1098 VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
1099                                                 ScalarEvolution &SE) {
1100   if (auto *E = dyn_cast<SCEVConstant>(Expr))
1101     return Plan.getOrAddExternalDef(E->getValue());
1102   if (auto *E = dyn_cast<SCEVUnknown>(Expr))
1103     return Plan.getOrAddExternalDef(E->getValue());
1104 
1105   VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
1106   VPValue *Step = new VPExpandSCEVRecipe(Expr, SE);
1107   Preheader->appendRecipe(cast<VPRecipeBase>(Step->getDef()));
1108   return Step;
1109 }
1110