xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlan.cpp (revision be092bcde96bdcfde9013d60e442cca023bfbd1b)
1  //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  ///
9  /// \file
10  /// This is the LLVM vectorization plan. It represents a candidate for
11  /// vectorization, allowing to plan and optimize how to vectorize a given loop
12  /// before generating LLVM-IR.
13  /// The vectorizer uses vectorization plans to estimate the costs of potential
14  /// candidates and if profitable to execute the desired plan, generating vector
15  /// LLVM-IR code.
16  ///
17  //===----------------------------------------------------------------------===//
18  
19  #include "VPlan.h"
20  #include "VPlanCFG.h"
21  #include "VPlanDominatorTree.h"
22  #include "llvm/ADT/DepthFirstIterator.h"
23  #include "llvm/ADT/PostOrderIterator.h"
24  #include "llvm/ADT/STLExtras.h"
25  #include "llvm/ADT/SmallVector.h"
26  #include "llvm/ADT/Twine.h"
27  #include "llvm/Analysis/LoopInfo.h"
28  #include "llvm/IR/BasicBlock.h"
29  #include "llvm/IR/CFG.h"
30  #include "llvm/IR/IRBuilder.h"
31  #include "llvm/IR/Instruction.h"
32  #include "llvm/IR/Instructions.h"
33  #include "llvm/IR/Type.h"
34  #include "llvm/IR/Value.h"
35  #include "llvm/Support/Casting.h"
36  #include "llvm/Support/CommandLine.h"
37  #include "llvm/Support/Debug.h"
38  #include "llvm/Support/GenericDomTreeConstruction.h"
39  #include "llvm/Support/GraphWriter.h"
40  #include "llvm/Support/raw_ostream.h"
41  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
42  #include "llvm/Transforms/Utils/LoopVersioning.h"
43  #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
44  #include <cassert>
45  #include <string>
46  #include <vector>
47  
48  using namespace llvm;
49  extern cl::opt<bool> EnableVPlanNativePath;
50  
51  #define DEBUG_TYPE "vplan"
52  
53  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
54  raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
55    const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
56    VPSlotTracker SlotTracker(
57        (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
58    V.print(OS, SlotTracker);
59    return OS;
60  }
61  #endif
62  
63  Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
64                                  const ElementCount &VF) const {
65    switch (LaneKind) {
66    case VPLane::Kind::ScalableLast:
67      // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
68      return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
69                               Builder.getInt32(VF.getKnownMinValue() - Lane));
70    case VPLane::Kind::First:
71      return Builder.getInt32(Lane);
72    }
73    llvm_unreachable("Unknown lane kind");
74  }
75  
76  VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
77      : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
78    if (Def)
79      Def->addDefinedValue(this);
80  }
81  
82  VPValue::~VPValue() {
83    assert(Users.empty() && "trying to delete a VPValue with remaining users");
84    if (Def)
85      Def->removeDefinedValue(this);
86  }
87  
88  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
89  void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
90    if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
91      R->print(OS, "", SlotTracker);
92    else
93      printAsOperand(OS, SlotTracker);
94  }
95  
96  void VPValue::dump() const {
97    const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
98    VPSlotTracker SlotTracker(
99        (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
100    print(dbgs(), SlotTracker);
101    dbgs() << "\n";
102  }
103  
104  void VPDef::dump() const {
105    const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
106    VPSlotTracker SlotTracker(
107        (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
108    print(dbgs(), "", SlotTracker);
109    dbgs() << "\n";
110  }
111  #endif
112  
113  VPRecipeBase *VPValue::getDefiningRecipe() {
114    return cast_or_null<VPRecipeBase>(Def);
115  }
116  
117  const VPRecipeBase *VPValue::getDefiningRecipe() const {
118    return cast_or_null<VPRecipeBase>(Def);
119  }
120  
121  // Get the top-most entry block of \p Start. This is the entry block of the
122  // containing VPlan. This function is templated to support both const and non-const blocks
123  template <typename T> static T *getPlanEntry(T *Start) {
124    T *Next = Start;
125    T *Current = Start;
126    while ((Next = Next->getParent()))
127      Current = Next;
128  
129    SmallSetVector<T *, 8> WorkList;
130    WorkList.insert(Current);
131  
132    for (unsigned i = 0; i < WorkList.size(); i++) {
133      T *Current = WorkList[i];
134      if (Current->getNumPredecessors() == 0)
135        return Current;
136      auto &Predecessors = Current->getPredecessors();
137      WorkList.insert(Predecessors.begin(), Predecessors.end());
138    }
139  
140    llvm_unreachable("VPlan without any entry node without predecessors");
141  }
142  
143  VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
144  
145  const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
146  
147  /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
148  const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
149    const VPBlockBase *Block = this;
150    while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
151      Block = Region->getEntry();
152    return cast<VPBasicBlock>(Block);
153  }
154  
155  VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
156    VPBlockBase *Block = this;
157    while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
158      Block = Region->getEntry();
159    return cast<VPBasicBlock>(Block);
160  }
161  
162  void VPBlockBase::setPlan(VPlan *ParentPlan) {
163    assert(ParentPlan->getEntry() == this &&
164           "Can only set plan on its entry block.");
165    Plan = ParentPlan;
166  }
167  
168  /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
169  const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const {
170    const VPBlockBase *Block = this;
171    while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
172      Block = Region->getExiting();
173    return cast<VPBasicBlock>(Block);
174  }
175  
176  VPBasicBlock *VPBlockBase::getExitingBasicBlock() {
177    VPBlockBase *Block = this;
178    while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
179      Block = Region->getExiting();
180    return cast<VPBasicBlock>(Block);
181  }
182  
183  VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
184    if (!Successors.empty() || !Parent)
185      return this;
186    assert(Parent->getExiting() == this &&
187           "Block w/o successors not the exiting block of its parent.");
188    return Parent->getEnclosingBlockWithSuccessors();
189  }
190  
191  VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
192    if (!Predecessors.empty() || !Parent)
193      return this;
194    assert(Parent->getEntry() == this &&
195           "Block w/o predecessors not the entry of its parent.");
196    return Parent->getEnclosingBlockWithPredecessors();
197  }
198  
199  void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
200    for (VPBlockBase *Block : to_vector(vp_depth_first_shallow(Entry)))
201      delete Block;
202  }
203  
204  VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
205    iterator It = begin();
206    while (It != end() && It->isPhi())
207      It++;
208    return It;
209  }
210  
211  Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
212    if (!Def->hasDefiningRecipe())
213      return Def->getLiveInIRValue();
214  
215    if (hasScalarValue(Def, Instance)) {
216      return Data
217          .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
218    }
219  
220    assert(hasVectorValue(Def, Instance.Part));
221    auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
222    if (!VecPart->getType()->isVectorTy()) {
223      assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
224      return VecPart;
225    }
226    // TODO: Cache created scalar values.
227    Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
228    auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
229    // set(Def, Extract, Instance);
230    return Extract;
231  }
232  BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
233    VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
234    return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
235  }
236  
237  void VPTransformState::addNewMetadata(Instruction *To,
238                                        const Instruction *Orig) {
239    // If the loop was versioned with memchecks, add the corresponding no-alias
240    // metadata.
241    if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
242      LVer->annotateInstWithNoAlias(To, Orig);
243  }
244  
245  void VPTransformState::addMetadata(Instruction *To, Instruction *From) {
246    propagateMetadata(To, From);
247    addNewMetadata(To, From);
248  }
249  
250  void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) {
251    for (Value *V : To) {
252      if (Instruction *I = dyn_cast<Instruction>(V))
253        addMetadata(I, From);
254    }
255  }
256  
257  void VPTransformState::setDebugLocFromInst(const Value *V) {
258    const Instruction *Inst = dyn_cast<Instruction>(V);
259    if (!Inst) {
260      Builder.SetCurrentDebugLocation(DebugLoc());
261      return;
262    }
263  
264    const DILocation *DIL = Inst->getDebugLoc();
265    // When a FSDiscriminator is enabled, we don't need to add the multiply
266    // factors to the discriminators.
267    if (DIL && Inst->getFunction()->shouldEmitDebugInfoForProfiling() &&
268        !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
269      // FIXME: For scalable vectors, assume vscale=1.
270      auto NewDIL =
271          DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
272      if (NewDIL)
273        Builder.SetCurrentDebugLocation(*NewDIL);
274      else
275        LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
276                          << DIL->getFilename() << " Line: " << DIL->getLine());
277    } else
278      Builder.SetCurrentDebugLocation(DIL);
279  }
280  
281  BasicBlock *
282  VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
283    // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
284    // Pred stands for Predessor. Prev stands for Previous - last visited/created.
285    BasicBlock *PrevBB = CFG.PrevBB;
286    BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
287                                           PrevBB->getParent(), CFG.ExitBB);
288    LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
289  
290    // Hook up the new basic block to its predecessors.
291    for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
292      VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
293      auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
294      BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
295  
296      assert(PredBB && "Predecessor basic-block not found building successor.");
297      auto *PredBBTerminator = PredBB->getTerminator();
298      LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
299  
300      auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
301      if (isa<UnreachableInst>(PredBBTerminator)) {
302        assert(PredVPSuccessors.size() == 1 &&
303               "Predecessor ending w/o branch must have single successor.");
304        DebugLoc DL = PredBBTerminator->getDebugLoc();
305        PredBBTerminator->eraseFromParent();
306        auto *Br = BranchInst::Create(NewBB, PredBB);
307        Br->setDebugLoc(DL);
308      } else if (TermBr && !TermBr->isConditional()) {
309        TermBr->setSuccessor(0, NewBB);
310      } else {
311        // Set each forward successor here when it is created, excluding
312        // backedges. A backward successor is set when the branch is created.
313        unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
314        assert(!TermBr->getSuccessor(idx) &&
315               "Trying to reset an existing successor block.");
316        TermBr->setSuccessor(idx, NewBB);
317      }
318    }
319    return NewBB;
320  }
321  
322  void VPBasicBlock::execute(VPTransformState *State) {
323    bool Replica = State->Instance && !State->Instance->isFirstIteration();
324    VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
325    VPBlockBase *SingleHPred = nullptr;
326    BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
327  
328    auto IsLoopRegion = [](VPBlockBase *BB) {
329      auto *R = dyn_cast<VPRegionBlock>(BB);
330      return R && !R->isReplicator();
331    };
332  
333    // 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
334    if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
335      // ExitBB can be re-used for the exit block of the Plan.
336      NewBB = State->CFG.ExitBB;
337      State->CFG.PrevBB = NewBB;
338  
339      // Update the branch instruction in the predecessor to branch to ExitBB.
340      VPBlockBase *PredVPB = getSingleHierarchicalPredecessor();
341      VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock();
342      assert(PredVPB->getSingleSuccessor() == this &&
343             "predecessor must have the current block as only successor");
344      BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB];
345      // The Exit block of a loop is always set to be successor 0 of the Exiting
346      // block.
347      cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
348    } else if (PrevVPBB && /* A */
349               !((SingleHPred = getSingleHierarchicalPredecessor()) &&
350                 SingleHPred->getExitingBasicBlock() == PrevVPBB &&
351                 PrevVPBB->getSingleHierarchicalSuccessor() &&
352                 (SingleHPred->getParent() == getEnclosingLoopRegion() &&
353                  !IsLoopRegion(SingleHPred))) &&         /* B */
354               !(Replica && getPredecessors().empty())) { /* C */
355      // The last IR basic block is reused, as an optimization, in three cases:
356      // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
357      // B. when the current VPBB has a single (hierarchical) predecessor which
358      //    is PrevVPBB and the latter has a single (hierarchical) successor which
359      //    both are in the same non-replicator region; and
360      // C. when the current VPBB is an entry of a region replica - where PrevVPBB
361      //    is the exiting VPBB of this region from a previous instance, or the
362      //    predecessor of this region.
363  
364      NewBB = createEmptyBasicBlock(State->CFG);
365      State->Builder.SetInsertPoint(NewBB);
366      // Temporarily terminate with unreachable until CFG is rewired.
367      UnreachableInst *Terminator = State->Builder.CreateUnreachable();
368      // Register NewBB in its loop. In innermost loops its the same for all
369      // BB's.
370      if (State->CurrentVectorLoop)
371        State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
372      State->Builder.SetInsertPoint(Terminator);
373      State->CFG.PrevBB = NewBB;
374    }
375  
376    // 2. Fill the IR basic block with IR instructions.
377    LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
378                      << " in BB:" << NewBB->getName() << '\n');
379  
380    State->CFG.VPBB2IRBB[this] = NewBB;
381    State->CFG.PrevVPBB = this;
382  
383    for (VPRecipeBase &Recipe : Recipes)
384      Recipe.execute(*State);
385  
386    LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
387  }
388  
389  void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
390    for (VPRecipeBase &R : Recipes) {
391      for (auto *Def : R.definedValues())
392        Def->replaceAllUsesWith(NewValue);
393  
394      for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
395        R.setOperand(I, NewValue);
396    }
397  }
398  
399  VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
400    assert((SplitAt == end() || SplitAt->getParent() == this) &&
401           "can only split at a position in the same block");
402  
403    SmallVector<VPBlockBase *, 2> Succs(successors());
404    // First, disconnect the current block from its successors.
405    for (VPBlockBase *Succ : Succs)
406      VPBlockUtils::disconnectBlocks(this, Succ);
407  
408    // Create new empty block after the block to split.
409    auto *SplitBlock = new VPBasicBlock(getName() + ".split");
410    VPBlockUtils::insertBlockAfter(SplitBlock, this);
411  
412    // Add successors for block to split to new block.
413    for (VPBlockBase *Succ : Succs)
414      VPBlockUtils::connectBlocks(SplitBlock, Succ);
415  
416    // Finally, move the recipes starting at SplitAt to new block.
417    for (VPRecipeBase &ToMove :
418         make_early_inc_range(make_range(SplitAt, this->end())))
419      ToMove.moveBefore(*SplitBlock, SplitBlock->end());
420  
421    return SplitBlock;
422  }
423  
424  VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
425    VPRegionBlock *P = getParent();
426    if (P && P->isReplicator()) {
427      P = P->getParent();
428      assert(!cast<VPRegionBlock>(P)->isReplicator() &&
429             "unexpected nested replicate regions");
430    }
431    return P;
432  }
433  
434  static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
435    if (VPBB->empty()) {
436      assert(
437          VPBB->getNumSuccessors() < 2 &&
438          "block with multiple successors doesn't have a recipe as terminator");
439      return false;
440    }
441  
442    const VPRecipeBase *R = &VPBB->back();
443    auto *VPI = dyn_cast<VPInstruction>(R);
444    bool IsCondBranch =
445        isa<VPBranchOnMaskRecipe>(R) ||
446        (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond ||
447                 VPI->getOpcode() == VPInstruction::BranchOnCount));
448    (void)IsCondBranch;
449  
450    if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) {
451      assert(IsCondBranch && "block with multiple successors not terminated by "
452                             "conditional branch recipe");
453  
454      return true;
455    }
456  
457    assert(
458        !IsCondBranch &&
459        "block with 0 or 1 successors terminated by conditional branch recipe");
460    return false;
461  }
462  
463  VPRecipeBase *VPBasicBlock::getTerminator() {
464    if (hasConditionalTerminator(this))
465      return &back();
466    return nullptr;
467  }
468  
469  const VPRecipeBase *VPBasicBlock::getTerminator() const {
470    if (hasConditionalTerminator(this))
471      return &back();
472    return nullptr;
473  }
474  
475  bool VPBasicBlock::isExiting() const {
476    return getParent()->getExitingBasicBlock() == this;
477  }
478  
479  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
480  void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
481    if (getSuccessors().empty()) {
482      O << Indent << "No successors\n";
483    } else {
484      O << Indent << "Successor(s): ";
485      ListSeparator LS;
486      for (auto *Succ : getSuccessors())
487        O << LS << Succ->getName();
488      O << '\n';
489    }
490  }
491  
492  void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
493                           VPSlotTracker &SlotTracker) const {
494    O << Indent << getName() << ":\n";
495  
496    auto RecipeIndent = Indent + "  ";
497    for (const VPRecipeBase &Recipe : *this) {
498      Recipe.print(O, RecipeIndent, SlotTracker);
499      O << '\n';
500    }
501  
502    printSuccessors(O, Indent);
503  }
504  #endif
505  
506  void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
507    for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
508      // Drop all references in VPBasicBlocks and replace all uses with
509      // DummyValue.
510      Block->dropAllReferences(NewValue);
511  }
512  
513  void VPRegionBlock::execute(VPTransformState *State) {
514    ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
515        RPOT(Entry);
516  
517    if (!isReplicator()) {
518      // Create and register the new vector loop.
519      Loop *PrevLoop = State->CurrentVectorLoop;
520      State->CurrentVectorLoop = State->LI->AllocateLoop();
521      BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
522      Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
523  
524      // Insert the new loop into the loop nest and register the new basic blocks
525      // before calling any utilities such as SCEV that require valid LoopInfo.
526      if (ParentLoop)
527        ParentLoop->addChildLoop(State->CurrentVectorLoop);
528      else
529        State->LI->addTopLevelLoop(State->CurrentVectorLoop);
530  
531      // Visit the VPBlocks connected to "this", starting from it.
532      for (VPBlockBase *Block : RPOT) {
533        LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
534        Block->execute(State);
535      }
536  
537      State->CurrentVectorLoop = PrevLoop;
538      return;
539    }
540  
541    assert(!State->Instance && "Replicating a Region with non-null instance.");
542  
543    // Enter replicating mode.
544    State->Instance = VPIteration(0, 0);
545  
546    for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
547      State->Instance->Part = Part;
548      assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
549      for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
550           ++Lane) {
551        State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
552        // Visit the VPBlocks connected to \p this, starting from it.
553        for (VPBlockBase *Block : RPOT) {
554          LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
555          Block->execute(State);
556        }
557      }
558    }
559  
560    // Exit replicating mode.
561    State->Instance.reset();
562  }
563  
564  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
565  void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
566                            VPSlotTracker &SlotTracker) const {
567    O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
568    auto NewIndent = Indent + "  ";
569    for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
570      O << '\n';
571      BlockBase->print(O, NewIndent, SlotTracker);
572    }
573    O << Indent << "}\n";
574  
575    printSuccessors(O, Indent);
576  }
577  #endif
578  
579  VPlan::~VPlan() {
580    clearLiveOuts();
581  
582    if (Entry) {
583      VPValue DummyValue;
584      for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
585        Block->dropAllReferences(&DummyValue);
586  
587      VPBlockBase::deleteCFG(Entry);
588    }
589    for (VPValue *VPV : VPValuesToFree)
590      delete VPV;
591    if (TripCount)
592      delete TripCount;
593    if (BackedgeTakenCount)
594      delete BackedgeTakenCount;
595    for (auto &P : VPExternalDefs)
596      delete P.second;
597  }
598  
599  VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() {
600    VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
601    for (VPRecipeBase &R : Header->phis()) {
602      if (isa<VPActiveLaneMaskPHIRecipe>(&R))
603        return cast<VPActiveLaneMaskPHIRecipe>(&R);
604    }
605    return nullptr;
606  }
607  
608  void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
609                               Value *CanonicalIVStartValue,
610                               VPTransformState &State,
611                               bool IsEpilogueVectorization) {
612  
613    // Check if the trip count is needed, and if so build it.
614    if (TripCount && TripCount->getNumUsers()) {
615      for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
616        State.set(TripCount, TripCountV, Part);
617    }
618  
619    // Check if the backedge taken count is needed, and if so build it.
620    if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
621      IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
622      auto *TCMO = Builder.CreateSub(TripCountV,
623                                     ConstantInt::get(TripCountV->getType(), 1),
624                                     "trip.count.minus.1");
625      auto VF = State.VF;
626      Value *VTCMO =
627          VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
628      for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
629        State.set(BackedgeTakenCount, VTCMO, Part);
630    }
631  
632    for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
633      State.set(&VectorTripCount, VectorTripCountV, Part);
634  
635    // When vectorizing the epilogue loop, the canonical induction start value
636    // needs to be changed from zero to the value after the main vector loop.
637    // FIXME: Improve modeling for canonical IV start values in the epilogue loop.
638    if (CanonicalIVStartValue) {
639      VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue);
640      auto *IV = getCanonicalIV();
641      assert(all_of(IV->users(),
642                    [](const VPUser *U) {
643                      if (isa<VPScalarIVStepsRecipe>(U) ||
644                          isa<VPDerivedIVRecipe>(U))
645                        return true;
646                      auto *VPI = cast<VPInstruction>(U);
647                      return VPI->getOpcode() ==
648                                 VPInstruction::CanonicalIVIncrement ||
649                             VPI->getOpcode() ==
650                                 VPInstruction::CanonicalIVIncrementNUW;
651                    }) &&
652             "the canonical IV should only be used by its increments or "
653             "ScalarIVSteps when "
654             "resetting the start value");
655      IV->setOperand(0, VPV);
656    }
657  }
658  
659  /// Generate the code inside the preheader and body of the vectorized loop.
660  /// Assumes a single pre-header basic-block was created for this. Introduce
661  /// additional basic-blocks as needed, and fill them all.
662  void VPlan::execute(VPTransformState *State) {
663    // Set the reverse mapping from VPValues to Values for code generation.
664    for (auto &Entry : Value2VPValue)
665      State->VPValue2Value[Entry.second] = Entry.first;
666  
667    // Initialize CFG state.
668    State->CFG.PrevVPBB = nullptr;
669    State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
670    BasicBlock *VectorPreHeader = State->CFG.PrevBB;
671    State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
672  
673    // Generate code in the loop pre-header and body.
674    for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
675      Block->execute(State);
676  
677    VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
678    BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
679  
680    // Fix the latch value of canonical, reduction and first-order recurrences
681    // phis in the vector loop.
682    VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
683    for (VPRecipeBase &R : Header->phis()) {
684      // Skip phi-like recipes that generate their backedege values themselves.
685      if (isa<VPWidenPHIRecipe>(&R))
686        continue;
687  
688      if (isa<VPWidenPointerInductionRecipe>(&R) ||
689          isa<VPWidenIntOrFpInductionRecipe>(&R)) {
690        PHINode *Phi = nullptr;
691        if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
692          Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
693        } else {
694          auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
695          // TODO: Split off the case that all users of a pointer phi are scalar
696          // from the VPWidenPointerInductionRecipe.
697          if (WidenPhi->onlyScalarsGenerated(State->VF))
698            continue;
699  
700          auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
701          Phi = cast<PHINode>(GEP->getPointerOperand());
702        }
703  
704        Phi->setIncomingBlock(1, VectorLatchBB);
705  
706        // Move the last step to the end of the latch block. This ensures
707        // consistent placement of all induction updates.
708        Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
709        Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
710        continue;
711      }
712  
713      auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
714      // For  canonical IV, first-order recurrences and in-order reduction phis,
715      // only a single part is generated, which provides the last part from the
716      // previous iteration. For non-ordered reductions all UF parts are
717      // generated.
718      bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
719                              isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
720                              (isa<VPReductionPHIRecipe>(PhiR) &&
721                               cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
722      unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
723  
724      for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
725        Value *Phi = State->get(PhiR, Part);
726        Value *Val = State->get(PhiR->getBackedgeValue(),
727                                SinglePartNeeded ? State->UF - 1 : Part);
728        cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
729      }
730    }
731  
732    // We do not attempt to preserve DT for outer loop vectorization currently.
733    if (!EnableVPlanNativePath) {
734      BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
735      State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
736      updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
737                          State->CFG.ExitBB);
738    }
739  }
740  
741  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
742  LLVM_DUMP_METHOD
743  void VPlan::print(raw_ostream &O) const {
744    VPSlotTracker SlotTracker(this);
745  
746    O << "VPlan '" << getName() << "' {";
747  
748    if (VectorTripCount.getNumUsers() > 0) {
749      O << "\nLive-in ";
750      VectorTripCount.printAsOperand(O, SlotTracker);
751      O << " = vector-trip-count\n";
752    }
753  
754    if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
755      O << "\nLive-in ";
756      BackedgeTakenCount->printAsOperand(O, SlotTracker);
757      O << " = backedge-taken count\n";
758    }
759  
760    for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
761      O << '\n';
762      Block->print(O, "", SlotTracker);
763    }
764  
765    if (!LiveOuts.empty())
766      O << "\n";
767    for (const auto &KV : LiveOuts) {
768      O << "Live-out ";
769      KV.second->getPhi()->printAsOperand(O);
770      O << " = ";
771      KV.second->getOperand(0)->printAsOperand(O, SlotTracker);
772      O << "\n";
773    }
774  
775    O << "}\n";
776  }
777  
778  std::string VPlan::getName() const {
779    std::string Out;
780    raw_string_ostream RSO(Out);
781    RSO << Name << " for ";
782    if (!VFs.empty()) {
783      RSO << "VF={" << VFs[0];
784      for (ElementCount VF : drop_begin(VFs))
785        RSO << "," << VF;
786      RSO << "},";
787    }
788  
789    if (UFs.empty()) {
790      RSO << "UF>=1";
791    } else {
792      RSO << "UF={" << UFs[0];
793      for (unsigned UF : drop_begin(UFs))
794        RSO << "," << UF;
795      RSO << "}";
796    }
797  
798    return Out;
799  }
800  
801  LLVM_DUMP_METHOD
802  void VPlan::printDOT(raw_ostream &O) const {
803    VPlanPrinter Printer(O, *this);
804    Printer.dump();
805  }
806  
807  LLVM_DUMP_METHOD
808  void VPlan::dump() const { print(dbgs()); }
809  #endif
810  
811  void VPlan::addLiveOut(PHINode *PN, VPValue *V) {
812    assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
813    LiveOuts.insert({PN, new VPLiveOut(PN, V)});
814  }
815  
816  void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
817                                  BasicBlock *LoopLatchBB,
818                                  BasicBlock *LoopExitBB) {
819    // The vector body may be more than a single basic-block by this point.
820    // Update the dominator tree information inside the vector body by propagating
821    // it from header to latch, expecting only triangular control-flow, if any.
822    BasicBlock *PostDomSucc = nullptr;
823    for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
824      // Get the list of successors of this block.
825      std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
826      assert(Succs.size() <= 2 &&
827             "Basic block in vector loop has more than 2 successors.");
828      PostDomSucc = Succs[0];
829      if (Succs.size() == 1) {
830        assert(PostDomSucc->getSinglePredecessor() &&
831               "PostDom successor has more than one predecessor.");
832        DT->addNewBlock(PostDomSucc, BB);
833        continue;
834      }
835      BasicBlock *InterimSucc = Succs[1];
836      if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
837        PostDomSucc = Succs[1];
838        InterimSucc = Succs[0];
839      }
840      assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
841             "One successor of a basic block does not lead to the other.");
842      assert(InterimSucc->getSinglePredecessor() &&
843             "Interim successor has more than one predecessor.");
844      assert(PostDomSucc->hasNPredecessors(2) &&
845             "PostDom successor has more than two predecessors.");
846      DT->addNewBlock(InterimSucc, BB);
847      DT->addNewBlock(PostDomSucc, BB);
848    }
849    // Latch block is a new dominator for the loop exit.
850    DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
851    assert(DT->verify(DominatorTree::VerificationLevel::Fast));
852  }
853  
854  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
855  
856  Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
857    return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
858           Twine(getOrCreateBID(Block));
859  }
860  
861  Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
862    const std::string &Name = Block->getName();
863    if (!Name.empty())
864      return Name;
865    return "VPB" + Twine(getOrCreateBID(Block));
866  }
867  
868  void VPlanPrinter::dump() {
869    Depth = 1;
870    bumpIndent(0);
871    OS << "digraph VPlan {\n";
872    OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
873    if (!Plan.getName().empty())
874      OS << "\\n" << DOT::EscapeString(Plan.getName());
875    if (Plan.BackedgeTakenCount) {
876      OS << ", where:\\n";
877      Plan.BackedgeTakenCount->print(OS, SlotTracker);
878      OS << " := BackedgeTakenCount";
879    }
880    OS << "\"]\n";
881    OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
882    OS << "edge [fontname=Courier, fontsize=30]\n";
883    OS << "compound=true\n";
884  
885    for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
886      dumpBlock(Block);
887  
888    OS << "}\n";
889  }
890  
891  void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
892    if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
893      dumpBasicBlock(BasicBlock);
894    else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
895      dumpRegion(Region);
896    else
897      llvm_unreachable("Unsupported kind of VPBlock.");
898  }
899  
900  void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
901                              bool Hidden, const Twine &Label) {
902    // Due to "dot" we print an edge between two regions as an edge between the
903    // exiting basic block and the entry basic of the respective regions.
904    const VPBlockBase *Tail = From->getExitingBasicBlock();
905    const VPBlockBase *Head = To->getEntryBasicBlock();
906    OS << Indent << getUID(Tail) << " -> " << getUID(Head);
907    OS << " [ label=\"" << Label << '\"';
908    if (Tail != From)
909      OS << " ltail=" << getUID(From);
910    if (Head != To)
911      OS << " lhead=" << getUID(To);
912    if (Hidden)
913      OS << "; splines=none";
914    OS << "]\n";
915  }
916  
917  void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
918    auto &Successors = Block->getSuccessors();
919    if (Successors.size() == 1)
920      drawEdge(Block, Successors.front(), false, "");
921    else if (Successors.size() == 2) {
922      drawEdge(Block, Successors.front(), false, "T");
923      drawEdge(Block, Successors.back(), false, "F");
924    } else {
925      unsigned SuccessorNumber = 0;
926      for (auto *Successor : Successors)
927        drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
928    }
929  }
930  
931  void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
932    // Implement dot-formatted dump by performing plain-text dump into the
933    // temporary storage followed by some post-processing.
934    OS << Indent << getUID(BasicBlock) << " [label =\n";
935    bumpIndent(1);
936    std::string Str;
937    raw_string_ostream SS(Str);
938    // Use no indentation as we need to wrap the lines into quotes ourselves.
939    BasicBlock->print(SS, "", SlotTracker);
940  
941    // We need to process each line of the output separately, so split
942    // single-string plain-text dump.
943    SmallVector<StringRef, 0> Lines;
944    StringRef(Str).rtrim('\n').split(Lines, "\n");
945  
946    auto EmitLine = [&](StringRef Line, StringRef Suffix) {
947      OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
948    };
949  
950    // Don't need the "+" after the last line.
951    for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
952      EmitLine(Line, " +\n");
953    EmitLine(Lines.back(), "\n");
954  
955    bumpIndent(-1);
956    OS << Indent << "]\n";
957  
958    dumpEdges(BasicBlock);
959  }
960  
961  void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
962    OS << Indent << "subgraph " << getUID(Region) << " {\n";
963    bumpIndent(1);
964    OS << Indent << "fontname=Courier\n"
965       << Indent << "label=\""
966       << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
967       << DOT::EscapeString(Region->getName()) << "\"\n";
968    // Dump the blocks of the region.
969    assert(Region->getEntry() && "Region contains no inner blocks.");
970    for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
971      dumpBlock(Block);
972    bumpIndent(-1);
973    OS << Indent << "}\n";
974    dumpEdges(Region);
975  }
976  
977  void VPlanIngredient::print(raw_ostream &O) const {
978    if (auto *Inst = dyn_cast<Instruction>(V)) {
979      if (!Inst->getType()->isVoidTy()) {
980        Inst->printAsOperand(O, false);
981        O << " = ";
982      }
983      O << Inst->getOpcodeName() << " ";
984      unsigned E = Inst->getNumOperands();
985      if (E > 0) {
986        Inst->getOperand(0)->printAsOperand(O, false);
987        for (unsigned I = 1; I < E; ++I)
988          Inst->getOperand(I)->printAsOperand(O << ", ", false);
989      }
990    } else // !Inst
991      V->printAsOperand(O, false);
992  }
993  
994  #endif
995  
996  template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
997  
998  void VPValue::replaceAllUsesWith(VPValue *New) {
999    for (unsigned J = 0; J < getNumUsers();) {
1000      VPUser *User = Users[J];
1001      unsigned NumUsers = getNumUsers();
1002      for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
1003        if (User->getOperand(I) == this)
1004          User->setOperand(I, New);
1005      // If a user got removed after updating the current user, the next user to
1006      // update will be moved to the current position, so we only need to
1007      // increment the index if the number of users did not change.
1008      if (NumUsers == getNumUsers())
1009        J++;
1010    }
1011  }
1012  
1013  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1014  void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
1015    if (const Value *UV = getUnderlyingValue()) {
1016      OS << "ir<";
1017      UV->printAsOperand(OS, false);
1018      OS << ">";
1019      return;
1020    }
1021  
1022    unsigned Slot = Tracker.getSlot(this);
1023    if (Slot == unsigned(-1))
1024      OS << "<badref>";
1025    else
1026      OS << "vp<%" << Tracker.getSlot(this) << ">";
1027  }
1028  
1029  void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
1030    interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1031      Op->printAsOperand(O, SlotTracker);
1032    });
1033  }
1034  #endif
1035  
1036  void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1037                                            Old2NewTy &Old2New,
1038                                            InterleavedAccessInfo &IAI) {
1039    ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
1040        RPOT(Region->getEntry());
1041    for (VPBlockBase *Base : RPOT) {
1042      visitBlock(Base, Old2New, IAI);
1043    }
1044  }
1045  
1046  void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1047                                           InterleavedAccessInfo &IAI) {
1048    if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1049      for (VPRecipeBase &VPI : *VPBB) {
1050        if (isa<VPHeaderPHIRecipe>(&VPI))
1051          continue;
1052        assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1053        auto *VPInst = cast<VPInstruction>(&VPI);
1054  
1055        auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
1056        if (!Inst)
1057          continue;
1058        auto *IG = IAI.getInterleaveGroup(Inst);
1059        if (!IG)
1060          continue;
1061  
1062        auto NewIGIter = Old2New.find(IG);
1063        if (NewIGIter == Old2New.end())
1064          Old2New[IG] = new InterleaveGroup<VPInstruction>(
1065              IG->getFactor(), IG->isReverse(), IG->getAlign());
1066  
1067        if (Inst == IG->getInsertPos())
1068          Old2New[IG]->setInsertPos(VPInst);
1069  
1070        InterleaveGroupMap[VPInst] = Old2New[IG];
1071        InterleaveGroupMap[VPInst]->insertMember(
1072            VPInst, IG->getIndex(Inst),
1073            Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1074                                  : IG->getFactor()));
1075      }
1076    } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1077      visitRegion(Region, Old2New, IAI);
1078    else
1079      llvm_unreachable("Unsupported kind of VPBlock.");
1080  }
1081  
1082  VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
1083                                                   InterleavedAccessInfo &IAI) {
1084    Old2NewTy Old2New;
1085    visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
1086  }
1087  
1088  void VPSlotTracker::assignSlot(const VPValue *V) {
1089    assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
1090    Slots[V] = NextSlot++;
1091  }
1092  
1093  void VPSlotTracker::assignSlots(const VPlan &Plan) {
1094  
1095    for (const auto &P : Plan.VPExternalDefs)
1096      assignSlot(P.second);
1097  
1098    assignSlot(&Plan.VectorTripCount);
1099    if (Plan.BackedgeTakenCount)
1100      assignSlot(Plan.BackedgeTakenCount);
1101  
1102    ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1103        RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1104    for (const VPBasicBlock *VPBB :
1105         VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1106      for (const VPRecipeBase &Recipe : *VPBB)
1107        for (VPValue *Def : Recipe.definedValues())
1108          assignSlot(Def);
1109  }
1110  
1111  bool vputils::onlyFirstLaneUsed(VPValue *Def) {
1112    return all_of(Def->users(),
1113                  [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
1114  }
1115  
1116  VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
1117                                                  ScalarEvolution &SE) {
1118    if (auto *E = dyn_cast<SCEVConstant>(Expr))
1119      return Plan.getOrAddExternalDef(E->getValue());
1120    if (auto *E = dyn_cast<SCEVUnknown>(Expr))
1121      return Plan.getOrAddExternalDef(E->getValue());
1122  
1123    VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
1124    VPExpandSCEVRecipe *Step = new VPExpandSCEVRecipe(Expr, SE);
1125    Preheader->appendRecipe(Step);
1126    return Step;
1127  }
1128