//===-- VPlanUnroll.cpp - VPlan unroller ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements explicit unrolling for VPlans.
///
//===----------------------------------------------------------------------===//

#include "VPRecipeBuilder.h"
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;
using namespace llvm::VPlanPatternMatch;

namespace {

/// Helper to hold state needed for unrolling. It holds the Plan to unroll by
/// UF. It also holds copies of VPValues across UF-1 unroll parts to facilitate
/// the unrolling transformation, where the original VPValues are retained for
/// part zero.
class UnrollState {
  /// Plan to unroll.
  VPlan &Plan;
  /// Unroll factor to unroll by.
  const unsigned UF;
  /// Analysis for types.
  VPTypeAnalysis TypeInfo;

  /// Unrolling may create recipes that should not be unrolled themselves.
  /// Those are tracked in ToSkip.
  SmallPtrSet<VPRecipeBase *, 8> ToSkip;

  /// Associate with each VPValue of part 0 its unrolled instances of parts 1,
  /// ..., UF-1.
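  /// For example (a sketch, names hypothetical), with UF = 3 a value %v
  /// defined by a part-0 recipe maps to {%v.1, %v.2}, its clones for parts 1
  /// and 2; getValueForPart(%v, 0) below returns %v itself.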
  DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts;

  /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
  void unrollReplicateRegionByUF(VPRegionBlock *VPR);

  /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform
  /// across all parts.
  void unrollRecipeByUF(VPRecipeBase &R);

  /// Unroll header phi recipe \p R. How exactly the recipe gets unrolled
  /// depends on the concrete header phi. Inserts newly created recipes at \p
  /// InsertPtForPhi.
  void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                           VPBasicBlock::iterator InsertPtForPhi);

  /// Unroll a widen induction recipe \p IV. This introduces recipes to compute
  /// the induction steps for each part.
  void unrollWidenInductionByUF(VPWidenIntOrFpInductionRecipe *IV,
                                VPBasicBlock::iterator InsertPtForPhi);

  VPValue *getConstantVPV(unsigned Part) {
    Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
    return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
  }

public:
  UnrollState(VPlan &Plan, unsigned UF, LLVMContext &Ctx)
      : Plan(Plan), UF(UF), TypeInfo(Plan.getCanonicalIV()->getScalarType()) {}

  void unrollBlock(VPBlockBase *VPB);

  VPValue *getValueForPart(VPValue *V, unsigned Part) {
    if (Part == 0 || V->isLiveIn())
      return V;
    assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
           "accessed value does not exist");
    return VPV2Parts[V][Part - 1];
  }

  /// Given a single original recipe \p OrigR (of part zero), and its copy \p
  /// CopyR for part \p Part, map every VPValue defined by \p OrigR to its
  /// corresponding VPValue defined by \p CopyR.
  void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
                        unsigned Part) {
    for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
      auto Ins = VPV2Parts.insert({VPV, {}});
      assert(Ins.first->second.size() == Part - 1 && "earlier parts not set");
      Ins.first->second.push_back(CopyR->getVPValue(Idx));
    }
  }

  /// Given a uniform recipe \p R, add it for all parts.
  void addUniformForAllParts(VPSingleDefRecipe *R) {
    auto Ins = VPV2Parts.insert({R, {}});
    assert(Ins.second && "uniform value already added");
    for (unsigned Part = 0; Part != UF; ++Part)
      Ins.first->second.push_back(R);
  }

  bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }

  /// Update \p R's operand at \p OpIdx with its corresponding VPValue for part
  /// \p Part.
  void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
    auto *Op = R->getOperand(OpIdx);
    R->setOperand(OpIdx, getValueForPart(Op, Part));
  }

  /// Update \p R's operands with their corresponding VPValues for part
  /// \p Part.
  void remapOperands(VPRecipeBase *R, unsigned Part) {
    for (const auto &[OpIdx, Op] : enumerate(R->operands()))
      R->setOperand(OpIdx, getValueForPart(Op, Part));
  }
};
} // namespace

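// As a sketch (block names hypothetical), unrolling a replicate region for
// UF = 2 clones the region once and inserts the clone before the region's
// single successor:
//   ... -> [ %pred.region ] -> [ %pred.region.1 ] -> successor -> ...
// The clone's operands are remapped to their part-1 values, and scalar IV
// steps additionally get the part added as an extra operand.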
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
  VPBlockBase *InsertPt = VPR->getSingleSuccessor();
  for (unsigned Part = 1; Part != UF; ++Part) {
    auto *Copy = VPR->clone();
    VPBlockUtils::insertBlockBefore(Copy, InsertPt);

    auto PartI = vp_depth_first_shallow(Copy->getEntry());
    auto Part0 = vp_depth_first_shallow(VPR->getEntry());
    for (const auto &[PartIVPBB, Part0VPBB] :
         zip(VPBlockUtils::blocksOnly<VPBasicBlock>(PartI),
             VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
      for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
        remapOperands(&PartIR, Part);
        if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
          ScalarIVSteps->addOperand(getConstantVPV(Part));
        }

        addRecipeForPart(&Part0R, &PartIR, Part);
      }
    }
  }
}

void UnrollState::unrollWidenInductionByUF(
    VPWidenIntOrFpInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
  VPBasicBlock *PH = cast<VPBasicBlock>(
      IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
  Type *IVTy = TypeInfo.inferScalarType(IV);
  auto &ID = IV->getInductionDescriptor();
  VPIRFlags Flags;
  if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
    Flags = ID.getInductionBinOp()->getFastMathFlags();

  VPValue *ScalarStep = IV->getStepValue();
  VPBuilder Builder(PH);
  VPInstruction *VectorStep = Builder.createNaryOp(
      VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, IVTy, Flags,
      IV->getDebugLoc());

  ToSkip.insert(VectorStep);

  // Now create recipes to compute the induction steps for parts 1 .. UF - 1.
  // Part 0 remains the header phi. Parts > 0 are computed by adding Step to
  // the previous part. The header phi recipe will get 2 new operands: the
  // step value for a single part and the last part, used to compute the
  // backedge value during VPWidenIntOrFpInductionRecipe::execute. E.g., for
  // UF = 4:
  //   %Part.0 = VPWidenIntOrFpInductionRecipe %Start, %ScalarStep,
  //             %VectorStep, %Part.3
  //   %Part.1 = %Part.0 + %VectorStep
  //   %Part.2 = %Part.1 + %VectorStep
  //   %Part.3 = %Part.2 + %VectorStep
  //
  // The newly added recipes are added to ToSkip to avoid interleaving them
  // again.
  VPValue *Prev = IV;
  Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
  unsigned AddOpc =
      IVTy->isFloatingPointTy() ? ID.getInductionOpcode() : Instruction::Add;
  for (unsigned Part = 1; Part != UF; ++Part) {
    std::string Name =
        Part > 1 ? "step.add." + std::to_string(Part) : "step.add";

    VPInstruction *Add = Builder.createNaryOp(AddOpc,
                                              {
                                                  Prev,
                                                  VectorStep,
                                              },
                                              Flags, IV->getDebugLoc(), Name);
    ToSkip.insert(Add);
    addRecipeForPart(IV, Add, Part);
    Prev = Add;
  }
  IV->addOperand(VectorStep);
  IV->addOperand(Prev);
}

void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                                      VPBasicBlock::iterator InsertPtForPhi) {
  // First-order recurrences pass a single vector or scalar through their
  // header phis, irrespective of interleaving.
  if (isa<VPFirstOrderRecurrencePHIRecipe>(R))
    return;

  // Generate step vectors for each unrolled part.
  if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) {
    unrollWidenInductionByUF(IV, InsertPtForPhi);
    return;
  }

  auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
  if (RdxPhi && RdxPhi->isOrdered())
    return;

  auto InsertPt = std::next(R->getIterator());
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R->clone();
    Copy->insertBefore(*R->getParent(), InsertPt);
    addRecipeForPart(R, Copy, Part);
    if (isa<VPWidenPointerInductionRecipe>(R)) {
      Copy->addOperand(R);
      Copy->addOperand(getConstantVPV(Part));
    } else if (RdxPhi) {
      // If the start value is a ReductionStartVector, use the identity value
      // (second operand) for unrolled parts. If the scaling factor is > 1,
      // create a new ReductionStartVector with the scale factor and both
      // operands set to the identity value.
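      // E.g. (a sketch), with %s = reduction-start-vector %init, %iden, 1,
      // parts 1 .. UF - 1 start from %iden, since only part 0 must
      // incorporate the initial value %init.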
      if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
        assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
               "unexpected start VPInstruction");
        if (Part != 1)
          continue;
        VPValue *StartV;
        if (match(VPI->getOperand(2), m_SpecificInt(1))) {
          StartV = VPI->getOperand(1);
        } else {
          auto *C = VPI->clone();
          C->setOperand(0, C->getOperand(1));
          C->insertAfter(VPI);
          StartV = C;
        }
        for (unsigned Part = 1; Part != UF; ++Part)
          VPV2Parts[VPI][Part - 1] = StartV;
      }
      Copy->addOperand(getConstantVPV(Part));
    } else {
      assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
             "unexpected header phi recipe not needing unrolled part");
    }
  }
}

/// Handle non-header-phi recipes.
void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
  if (match(&R, m_BranchOnCond(m_VPValue())) ||
      match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
    return;

  if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
    if (vputils::onlyFirstPartUsed(VPI)) {
      addUniformForAllParts(VPI);
      return;
    }
  }
  if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
    if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
        RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
      // Stores to an invariant address only need to store the last part.
      remapOperands(&R, UF - 1);
      return;
    }
    if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) {
      if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
        addUniformForAllParts(RepR);
        return;
      }
    }
  }

  // Unroll non-uniform recipes.
  auto InsertPt = std::next(R.getIterator());
  VPBasicBlock &VPBB = *R.getParent();
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R.clone();
    Copy->insertBefore(VPBB, InsertPt);
    addRecipeForPart(&R, Copy, Part);

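    // A first-order recurrence splice reads values from adjacent parts: the
    // copy for part I must splice the values of parts I - 1 and I. E.g. (a
    // sketch) for UF = 2, %s = splice(%for.phi, %x) is copied as
    // %s.1 = splice(%x, %x.1).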
    VPValue *Op;
    if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
                      m_VPValue(), m_VPValue(Op)))) {
      Copy->setOperand(0, getValueForPart(Op, Part - 1));
      Copy->setOperand(1, getValueForPart(Op, Part));
      continue;
    }
    if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
      auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
      if (Phi && Phi->isOrdered()) {
        auto &Parts = VPV2Parts[Phi];
        if (Part == 1) {
          Parts.clear();
          Parts.push_back(Red);
        }
        Parts.push_back(Copy->getVPSingleValue());
        Phi->setOperand(1, Copy->getVPSingleValue());
      }
    }
    remapOperands(Copy, Part);

    // Add an operand indicating the part to generate code for to recipes that
    // still require it.
    if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
            VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
        match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
                        m_VPValue())))
      Copy->addOperand(getConstantVPV(Part));

    if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
      Copy->setOperand(0, R.getOperand(0));
  }
}

void UnrollState::unrollBlock(VPBlockBase *VPB) {
  auto *VPR = dyn_cast<VPRegionBlock>(VPB);
  if (VPR) {
    if (VPR->isReplicator())
      return unrollReplicateRegionByUF(VPR);

    // Traverse blocks in region in RPO to ensure defs are visited before uses
    // across blocks.
    ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
        RPOT(VPR->getEntry());
    for (VPBlockBase *VPB : RPOT)
      unrollBlock(VPB);
    return;
  }

  // VPB is a VPBasicBlock; unroll it, i.e., unroll its recipes.
  auto *VPBB = cast<VPBasicBlock>(VPB);
  auto InsertPtForPhi = VPBB->getFirstNonPhi();
  for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
    if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
      continue;

    // Add all VPValues for all parts to AnyOf, FirstActiveLane and
    // Compute*Result, which combine all parts to compute the final value.
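    // E.g. (a sketch), for UF = 2 compute-reduction-result(%rdx.phi, %red)
    // becomes compute-reduction-result(%rdx.phi, %red, %red.1), with %red.1
    // the part-1 copy of %red.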
    VPValue *Op1;
    if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
                      m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
                      m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
                      m_VPValue(), m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeFindIVResult>(
                      m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
      addUniformForAllParts(cast<VPInstruction>(&R));
      for (unsigned Part = 1; Part != UF; ++Part)
        R.addOperand(getValueForPart(Op1, Part));
      continue;
    }
    VPValue *Op0;
    if (match(&R, m_VPInstruction<VPInstruction::ExtractLastElement>(
                      m_VPValue(Op0))) ||
        match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
                      m_VPValue(Op0)))) {
      addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
      if (Plan.hasScalarVFOnly()) {
        auto *I = cast<VPInstruction>(&R);
        // Extracting from end with VF = 1 implies retrieving the last or
        // penultimate scalar part (UF-1 or UF-2).
        unsigned Offset =
            I->getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
        I->replaceAllUsesWith(getValueForPart(Op0, UF - Offset));
        R.eraseFromParent();
      } else {
        // Otherwise we extract from the last part.
        remapOperands(&R, UF - 1);
      }
      continue;
    }

    auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
    if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
      addUniformForAllParts(SingleDef);
      continue;
    }

    if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
      unrollHeaderPHIByUF(H, InsertPtForPhi);
      continue;
    }

    unrollRecipeByUF(R);
  }
}

void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
  assert(UF > 0 && "Unroll factor must be positive");
  Plan.setUF(UF);
  auto Cleanup = make_scope_exit([&Plan]() {
    auto Iter = vp_depth_first_deep(Plan.getEntry());
    // Remove recipes that are redundant after unrolling.
    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
      for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
        auto *VPI = dyn_cast<VPInstruction>(&R);
        if (VPI &&
            VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
            VPI->getNumOperands() == 1) {
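          // With a single operand, no part operand was added, so the recipe
          // computes part 0, which is just its start operand; fold it away.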
          VPI->replaceAllUsesWith(VPI->getOperand(0));
          VPI->eraseFromParent();
        }
      }
    }
  });
  if (UF == 1)
    return;

  UnrollState Unroller(Plan, UF, Ctx);

  // Iterate over all blocks in the plan starting from Entry, and unroll
  // recipes inside them. This includes the vector preheader and middle blocks,
  // which may set up or post-process per-part values.
  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getEntry());
  for (VPBlockBase *VPB : RPOT)
    Unroller.unrollBlock(VPB);

  unsigned Part = 1;
  // Remap operands of cloned header phis to update backedge values. The header
  // phis cloned during unrolling are just after the header phi for part 0.
  // Reset Part to 1 when reaching the next part-0 header phi.
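  // E.g. (a sketch), for UF = 2 the header phis appear in order [%phi,
  // %phi.1, %other, %other.1], as clones are inserted right after their
  // originals; each clone gets its operands, including the backedge value,
  // remapped to the values of its part.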
  for (VPRecipeBase &H :
       Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
    // The second operand of fixed-order recurrence phis, feeding the spliced
    // value across the backedge, needs to remap to the last part of the
    // spliced value.
    if (isa<VPFirstOrderRecurrencePHIRecipe>(&H)) {
      Unroller.remapOperand(&H, 1, UF - 1);
      continue;
    }
    if (Unroller.contains(H.getVPSingleValue()) ||
        isa<VPWidenPointerInductionRecipe>(&H)) {
      Part = 1;
      continue;
    }
    Unroller.remapOperands(&H, Part);
    Part++;
  }

  VPlanTransforms::removeDeadRecipes(Plan);
}

/// Create a single-scalar clone of \p RepR for lane \p Lane.
static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
                                       Type *IdxTy, VPReplicateRecipe *RepR,
                                       VPLane Lane) {
  // Collect the operands at Lane, creating extracts as needed.
  SmallVector<VPValue *> NewOps;
  for (VPValue *Op : RepR->operands()) {
    if (vputils::isSingleScalar(Op)) {
      NewOps.push_back(Op);
      continue;
    }
    if (Lane.getKind() == VPLane::Kind::ScalableLast) {
      NewOps.push_back(
          Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
      continue;
    }
    // Look through buildvector to avoid unnecessary extracts.
    if (match(Op, m_BuildVector())) {
      NewOps.push_back(
          cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
      continue;
    }
    VPValue *Idx =
        Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
    VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
    NewOps.push_back(Ext);
  }

  auto *New =
      new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
                            /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
  New->transferFlags(*RepR);
  New->insertBefore(RepR);
  return New;
}

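// As a sketch (names hypothetical), for VF = 2 a widened replicate recipe
//   %r = replicate call @f(%op)
// is replaced by per-lane clones and a recipe building the vector result:
//   %r.0 = replicate call @f(extractelement %op, 0)  (single scalar)
//   %r.1 = replicate call @f(extractelement %op, 1)  (single scalar)
//   %r.v = buildvector %r.0, %r.1
// with users demanding only the first lane rewired to %r.0.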
void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
  Type *IdxTy = IntegerType::get(
      Plan.getScalarHeader()->getIRBasicBlock()->getContext(), 32);

  // Visit all VPBBs outside the loop region and directly inside the top-level
  // loop region.
  auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
      vp_depth_first_shallow(Plan.getEntry()));
  auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
      vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
  auto VPBBsToUnroll =
      concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
  for (VPBasicBlock *VPBB : VPBBsToUnroll) {
    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || RepR->isSingleScalar())
        continue;

      VPBuilder Builder(RepR);
      if (RepR->getNumUsers() == 0) {
        if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
            vputils::isSingleScalar(RepR->getOperand(1))) {
          // Stores to invariant addresses need to store the last lane only.
          cloneForLane(Plan, Builder, IdxTy, RepR,
                       VPLane::getLastLaneForVF(VF));
        } else {
          // Create single-scalar version of RepR for all lanes.
          for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
            cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I));
        }
        RepR->eraseFromParent();
        continue;
      }
      // Create single-scalar version of RepR for all lanes.
      SmallVector<VPValue *> LaneDefs;
      for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
        LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)));

      // Users that only demand the first lane can use the definition for lane
      // 0.
      RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
        return U.onlyFirstLaneUsed(RepR);
      });

      // If needed, create a Build(Struct)Vector recipe to insert the scalar
      // lane values into a vector.
      Type *ResTy = RepR->getUnderlyingInstr()->getType();
      VPValue *VecRes = Builder.createNaryOp(
          ResTy->isStructTy() ? VPInstruction::BuildStructVector
                              : VPInstruction::BuildVector,
          LaneDefs);
      RepR->replaceAllUsesWith(VecRes);
      RepR->eraseFromParent();
    }
  }
}
550