1 //===-- VPlanUnroll.cpp - VPlan unroller ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements explicit unrolling for VPlans.
11 ///
12 //===----------------------------------------------------------------------===//
13
14 #include "VPRecipeBuilder.h"
15 #include "VPlan.h"
16 #include "VPlanAnalysis.h"
17 #include "VPlanCFG.h"
18 #include "VPlanHelpers.h"
19 #include "VPlanPatternMatch.h"
20 #include "VPlanTransforms.h"
21 #include "VPlanUtils.h"
22 #include "llvm/ADT/PostOrderIterator.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/ScopeExit.h"
25 #include "llvm/Analysis/IVDescriptors.h"
26 #include "llvm/IR/Intrinsics.h"
27
28 using namespace llvm;
29 using namespace llvm::VPlanPatternMatch;
30
31 namespace {
32
33 /// Helper to hold state needed for unrolling. It holds the Plan to unroll by
34 /// UF. It also holds copies of VPValues across UF-1 unroll parts to facilitate
35 /// the unrolling transformation, where the original VPValues are retained for
36 /// part zero.
37 class UnrollState {
38 /// Plan to unroll.
39 VPlan &Plan;
40 /// Unroll factor to unroll by.
41 const unsigned UF;
42 /// Analysis for types.
43 VPTypeAnalysis TypeInfo;
44
45 /// Unrolling may create recipes that should not be unrolled themselves.
46 /// Those are tracked in ToSkip.
47 SmallPtrSet<VPRecipeBase *, 8> ToSkip;
48
49 // Associate with each VPValue of part 0 its unrolled instances of parts 1,
50 // ..., UF-1.
51 DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts;
52
53 /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
54 void unrollReplicateRegionByUF(VPRegionBlock *VPR);
55
56 /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
57 /// all parts.
58 void unrollRecipeByUF(VPRecipeBase &R);
59
60 /// Unroll header phi recipe \p R. How exactly the recipe gets unrolled
61 /// depends on the concrete header phi. Inserts newly created recipes at \p
62 /// InsertPtForPhi.
63 void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
64 VPBasicBlock::iterator InsertPtForPhi);
65
66 /// Unroll a widen induction recipe \p IV. This introduces recipes to compute
67 /// the induction steps for each part.
68 void unrollWidenInductionByUF(VPWidenIntOrFpInductionRecipe *IV,
69 VPBasicBlock::iterator InsertPtForPhi);
70
getConstantVPV(unsigned Part)71 VPValue *getConstantVPV(unsigned Part) {
72 Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
73 return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
74 }
75
76 public:
UnrollState(VPlan & Plan,unsigned UF,LLVMContext & Ctx)77 UnrollState(VPlan &Plan, unsigned UF, LLVMContext &Ctx)
78 : Plan(Plan), UF(UF), TypeInfo(Plan.getCanonicalIV()->getScalarType()) {}
79
80 void unrollBlock(VPBlockBase *VPB);
81
getValueForPart(VPValue * V,unsigned Part)82 VPValue *getValueForPart(VPValue *V, unsigned Part) {
83 if (Part == 0 || V->isLiveIn())
84 return V;
85 assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
86 "accessed value does not exist");
87 return VPV2Parts[V][Part - 1];
88 }
89
90 /// Given a single original recipe \p OrigR (of part zero), and its copy \p
91 /// CopyR for part \p Part, map every VPValue defined by \p OrigR to its
92 /// corresponding VPValue defined by \p CopyR.
addRecipeForPart(VPRecipeBase * OrigR,VPRecipeBase * CopyR,unsigned Part)93 void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
94 unsigned Part) {
95 for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
96 auto Ins = VPV2Parts.insert({VPV, {}});
97 assert(Ins.first->second.size() == Part - 1 && "earlier parts not set");
98 Ins.first->second.push_back(CopyR->getVPValue(Idx));
99 }
100 }
101
102 /// Given a uniform recipe \p R, add it for all parts.
addUniformForAllParts(VPSingleDefRecipe * R)103 void addUniformForAllParts(VPSingleDefRecipe *R) {
104 auto Ins = VPV2Parts.insert({R, {}});
105 assert(Ins.second && "uniform value already added");
106 for (unsigned Part = 0; Part != UF; ++Part)
107 Ins.first->second.push_back(R);
108 }
109
contains(VPValue * VPV) const110 bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }
111
112 /// Update \p R's operand at \p OpIdx with its corresponding VPValue for part
113 /// \p P.
remapOperand(VPRecipeBase * R,unsigned OpIdx,unsigned Part)114 void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
115 auto *Op = R->getOperand(OpIdx);
116 R->setOperand(OpIdx, getValueForPart(Op, Part));
117 }
118
119 /// Update \p R's operands with their corresponding VPValues for part \p P.
remapOperands(VPRecipeBase * R,unsigned Part)120 void remapOperands(VPRecipeBase *R, unsigned Part) {
121 for (const auto &[OpIdx, Op] : enumerate(R->operands()))
122 R->setOperand(OpIdx, getValueForPart(Op, Part));
123 }
124 };
125 } // namespace
126
unrollReplicateRegionByUF(VPRegionBlock * VPR)127 void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
128 VPBlockBase *InsertPt = VPR->getSingleSuccessor();
129 for (unsigned Part = 1; Part != UF; ++Part) {
130 auto *Copy = VPR->clone();
131 VPBlockUtils::insertBlockBefore(Copy, InsertPt);
132
133 auto PartI = vp_depth_first_shallow(Copy->getEntry());
134 auto Part0 = vp_depth_first_shallow(VPR->getEntry());
135 for (const auto &[PartIVPBB, Part0VPBB] :
136 zip(VPBlockUtils::blocksOnly<VPBasicBlock>(PartI),
137 VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
138 for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
139 remapOperands(&PartIR, Part);
140 if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
141 ScalarIVSteps->addOperand(getConstantVPV(Part));
142 }
143
144 addRecipeForPart(&Part0R, &PartIR, Part);
145 }
146 }
147 }
148 }
149
unrollWidenInductionByUF(VPWidenIntOrFpInductionRecipe * IV,VPBasicBlock::iterator InsertPtForPhi)150 void UnrollState::unrollWidenInductionByUF(
151 VPWidenIntOrFpInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
152 VPBasicBlock *PH = cast<VPBasicBlock>(
153 IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
154 Type *IVTy = TypeInfo.inferScalarType(IV);
155 auto &ID = IV->getInductionDescriptor();
156 VPIRFlags Flags;
157 if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
158 Flags = ID.getInductionBinOp()->getFastMathFlags();
159
160 VPValue *ScalarStep = IV->getStepValue();
161 VPBuilder Builder(PH);
162 VPInstruction *VectorStep = Builder.createNaryOp(
163 VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, IVTy, Flags,
164 IV->getDebugLoc());
165
166 ToSkip.insert(VectorStep);
167
168 // Now create recipes to compute the induction steps for part 1 .. UF. Part 0
169 // remains the header phi. Parts > 0 are computed by adding Step to the
170 // previous part. The header phi recipe will get 2 new operands: the step
171 // value for a single part and the last part, used to compute the backedge
172 // value during VPWidenIntOrFpInductionRecipe::execute. %Part.0 =
173 // VPWidenIntOrFpInductionRecipe %Start, %ScalarStep, %VectorStep, %Part.3
174 // %Part.1 = %Part.0 + %VectorStep
175 // %Part.2 = %Part.1 + %VectorStep
176 // %Part.3 = %Part.2 + %VectorStep
177 //
178 // The newly added recipes are added to ToSkip to avoid interleaving them
179 // again.
180 VPValue *Prev = IV;
181 Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
182 unsigned AddOpc =
183 IVTy->isFloatingPointTy() ? ID.getInductionOpcode() : Instruction::Add;
184 for (unsigned Part = 1; Part != UF; ++Part) {
185 std::string Name =
186 Part > 1 ? "step.add." + std::to_string(Part) : "step.add";
187
188 VPInstruction *Add = Builder.createNaryOp(AddOpc,
189 {
190 Prev,
191 VectorStep,
192 },
193 Flags, IV->getDebugLoc(), Name);
194 ToSkip.insert(Add);
195 addRecipeForPart(IV, Add, Part);
196 Prev = Add;
197 }
198 IV->addOperand(VectorStep);
199 IV->addOperand(Prev);
200 }
201
unrollHeaderPHIByUF(VPHeaderPHIRecipe * R,VPBasicBlock::iterator InsertPtForPhi)202 void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
203 VPBasicBlock::iterator InsertPtForPhi) {
204 // First-order recurrences pass a single vector or scalar through their header
205 // phis, irrespective of interleaving.
206 if (isa<VPFirstOrderRecurrencePHIRecipe>(R))
207 return;
208
209 // Generate step vectors for each unrolled part.
210 if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) {
211 unrollWidenInductionByUF(IV, InsertPtForPhi);
212 return;
213 }
214
215 auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
216 if (RdxPhi && RdxPhi->isOrdered())
217 return;
218
219 auto InsertPt = std::next(R->getIterator());
220 for (unsigned Part = 1; Part != UF; ++Part) {
221 VPRecipeBase *Copy = R->clone();
222 Copy->insertBefore(*R->getParent(), InsertPt);
223 addRecipeForPart(R, Copy, Part);
224 if (isa<VPWidenPointerInductionRecipe>(R)) {
225 Copy->addOperand(R);
226 Copy->addOperand(getConstantVPV(Part));
227 } else if (RdxPhi) {
228 // If the start value is a ReductionStartVector, use the identity value
229 // (second operand) for unrolled parts. If the scaling factor is > 1,
230 // create a new ReductionStartVector with the scale factor and both
231 // operands set to the identity value.
232 if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
233 assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
234 "unexpected start VPInstruction");
235 if (Part != 1)
236 continue;
237 VPValue *StartV;
238 if (match(VPI->getOperand(2), m_SpecificInt(1))) {
239 StartV = VPI->getOperand(1);
240 } else {
241 auto *C = VPI->clone();
242 C->setOperand(0, C->getOperand(1));
243 C->insertAfter(VPI);
244 StartV = C;
245 }
246 for (unsigned Part = 1; Part != UF; ++Part)
247 VPV2Parts[VPI][Part - 1] = StartV;
248 }
249 Copy->addOperand(getConstantVPV(Part));
250 } else {
251 assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
252 "unexpected header phi recipe not needing unrolled part");
253 }
254 }
255 }
256
257 /// Handle non-header-phi recipes.
unrollRecipeByUF(VPRecipeBase & R)258 void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
259 if (match(&R, m_BranchOnCond(m_VPValue())) ||
260 match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
261 return;
262
263 if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
264 if (vputils::onlyFirstPartUsed(VPI)) {
265 addUniformForAllParts(VPI);
266 return;
267 }
268 }
269 if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
270 if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
271 RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
272 // Stores to an invariant address only need to store the last part.
273 remapOperands(&R, UF - 1);
274 return;
275 }
276 if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) {
277 if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
278 addUniformForAllParts(RepR);
279 return;
280 }
281 }
282 }
283
284 // Unroll non-uniform recipes.
285 auto InsertPt = std::next(R.getIterator());
286 VPBasicBlock &VPBB = *R.getParent();
287 for (unsigned Part = 1; Part != UF; ++Part) {
288 VPRecipeBase *Copy = R.clone();
289 Copy->insertBefore(VPBB, InsertPt);
290 addRecipeForPart(&R, Copy, Part);
291
292 VPValue *Op;
293 if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
294 m_VPValue(), m_VPValue(Op)))) {
295 Copy->setOperand(0, getValueForPart(Op, Part - 1));
296 Copy->setOperand(1, getValueForPart(Op, Part));
297 continue;
298 }
299 if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
300 auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
301 if (Phi && Phi->isOrdered()) {
302 auto &Parts = VPV2Parts[Phi];
303 if (Part == 1) {
304 Parts.clear();
305 Parts.push_back(Red);
306 }
307 Parts.push_back(Copy->getVPSingleValue());
308 Phi->setOperand(1, Copy->getVPSingleValue());
309 }
310 }
311 remapOperands(Copy, Part);
312
313 // Add operand indicating the part to generate code for, to recipes still
314 // requiring it.
315 if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
316 VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
317 match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
318 m_VPValue())))
319 Copy->addOperand(getConstantVPV(Part));
320
321 if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
322 Copy->setOperand(0, R.getOperand(0));
323 }
324 }
325
unrollBlock(VPBlockBase * VPB)326 void UnrollState::unrollBlock(VPBlockBase *VPB) {
327 auto *VPR = dyn_cast<VPRegionBlock>(VPB);
328 if (VPR) {
329 if (VPR->isReplicator())
330 return unrollReplicateRegionByUF(VPR);
331
332 // Traverse blocks in region in RPO to ensure defs are visited before uses
333 // across blocks.
334 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
335 RPOT(VPR->getEntry());
336 for (VPBlockBase *VPB : RPOT)
337 unrollBlock(VPB);
338 return;
339 }
340
341 // VPB is a VPBasicBlock; unroll it, i.e., unroll its recipes.
342 auto *VPBB = cast<VPBasicBlock>(VPB);
343 auto InsertPtForPhi = VPBB->getFirstNonPhi();
344 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
345 if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
346 continue;
347
348 // Add all VPValues for all parts to AnyOf, FirstActiveLaneMask and
349 // Compute*Result which combine all parts to compute the final value.
350 VPValue *Op1;
351 if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
352 match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
353 m_VPValue(Op1))) ||
354 match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
355 m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
356 match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
357 m_VPValue(), m_VPValue(Op1))) ||
358 match(&R, m_VPInstruction<VPInstruction::ComputeFindIVResult>(
359 m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
360 addUniformForAllParts(cast<VPInstruction>(&R));
361 for (unsigned Part = 1; Part != UF; ++Part)
362 R.addOperand(getValueForPart(Op1, Part));
363 continue;
364 }
365 VPValue *Op0;
366 if (match(&R, m_VPInstruction<VPInstruction::ExtractLastElement>(
367 m_VPValue(Op0))) ||
368 match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
369 m_VPValue(Op0)))) {
370 addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
371 if (Plan.hasScalarVFOnly()) {
372 auto *I = cast<VPInstruction>(&R);
373 // Extracting from end with VF = 1 implies retrieving the last or
374 // penultimate scalar part (UF-1 or UF-2).
375 unsigned Offset =
376 I->getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
377 I->replaceAllUsesWith(getValueForPart(Op0, UF - Offset));
378 R.eraseFromParent();
379 } else {
380 // Otherwise we extract from the last part.
381 remapOperands(&R, UF - 1);
382 }
383 continue;
384 }
385
386 auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
387 if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
388 addUniformForAllParts(SingleDef);
389 continue;
390 }
391
392 if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
393 unrollHeaderPHIByUF(H, InsertPtForPhi);
394 continue;
395 }
396
397 unrollRecipeByUF(R);
398 }
399 }
400
unrollByUF(VPlan & Plan,unsigned UF,LLVMContext & Ctx)401 void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
402 assert(UF > 0 && "Unroll factor must be positive");
403 Plan.setUF(UF);
404 auto Cleanup = make_scope_exit([&Plan]() {
405 auto Iter = vp_depth_first_deep(Plan.getEntry());
406 // Remove recipes that are redundant after unrolling.
407 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
408 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
409 auto *VPI = dyn_cast<VPInstruction>(&R);
410 if (VPI &&
411 VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
412 VPI->getNumOperands() == 1) {
413 VPI->replaceAllUsesWith(VPI->getOperand(0));
414 VPI->eraseFromParent();
415 }
416 }
417 }
418 });
419 if (UF == 1) {
420 return;
421 }
422
423 UnrollState Unroller(Plan, UF, Ctx);
424
425 // Iterate over all blocks in the plan starting from Entry, and unroll
426 // recipes inside them. This includes the vector preheader and middle blocks,
427 // which may set up or post-process per-part values.
428 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
429 Plan.getEntry());
430 for (VPBlockBase *VPB : RPOT)
431 Unroller.unrollBlock(VPB);
432
433 unsigned Part = 1;
434 // Remap operands of cloned header phis to update backedge values. The header
435 // phis cloned during unrolling are just after the header phi for part 0.
436 // Reset Part to 1 when reaching the first (part 0) recipe of a block.
437 for (VPRecipeBase &H :
438 Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
439 // The second operand of Fixed Order Recurrence phi's, feeding the spliced
440 // value across the backedge, needs to remap to the last part of the spliced
441 // value.
442 if (isa<VPFirstOrderRecurrencePHIRecipe>(&H)) {
443 Unroller.remapOperand(&H, 1, UF - 1);
444 continue;
445 }
446 if (Unroller.contains(H.getVPSingleValue()) ||
447 isa<VPWidenPointerInductionRecipe>(&H)) {
448 Part = 1;
449 continue;
450 }
451 Unroller.remapOperands(&H, Part);
452 Part++;
453 }
454
455 VPlanTransforms::removeDeadRecipes(Plan);
456 }
457
458 /// Create a single-scalar clone of \p RepR for lane \p Lane.
cloneForLane(VPlan & Plan,VPBuilder & Builder,Type * IdxTy,VPReplicateRecipe * RepR,VPLane Lane)459 static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
460 Type *IdxTy, VPReplicateRecipe *RepR,
461 VPLane Lane) {
462 // Collect the operands at Lane, creating extracts as needed.
463 SmallVector<VPValue *> NewOps;
464 for (VPValue *Op : RepR->operands()) {
465 if (vputils::isSingleScalar(Op)) {
466 NewOps.push_back(Op);
467 continue;
468 }
469 if (Lane.getKind() == VPLane::Kind::ScalableLast) {
470 NewOps.push_back(
471 Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
472 continue;
473 }
474 // Look through buildvector to avoid unnecessary extracts.
475 if (match(Op, m_BuildVector())) {
476 NewOps.push_back(
477 cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
478 continue;
479 }
480 VPValue *Idx =
481 Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
482 VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
483 NewOps.push_back(Ext);
484 }
485
486 auto *New =
487 new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
488 /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
489 New->transferFlags(*RepR);
490 New->insertBefore(RepR);
491 return New;
492 }
493
replicateByVF(VPlan & Plan,ElementCount VF)494 void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
495 Type *IdxTy = IntegerType::get(
496 Plan.getScalarHeader()->getIRBasicBlock()->getContext(), 32);
497
498 // Visit all VPBBs outside the loop region and directly inside the top-level
499 // loop region.
500 auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
501 vp_depth_first_shallow(Plan.getEntry()));
502 auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
503 vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
504 auto VPBBsToUnroll =
505 concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
506 for (VPBasicBlock *VPBB : VPBBsToUnroll) {
507 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
508 auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
509 if (!RepR || RepR->isSingleScalar())
510 continue;
511
512 VPBuilder Builder(RepR);
513 if (RepR->getNumUsers() == 0) {
514 if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
515 vputils::isSingleScalar(RepR->getOperand(1))) {
516 // Stores to invariant addresses need to store the last lane only.
517 cloneForLane(Plan, Builder, IdxTy, RepR,
518 VPLane::getLastLaneForVF(VF));
519 } else {
520 // Create single-scalar version of RepR for all lanes.
521 for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
522 cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I));
523 }
524 RepR->eraseFromParent();
525 continue;
526 }
527 /// Create single-scalar version of RepR for all lanes.
528 SmallVector<VPValue *> LaneDefs;
529 for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
530 LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)));
531
532 /// Users that only demand the first lane can use the definition for lane
533 /// 0.
534 RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
535 return U.onlyFirstLaneUsed(RepR);
536 });
537
538 // If needed, create a Build(Struct)Vector recipe to insert the scalar
539 // lane values into a vector.
540 Type *ResTy = RepR->getUnderlyingInstr()->getType();
541 VPValue *VecRes = Builder.createNaryOp(
542 ResTy->isStructTy() ? VPInstruction::BuildStructVector
543 : VPInstruction::BuildVector,
544 LaneDefs);
545 RepR->replaceAllUsesWith(VecRes);
546 RepR->eraseFromParent();
547 }
548 }
549 }
550