//===-- VPlanUnroll.cpp - VPlan unroller ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements explicit unrolling for VPlans.
///
//===----------------------------------------------------------------------===//

#include "VPRecipeBuilder.h"
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanCFG.h"
#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;
using namespace llvm::VPlanPatternMatch;

namespace {

/// Helper to hold state needed for unrolling. It holds the Plan to unroll by
/// UF. It also holds copies of VPValues across UF-1 unroll parts to facilitate
/// the unrolling transformation, where the original VPValues are retained for
/// part zero.
class UnrollState {
  /// Plan to unroll.
  VPlan &Plan;
  /// Unroll factor to unroll by.
  const unsigned UF;
  /// Analysis for types.
  VPTypeAnalysis TypeInfo;

  /// Unrolling may create recipes that should not be unrolled themselves.
  /// Those are tracked in ToSkip.
  SmallPtrSet<VPRecipeBase *, 8> ToSkip;

  /// Associate with each VPValue of part 0 its unrolled instances of parts
  /// 1, ..., UF-1.
  DenseMap<VPValue *, SmallVector<VPValue *>> VPV2Parts;

  /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
  void unrollReplicateRegionByUF(VPRegionBlock *VPR);

  /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform
  /// across all parts.
  void unrollRecipeByUF(VPRecipeBase &R);

  /// Unroll header phi recipe \p R. How exactly the recipe gets unrolled
  /// depends on the concrete header phi. Inserts newly created recipes at
  /// \p InsertPtForPhi.
  void unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                           VPBasicBlock::iterator InsertPtForPhi);

  /// Unroll a widen induction recipe \p IV. This introduces recipes to compute
  /// the induction steps for each part.
  void unrollWidenInductionByUF(VPWidenIntOrFpInductionRecipe *IV,
                                VPBasicBlock::iterator InsertPtForPhi);

  VPValue *getConstantVPV(unsigned Part) {
    Type *CanIVIntTy = Plan.getCanonicalIV()->getScalarType();
    return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part));
  }

public:
  UnrollState(VPlan &Plan, unsigned UF, LLVMContext &Ctx)
      : Plan(Plan), UF(UF), TypeInfo(Plan.getCanonicalIV()->getScalarType()) {}

  void unrollBlock(VPBlockBase *VPB);

  VPValue *getValueForPart(VPValue *V, unsigned Part) {
    if (Part == 0 || V->isLiveIn())
      return V;
    assert((VPV2Parts.contains(V) && VPV2Parts[V].size() >= Part) &&
           "accessed value does not exist");
    return VPV2Parts[V][Part - 1];
  }

  /// Given a single original recipe \p OrigR (of part zero), and its copy \p
  /// CopyR for part \p Part, map every VPValue defined by \p OrigR to its
  /// corresponding VPValue defined by \p CopyR.
  void addRecipeForPart(VPRecipeBase *OrigR, VPRecipeBase *CopyR,
                        unsigned Part) {
    for (const auto &[Idx, VPV] : enumerate(OrigR->definedValues())) {
      auto Ins = VPV2Parts.insert({VPV, {}});
      assert(Ins.first->second.size() == Part - 1 && "earlier parts not set");
      Ins.first->second.push_back(CopyR->getVPValue(Idx));
    }
  }

  /// Given a uniform recipe \p R, add it for all parts.
  void addUniformForAllParts(VPSingleDefRecipe *R) {
    auto Ins = VPV2Parts.insert({R, {}});
    assert(Ins.second && "uniform value already added");
    for (unsigned Part = 0; Part != UF; ++Part)
      Ins.first->second.push_back(R);
  }

  bool contains(VPValue *VPV) const { return VPV2Parts.contains(VPV); }

  /// Update \p R's operand at \p OpIdx with its corresponding VPValue for part
  /// \p Part.
  void remapOperand(VPRecipeBase *R, unsigned OpIdx, unsigned Part) {
    auto *Op = R->getOperand(OpIdx);
    R->setOperand(OpIdx, getValueForPart(Op, Part));
  }

  /// Update \p R's operands with their corresponding VPValues for part
  /// \p Part.
  void remapOperands(VPRecipeBase *R, unsigned Part) {
    for (const auto &[OpIdx, Op] : enumerate(R->operands()))
      R->setOperand(OpIdx, getValueForPart(Op, Part));
  }
};
} // namespace
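
// Illustrative sketch (hypothetical names, not an actual VPlan dump) of
// unrolling a replicate region by UF = 2: the region computing part 0 is
// cloned once, the clone is inserted before the region's single successor
// with its operands remapped to part-1 values, and every VPValue defined in
// the part-0 region is associated with its part-1 clone via VPV2Parts:
//
//   replicate.region (part 0)        replicate.region (part 0)
//   successor                   =>   replicate.region (part 1, clone)
//                                    successor
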
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
  VPBlockBase *InsertPt = VPR->getSingleSuccessor();
  for (unsigned Part = 1; Part != UF; ++Part) {
    auto *Copy = VPR->clone();
    VPBlockUtils::insertBlockBefore(Copy, InsertPt);

    auto PartI = vp_depth_first_shallow(Copy->getEntry());
    auto Part0 = vp_depth_first_shallow(VPR->getEntry());
    for (const auto &[PartIVPBB, Part0VPBB] :
         zip(VPBlockUtils::blocksOnly<VPBasicBlock>(PartI),
             VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
      for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
        remapOperands(&PartIR, Part);
        if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
          ScalarIVSteps->addOperand(getConstantVPV(Part));
        }

        addRecipeForPart(&Part0R, &PartIR, Part);
      }
    }
  }
}

void UnrollState::unrollWidenInductionByUF(
    VPWidenIntOrFpInductionRecipe *IV, VPBasicBlock::iterator InsertPtForPhi) {
  VPBasicBlock *PH = cast<VPBasicBlock>(
      IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
  Type *IVTy = TypeInfo.inferScalarType(IV);
  auto &ID = IV->getInductionDescriptor();
  VPIRFlags Flags;
  if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
    Flags = ID.getInductionBinOp()->getFastMathFlags();

  VPValue *ScalarStep = IV->getStepValue();
  VPBuilder Builder(PH);
  VPInstruction *VectorStep = Builder.createNaryOp(
      VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, IVTy, Flags,
      IV->getDebugLoc());

  ToSkip.insert(VectorStep);

  // Now create recipes to compute the induction steps for parts 1 .. UF-1.
  // Part 0 remains the header phi. Parts > 0 are computed by adding Step to
  // the previous part. The header phi recipe will get 2 new operands: the step
  // value for a single part and the last part, used to compute the backedge
  // value during VPWidenIntOrFpInductionRecipe::execute. E.g., for UF = 4:
  //
  //   %Part.0 = VPWidenIntOrFpInductionRecipe %Start, %ScalarStep,
  //             %VectorStep, %Part.3
  //   %Part.1 = %Part.0 + %VectorStep
  //   %Part.2 = %Part.1 + %VectorStep
  //   %Part.3 = %Part.2 + %VectorStep
  //
  // The newly added recipes are added to ToSkip to avoid interleaving them
  // again.
  VPValue *Prev = IV;
  Builder.setInsertPoint(IV->getParent(), InsertPtForPhi);
  unsigned AddOpc =
      IVTy->isFloatingPointTy() ? ID.getInductionOpcode() : Instruction::Add;
  for (unsigned Part = 1; Part != UF; ++Part) {
    std::string Name =
        Part > 1 ? "step.add." + std::to_string(Part) : "step.add";

    VPInstruction *Add = Builder.createNaryOp(AddOpc,
                                              {
                                                  Prev,
                                                  VectorStep,
                                              },
                                              Flags, IV->getDebugLoc(), Name);
    ToSkip.insert(Add);
    addRecipeForPart(IV, Add, Part);
    Prev = Add;
  }
  IV->addOperand(VectorStep);
  IV->addOperand(Prev);
}
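
// Illustrative sketch (hypothetical names, not an actual VPlan dump) of
// unrolling a non-ordered reduction header phi by UF = 2: the phi is cloned
// for part 1, and parts > 0 start at the reduction's identity value, taken
// from the start value's ReductionStartVector when present:
//
//   %rdx   = VPReductionPHIRecipe %start, %rdx.next
// becomes
//   %rdx   = VPReductionPHIRecipe %start, %rdx.next
//   %rdx.1 = VPReductionPHIRecipe %identity, %rdx.next.1
//
// The backedge values (%rdx.next.1 here) are remapped at the end of
// unrollByUF.
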
void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
                                      VPBasicBlock::iterator InsertPtForPhi) {
  // First-order recurrences pass a single vector or scalar through their
  // header phis, irrespective of interleaving.
  if (isa<VPFirstOrderRecurrencePHIRecipe>(R))
    return;

  // Generate step vectors for each unrolled part.
  if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) {
    unrollWidenInductionByUF(IV, InsertPtForPhi);
    return;
  }

  auto *RdxPhi = dyn_cast<VPReductionPHIRecipe>(R);
  if (RdxPhi && RdxPhi->isOrdered())
    return;

  auto InsertPt = std::next(R->getIterator());
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R->clone();
    Copy->insertBefore(*R->getParent(), InsertPt);
    addRecipeForPart(R, Copy, Part);
    if (isa<VPWidenPointerInductionRecipe>(R)) {
      Copy->addOperand(R);
      Copy->addOperand(getConstantVPV(Part));
    } else if (RdxPhi) {
      // If the start value is a ReductionStartVector, use the identity value
      // (second operand) for unrolled parts. If the scaling factor is > 1,
      // create a new ReductionStartVector with the scale factor and both
      // operands set to the identity value.
      if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
        assert(VPI->getOpcode() == VPInstruction::ReductionStartVector &&
               "unexpected start VPInstruction");
        if (Part != 1)
          continue;
        VPValue *StartV;
        if (match(VPI->getOperand(2), m_SpecificInt(1))) {
          StartV = VPI->getOperand(1);
        } else {
          auto *C = VPI->clone();
          C->setOperand(0, C->getOperand(1));
          C->insertAfter(VPI);
          StartV = C;
        }
        // All unrolled parts use the same start value; record it for parts
        // 1 .. UF-1 up front, as this block is only reached for Part == 1.
        for (unsigned Part = 1; Part != UF; ++Part)
          VPV2Parts[VPI][Part - 1] = StartV;
      }
      Copy->addOperand(getConstantVPV(Part));
    } else {
      assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&
             "unexpected header phi recipe not needing unrolled part");
    }
  }
}
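
// Illustrative sketch (hypothetical names) for the FirstOrderRecurrenceSplice
// handling in unrollRecipeByUF below: the splice for part i combines part i-1
// of the spliced value with part i, with part 0 reading the recurrence phi.
// For UF = 2:
//
//   %s = FirstOrderRecurrenceSplice %recur.phi, %v
// becomes
//   %s   = FirstOrderRecurrenceSplice %recur.phi, %v
//   %s.1 = FirstOrderRecurrenceSplice %v, %v.1
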
/// Handle non-header-phi recipes.
void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
  if (match(&R, m_BranchOnCond(m_VPValue())) ||
      match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
    return;

  if (auto *VPI = dyn_cast<VPInstruction>(&R)) {
    if (vputils::onlyFirstPartUsed(VPI)) {
      addUniformForAllParts(VPI);
      return;
    }
  }
  if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
    if (isa<StoreInst>(RepR->getUnderlyingValue()) &&
        RepR->getOperand(1)->isDefinedOutsideLoopRegions()) {
      // Stores to an invariant address only need to store the last part.
      remapOperands(&R, UF - 1);
      return;
    }
    if (auto *II = dyn_cast<IntrinsicInst>(RepR->getUnderlyingValue())) {
      if (II->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl) {
        addUniformForAllParts(RepR);
        return;
      }
    }
  }

  // Unroll non-uniform recipes.
  auto InsertPt = std::next(R.getIterator());
  VPBasicBlock &VPBB = *R.getParent();
  for (unsigned Part = 1; Part != UF; ++Part) {
    VPRecipeBase *Copy = R.clone();
    Copy->insertBefore(VPBB, InsertPt);
    addRecipeForPart(&R, Copy, Part);

    VPValue *Op;
    if (match(&R, m_VPInstruction<VPInstruction::FirstOrderRecurrenceSplice>(
                      m_VPValue(), m_VPValue(Op)))) {
      Copy->setOperand(0, getValueForPart(Op, Part - 1));
      Copy->setOperand(1, getValueForPart(Op, Part));
      continue;
    }
    if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
      auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
      if (Phi && Phi->isOrdered()) {
        auto &Parts = VPV2Parts[Phi];
        if (Part == 1) {
          Parts.clear();
          Parts.push_back(Red);
        }
        Parts.push_back(Copy->getVPSingleValue());
        Phi->setOperand(1, Copy->getVPSingleValue());
      }
    }
    remapOperands(Copy, Part);

    // Add operand indicating the part to generate code for, to recipes still
    // requiring it.
    if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
            VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
        match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
                        m_VPValue())))
      Copy->addOperand(getConstantVPV(Part));

    if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
      Copy->setOperand(0, R.getOperand(0));
  }
}
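
// Illustrative sketch (hypothetical names) for the handling of AnyOf,
// FirstActiveLane and Compute*Result in unrollBlock below: these recipes
// combine all parts into a single final value, so instead of being cloned
// they get the values of parts 1 .. UF-1 appended as extra operands. For
// UF = 2:
//
//   %res = ComputeReductionResult %rdx.phi, %rdx
// becomes
//   %res = ComputeReductionResult %rdx.phi, %rdx, %rdx.1
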
void UnrollState::unrollBlock(VPBlockBase *VPB) {
  auto *VPR = dyn_cast<VPRegionBlock>(VPB);
  if (VPR) {
    if (VPR->isReplicator())
      return unrollReplicateRegionByUF(VPR);

    // Traverse blocks in region in RPO to ensure defs are visited before uses
    // across blocks.
    ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
        RPOT(VPR->getEntry());
    for (VPBlockBase *VPB : RPOT)
      unrollBlock(VPB);
    return;
  }

  // VPB is a VPBasicBlock; unroll it, i.e., unroll its recipes.
  auto *VPBB = cast<VPBasicBlock>(VPB);
  auto InsertPtForPhi = VPBB->getFirstNonPhi();
  for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
    if (ToSkip.contains(&R) || isa<VPIRInstruction>(&R))
      continue;

    // Add all VPValues for all parts to AnyOf, FirstActiveLane and
    // Compute*Result, which combine all parts to compute the final value.
    VPValue *Op1;
    if (match(&R, m_VPInstruction<VPInstruction::AnyOf>(m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::FirstActiveLane>(
                      m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
                      m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
                      m_VPValue(), m_VPValue(Op1))) ||
        match(&R, m_VPInstruction<VPInstruction::ComputeFindIVResult>(
                      m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
      addUniformForAllParts(cast<VPInstruction>(&R));
      for (unsigned Part = 1; Part != UF; ++Part)
        R.addOperand(getValueForPart(Op1, Part));
      continue;
    }
    VPValue *Op0;
    if (match(&R, m_VPInstruction<VPInstruction::ExtractLastElement>(
                      m_VPValue(Op0))) ||
        match(&R, m_VPInstruction<VPInstruction::ExtractPenultimateElement>(
                      m_VPValue(Op0)))) {
      addUniformForAllParts(cast<VPSingleDefRecipe>(&R));
      if (Plan.hasScalarVFOnly()) {
        auto *I = cast<VPInstruction>(&R);
        // Extracting from the end with VF = 1 means retrieving the last or
        // penultimate scalar part (UF-1 or UF-2).
        unsigned Offset =
            I->getOpcode() == VPInstruction::ExtractLastElement ? 1 : 2;
        I->replaceAllUsesWith(getValueForPart(Op0, UF - Offset));
        R.eraseFromParent();
      } else {
        // Otherwise we extract from the last part.
        remapOperands(&R, UF - 1);
      }
      continue;
    }

    auto *SingleDef = dyn_cast<VPSingleDefRecipe>(&R);
    if (SingleDef && vputils::isUniformAcrossVFsAndUFs(SingleDef)) {
      addUniformForAllParts(SingleDef);
      continue;
    }

    if (auto *H = dyn_cast<VPHeaderPHIRecipe>(&R)) {
      unrollHeaderPHIByUF(H, InsertPtForPhi);
      continue;
    }

    unrollRecipeByUF(R);
  }
}
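
// Illustrative example for the scope-exit cleanup in unrollByUF below: a
// CanonicalIVIncrementForPart that never received a part operand computes
// part 0, i.e. its unmodified input, so
//
//   %iv.part.0 = CanonicalIVIncrementForPart %iv
//
// is redundant; its uses are replaced by %iv and the recipe is erased.
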
void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
  assert(UF > 0 && "Unroll factor must be positive");
  Plan.setUF(UF);
  auto Cleanup = make_scope_exit([&Plan]() {
    auto Iter = vp_depth_first_deep(Plan.getEntry());
    // Remove recipes that are redundant after unrolling.
    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
      for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
        auto *VPI = dyn_cast<VPInstruction>(&R);
        if (VPI &&
            VPI->getOpcode() == VPInstruction::CanonicalIVIncrementForPart &&
            VPI->getNumOperands() == 1) {
          VPI->replaceAllUsesWith(VPI->getOperand(0));
          VPI->eraseFromParent();
        }
      }
    }
  });
  if (UF == 1)
    return;

  UnrollState Unroller(Plan, UF, Ctx);

  // Iterate over all blocks in the plan starting from Entry, and unroll
  // recipes inside them. This includes the vector preheader and middle blocks,
  // which may set up or post-process per-part values.
  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getEntry());
  for (VPBlockBase *VPB : RPOT)
    Unroller.unrollBlock(VPB);

  unsigned Part = 1;
  // Remap operands of cloned header phis to update backedge values. The header
  // phis cloned during unrolling are just after the header phi for part 0.
  // Reset Part to 1 when reaching the next part-0 header phi.
  for (VPRecipeBase &H :
       Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
    // The second operand of fixed-order recurrence phis, feeding the spliced
    // value across the backedge, needs to be remapped to the last part of the
    // spliced value.
    if (isa<VPFirstOrderRecurrencePHIRecipe>(&H)) {
      Unroller.remapOperand(&H, 1, UF - 1);
      continue;
    }
    if (Unroller.contains(H.getVPSingleValue()) ||
        isa<VPWidenPointerInductionRecipe>(&H)) {
      Part = 1;
      continue;
    }
    Unroller.remapOperands(&H, Part);
    Part++;
  }

  VPlanTransforms::removeDeadRecipes(Plan);
}

/// Create a single-scalar clone of \p RepR for lane \p Lane.
static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
                                       Type *IdxTy, VPReplicateRecipe *RepR,
                                       VPLane Lane) {
  // Collect the operands at Lane, creating extracts as needed.
  SmallVector<VPValue *> NewOps;
  for (VPValue *Op : RepR->operands()) {
    if (vputils::isSingleScalar(Op)) {
      NewOps.push_back(Op);
      continue;
    }
    if (Lane.getKind() == VPLane::Kind::ScalableLast) {
      NewOps.push_back(
          Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
      continue;
    }
    // Look through buildvector to avoid unnecessary extracts.
    if (match(Op, m_BuildVector())) {
      NewOps.push_back(
          cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
      continue;
    }
    VPValue *Idx =
        Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
    VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
    NewOps.push_back(Ext);
  }

  auto *New =
      new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
                            /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR);
  New->transferFlags(*RepR);
  New->insertBefore(RepR);
  return New;
}
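
// Illustrative sketch (hypothetical names) of replicating a call recipe by
// VF = 2 in replicateByVF below: one single-scalar clone is created per lane,
// with extracts feeding the per-lane operands, and a BuildVector recreates
// the vector value for users that need it:
//
//   %r = VPReplicateRecipe call @foo(%op)
// becomes
//   %r.0 = VPReplicateRecipe call @foo(extractelement %op, 0)
//   %r.1 = VPReplicateRecipe call @foo(extractelement %op, 1)
//   %r   = BuildVector %r.0, %r.1
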
void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
  Type *IdxTy = IntegerType::get(
      Plan.getScalarHeader()->getIRBasicBlock()->getContext(), 32);

  // Visit all VPBBs outside the loop region and directly inside the top-level
  // loop region.
  auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
      vp_depth_first_shallow(Plan.getEntry()));
  auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
      vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
  auto VPBBsToUnroll =
      concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
  for (VPBasicBlock *VPBB : VPBBsToUnroll) {
    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || RepR->isSingleScalar())
        continue;

      VPBuilder Builder(RepR);
      if (RepR->getNumUsers() == 0) {
        if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
            vputils::isSingleScalar(RepR->getOperand(1))) {
          // Stores to invariant addresses need to store the last lane only.
          cloneForLane(Plan, Builder, IdxTy, RepR,
                       VPLane::getLastLaneForVF(VF));
        } else {
          // Create a single-scalar version of RepR for each lane.
          for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
            cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I));
        }
        RepR->eraseFromParent();
        continue;
      }
      // Create a single-scalar version of RepR for each lane.
      SmallVector<VPValue *> LaneDefs;
      for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
        LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)));

      // Users that only demand the first lane can use the definition for lane
      // 0.
      RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
        return U.onlyFirstLaneUsed(RepR);
      });

      // If needed, create a Build(Struct)Vector recipe to insert the scalar
      // lane values into a vector.
      Type *ResTy = RepR->getUnderlyingInstr()->getType();
      VPValue *VecRes = Builder.createNaryOp(
          ResTy->isStructTy() ? VPInstruction::BuildStructVector
                              : VPInstruction::BuildVector,
          LaneDefs);
      RepR->replaceAllUsesWith(VecRes);
      RepR->eraseFromParent();
    }
  }
}