1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This is the LLVM vectorization plan. It represents a candidate for 11 /// vectorization, allowing to plan and optimize how to vectorize a given loop 12 /// before generating LLVM-IR. 13 /// The vectorizer uses vectorization plans to estimate the costs of potential 14 /// candidates and if profitable to execute the desired plan, generating vector 15 /// LLVM-IR code. 16 /// 17 //===----------------------------------------------------------------------===// 18 19 #include "VPlan.h" 20 #include "LoopVectorizationPlanner.h" 21 #include "VPlanCFG.h" 22 #include "VPlanDominatorTree.h" 23 #include "VPlanHelpers.h" 24 #include "VPlanPatternMatch.h" 25 #include "VPlanTransforms.h" 26 #include "VPlanUtils.h" 27 #include "llvm/ADT/PostOrderIterator.h" 28 #include "llvm/ADT/STLExtras.h" 29 #include "llvm/ADT/SmallVector.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/ADT/Twine.h" 32 #include "llvm/Analysis/DomTreeUpdater.h" 33 #include "llvm/Analysis/LoopInfo.h" 34 #include "llvm/IR/BasicBlock.h" 35 #include "llvm/IR/CFG.h" 36 #include "llvm/IR/IRBuilder.h" 37 #include "llvm/IR/Instruction.h" 38 #include "llvm/IR/Instructions.h" 39 #include "llvm/IR/Type.h" 40 #include "llvm/IR/Value.h" 41 #include "llvm/Support/Casting.h" 42 #include "llvm/Support/CommandLine.h" 43 #include "llvm/Support/Debug.h" 44 #include "llvm/Support/GraphWriter.h" 45 #include "llvm/Support/raw_ostream.h" 46 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 47 #include "llvm/Transforms/Utils/LoopVersioning.h" 48 #include <cassert> 49 #include <string> 50 51 using namespace llvm; 52 using namespace llvm::VPlanPatternMatch; 53 54 namespace llvm { 55 extern cl::opt<bool> EnableVPlanNativePath; 56 } 57 58 extern cl::opt<unsigned> ForceTargetInstructionCost; 59 60 static cl::opt<bool> PrintVPlansInDotFormat( 61 "vplan-print-in-dot-format", cl::Hidden, 62 cl::desc("Use dot format instead of plain text when dumping VPlans")); 63 64 #define DEBUG_TYPE "loop-vectorize" 65 66 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 67 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPRecipeBase &R) { 68 const VPBasicBlock *Parent = R.getParent(); 69 VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr); 70 R.print(OS, "", SlotTracker); 71 return OS; 72 } 73 #endif 74 75 Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder, 76 const ElementCount &VF) const { 77 switch (LaneKind) { 78 case VPLane::Kind::ScalableLast: 79 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane 80 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF), 81 Builder.getInt32(VF.getKnownMinValue() - Lane)); 82 case VPLane::Kind::First: 83 return Builder.getInt32(Lane); 84 } 85 llvm_unreachable("Unknown lane kind"); 86 } 87 88 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def) 89 : SubclassID(SC), UnderlyingVal(UV), Def(Def) { 90 if (Def) 91 Def->addDefinedValue(this); 92 } 93 94 VPValue::~VPValue() { 95 assert(Users.empty() && "trying to delete a VPValue with remaining users"); 96 if (Def) 97 Def->removeDefinedValue(this); 98 } 99 100 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 101 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const { 102 if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def)) 103 R->print(OS, "", SlotTracker); 104 else 105 printAsOperand(OS, SlotTracker); 106 } 107 108 void VPValue::dump() const { 109 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def); 110 VPSlotTracker SlotTracker( 111 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); 112 print(dbgs(), SlotTracker); 113 dbgs() << "\n"; 114 } 115 116 void VPDef::dump() const { 117 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this); 118 VPSlotTracker SlotTracker( 119 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); 120 print(dbgs(), "", SlotTracker); 121 dbgs() << "\n"; 122 } 123 #endif 124 125 VPRecipeBase *VPValue::getDefiningRecipe() { 126 return cast_or_null<VPRecipeBase>(Def); 127 } 128 129 const VPRecipeBase *VPValue::getDefiningRecipe() const { 130 return cast_or_null<VPRecipeBase>(Def); 131 } 132 133 // Get the top-most entry block of \p Start. This is the entry block of the 134 // containing VPlan. This function is templated to support both const and non-const blocks 135 template <typename T> static T *getPlanEntry(T *Start) { 136 T *Next = Start; 137 T *Current = Start; 138 while ((Next = Next->getParent())) 139 Current = Next; 140 141 SmallSetVector<T *, 8> WorkList; 142 WorkList.insert(Current); 143 144 for (unsigned i = 0; i < WorkList.size(); i++) { 145 T *Current = WorkList[i]; 146 if (Current->getNumPredecessors() == 0) 147 return Current; 148 auto &Predecessors = Current->getPredecessors(); 149 WorkList.insert_range(Predecessors); 150 } 151 152 llvm_unreachable("VPlan without any entry node without predecessors"); 153 } 154 155 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; } 156 157 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; } 158 159 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly. 160 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { 161 const VPBlockBase *Block = this; 162 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 163 Block = Region->getEntry(); 164 return cast<VPBasicBlock>(Block); 165 } 166 167 VPBasicBlock *VPBlockBase::getEntryBasicBlock() { 168 VPBlockBase *Block = this; 169 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 170 Block = Region->getEntry(); 171 return cast<VPBasicBlock>(Block); 172 } 173 174 void VPBlockBase::setPlan(VPlan *ParentPlan) { 175 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry."); 176 Plan = ParentPlan; 177 } 178 179 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. 180 const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const { 181 const VPBlockBase *Block = this; 182 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 183 Block = Region->getExiting(); 184 return cast<VPBasicBlock>(Block); 185 } 186 187 VPBasicBlock *VPBlockBase::getExitingBasicBlock() { 188 VPBlockBase *Block = this; 189 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 190 Block = Region->getExiting(); 191 return cast<VPBasicBlock>(Block); 192 } 193 194 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { 195 if (!Successors.empty() || !Parent) 196 return this; 197 assert(Parent->getExiting() == this && 198 "Block w/o successors not the exiting block of its parent."); 199 return Parent->getEnclosingBlockWithSuccessors(); 200 } 201 202 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { 203 if (!Predecessors.empty() || !Parent) 204 return this; 205 assert(Parent->getEntry() == this && 206 "Block w/o predecessors not the entry of its parent."); 207 return Parent->getEnclosingBlockWithPredecessors(); 208 } 209 210 bool VPBlockUtils::isHeader(const VPBlockBase *VPB, 211 const VPDominatorTree &VPDT) { 212 auto *VPBB = dyn_cast<VPBasicBlock>(VPB); 213 if (!VPBB) 214 return false; 215 216 // If VPBB is in a region R, VPBB is a loop header if R is a loop region with 217 // VPBB as its entry, i.e., free of predecessors. 218 if (auto *R = VPBB->getParent()) 219 return !R->isReplicator() && VPBB->getNumPredecessors() == 0; 220 221 // A header dominates its second predecessor (the latch), with the other 222 // predecessor being the preheader 223 return VPB->getPredecessors().size() == 2 && 224 VPDT.dominates(VPB, VPB->getPredecessors()[1]); 225 } 226 227 bool VPBlockUtils::isLatch(const VPBlockBase *VPB, 228 const VPDominatorTree &VPDT) { 229 // A latch has a header as its second successor, with its other successor 230 // leaving the loop. A preheader OTOH has a header as its first (and only) 231 // successor. 232 return VPB->getNumSuccessors() == 2 && 233 VPBlockUtils::isHeader(VPB->getSuccessors()[1], VPDT); 234 } 235 236 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() { 237 iterator It = begin(); 238 while (It != end() && It->isPhi()) 239 It++; 240 return It; 241 } 242 243 VPTransformState::VPTransformState(const TargetTransformInfo *TTI, 244 ElementCount VF, LoopInfo *LI, 245 DominatorTree *DT, AssumptionCache *AC, 246 IRBuilderBase &Builder, VPlan *Plan, 247 Loop *CurrentParentLoop, Type *CanonicalIVTy) 248 : TTI(TTI), VF(VF), CFG(DT), LI(LI), AC(AC), Builder(Builder), Plan(Plan), 249 CurrentParentLoop(CurrentParentLoop), TypeAnalysis(CanonicalIVTy), 250 VPDT(*Plan) {} 251 252 Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) { 253 if (Def->isLiveIn()) 254 return Def->getLiveInIRValue(); 255 256 if (hasScalarValue(Def, Lane)) 257 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)]; 258 259 if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) && 260 hasScalarValue(Def, VPLane::getFirstLane())) { 261 return Data.VPV2Scalars[Def][0]; 262 } 263 264 // Look through BuildVector to avoid redundant extracts. 265 // TODO: Remove once replicate regions are unrolled explicitly. 266 if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) { 267 auto *BuildVector = cast<VPInstruction>(Def); 268 return get(BuildVector->getOperand(Lane.getKnownLane()), true); 269 } 270 271 assert(hasVectorValue(Def)); 272 auto *VecPart = Data.VPV2Vector[Def]; 273 if (!VecPart->getType()->isVectorTy()) { 274 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar"); 275 return VecPart; 276 } 277 // TODO: Cache created scalar values. 278 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF); 279 auto *Extract = Builder.CreateExtractElement(VecPart, LaneV); 280 // set(Def, Extract, Instance); 281 return Extract; 282 } 283 284 Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) { 285 if (NeedsScalar) { 286 assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) || 287 !vputils::onlyFirstLaneUsed(Def) || 288 (hasScalarValue(Def, VPLane(0)) && 289 Data.VPV2Scalars[Def].size() == 1)) && 290 "Trying to access a single scalar per part but has multiple scalars " 291 "per part."); 292 return get(Def, VPLane(0)); 293 } 294 295 // If Values have been set for this Def return the one relevant for \p Part. 296 if (hasVectorValue(Def)) 297 return Data.VPV2Vector[Def]; 298 299 auto GetBroadcastInstrs = [this, Def](Value *V) { 300 bool SafeToHoist = 301 !Def->hasDefiningRecipe() || 302 VPDT.properlyDominates(Def->getDefiningRecipe()->getParent(), 303 Plan->getVectorPreheader()); 304 305 if (VF.isScalar()) 306 return V; 307 // Place the code for broadcasting invariant variables in the new preheader. 308 IRBuilder<>::InsertPointGuard Guard(Builder); 309 if (SafeToHoist) { 310 BasicBlock *LoopVectorPreHeader = 311 CFG.VPBB2IRBB[Plan->getVectorPreheader()]; 312 if (LoopVectorPreHeader) 313 Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); 314 } 315 316 // Place the code for broadcasting invariant variables in the new preheader. 317 // Broadcast the scalar into all locations in the vector. 318 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast"); 319 320 return Shuf; 321 }; 322 323 if (!hasScalarValue(Def, {0})) { 324 assert(Def->isLiveIn() && "expected a live-in"); 325 Value *IRV = Def->getLiveInIRValue(); 326 Value *B = GetBroadcastInstrs(IRV); 327 set(Def, B); 328 return B; 329 } 330 331 Value *ScalarValue = get(Def, VPLane(0)); 332 // If we aren't vectorizing, we can just copy the scalar map values over 333 // to the vector map. 334 if (VF.isScalar()) { 335 set(Def, ScalarValue); 336 return ScalarValue; 337 } 338 339 bool IsSingleScalar = vputils::isSingleScalar(Def); 340 341 VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1); 342 // Check if there is a scalar value for the selected lane. 343 if (!hasScalarValue(Def, LastLane)) { 344 // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and 345 // VPExpandSCEVRecipes can also be a single scalar. 346 assert((isa<VPWidenIntOrFpInductionRecipe, VPScalarIVStepsRecipe, 347 VPExpandSCEVRecipe>(Def->getDefiningRecipe())) && 348 "unexpected recipe found to be invariant"); 349 IsSingleScalar = true; 350 LastLane = 0; 351 } 352 353 auto *LastInst = cast<Instruction>(get(Def, LastLane)); 354 // Set the insert point after the last scalarized instruction or after the 355 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence 356 // will directly follow the scalar definitions. 357 auto OldIP = Builder.saveIP(); 358 auto NewIP = isa<PHINode>(LastInst) 359 ? LastInst->getParent()->getFirstNonPHIIt() 360 : std::next(BasicBlock::iterator(LastInst)); 361 Builder.SetInsertPoint(&*NewIP); 362 363 // However, if we are vectorizing, we need to construct the vector values. 364 // If the value is known to be uniform after vectorization, we can just 365 // broadcast the scalar value corresponding to lane zero. Otherwise, we 366 // construct the vector values using insertelement instructions. Since the 367 // resulting vectors are stored in State, we will only generate the 368 // insertelements once. 369 Value *VectorValue = nullptr; 370 if (IsSingleScalar) { 371 VectorValue = GetBroadcastInstrs(ScalarValue); 372 set(Def, VectorValue); 373 } else { 374 assert(!VF.isScalable() && "VF is assumed to be non scalable."); 375 // Initialize packing with insertelements to start from poison. 376 VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF)); 377 for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane) 378 VectorValue = packScalarIntoVectorizedValue(Def, VectorValue, Lane); 379 set(Def, VectorValue); 380 } 381 Builder.restoreIP(OldIP); 382 return VectorValue; 383 } 384 385 void VPTransformState::setDebugLocFrom(DebugLoc DL) { 386 const DILocation *DIL = DL; 387 // When a FSDiscriminator is enabled, we don't need to add the multiply 388 // factors to the discriminators. 389 if (DIL && 390 Builder.GetInsertBlock() 391 ->getParent() 392 ->shouldEmitDebugInfoForProfiling() && 393 !EnableFSDiscriminator) { 394 // FIXME: For scalable vectors, assume vscale=1. 395 unsigned UF = Plan->getUF(); 396 auto NewDIL = 397 DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); 398 if (NewDIL) 399 Builder.SetCurrentDebugLocation(*NewDIL); 400 else 401 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: " 402 << DIL->getFilename() << " Line: " << DIL->getLine()); 403 } else 404 Builder.SetCurrentDebugLocation(DL); 405 } 406 407 Value *VPTransformState::packScalarIntoVectorizedValue(const VPValue *Def, 408 Value *WideValue, 409 const VPLane &Lane) { 410 Value *ScalarInst = get(Def, Lane); 411 Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF); 412 if (auto *StructTy = dyn_cast<StructType>(WideValue->getType())) { 413 // We must handle each element of a vectorized struct type. 414 for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) { 415 Value *ScalarValue = Builder.CreateExtractValue(ScalarInst, I); 416 Value *VectorValue = Builder.CreateExtractValue(WideValue, I); 417 VectorValue = 418 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr); 419 WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I); 420 } 421 } else { 422 WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr); 423 } 424 return WideValue; 425 } 426 427 BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) { 428 auto &CFG = State.CFG; 429 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks. 430 // Pred stands for Predessor. Prev stands for Previous - last visited/created. 431 BasicBlock *PrevBB = CFG.PrevBB; 432 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(), 433 PrevBB->getParent(), CFG.ExitBB); 434 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n'); 435 436 return NewBB; 437 } 438 439 void VPBasicBlock::connectToPredecessors(VPTransformState &State) { 440 auto &CFG = State.CFG; 441 BasicBlock *NewBB = CFG.VPBB2IRBB[this]; 442 443 // Register NewBB in its loop. In innermost loops its the same for all 444 // BB's. 445 Loop *ParentLoop = State.CurrentParentLoop; 446 // If this block has a sole successor that is an exit block or is an exit 447 // block itself then it needs adding to the same parent loop as the exit 448 // block. 449 VPBlockBase *SuccOrExitVPB = getSingleSuccessor(); 450 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this; 451 if (State.Plan->isExitBlock(SuccOrExitVPB)) { 452 ParentLoop = State.LI->getLoopFor( 453 cast<VPIRBasicBlock>(SuccOrExitVPB)->getIRBasicBlock()); 454 } 455 456 if (ParentLoop && !State.LI->getLoopFor(NewBB)) 457 ParentLoop->addBasicBlockToLoop(NewBB, *State.LI); 458 459 SmallVector<VPBlockBase *> Preds; 460 if (VPBlockUtils::isHeader(this, State.VPDT)) { 461 // There's no block for the latch yet, connect to the preheader only. 462 Preds = {getPredecessors()[0]}; 463 } else { 464 Preds = to_vector(getPredecessors()); 465 } 466 467 // Hook up the new basic block to its predecessors. 468 for (VPBlockBase *PredVPBlock : Preds) { 469 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); 470 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); 471 assert(CFG.VPBB2IRBB.contains(PredVPBB) && 472 "Predecessor basic-block not found building successor."); 473 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; 474 auto *PredBBTerminator = PredBB->getTerminator(); 475 LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n'); 476 477 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator); 478 if (isa<UnreachableInst>(PredBBTerminator)) { 479 assert(PredVPSuccessors.size() == 1 && 480 "Predecessor ending w/o branch must have single successor."); 481 DebugLoc DL = PredBBTerminator->getDebugLoc(); 482 PredBBTerminator->eraseFromParent(); 483 auto *Br = BranchInst::Create(NewBB, PredBB); 484 Br->setDebugLoc(DL); 485 } else if (TermBr && !TermBr->isConditional()) { 486 TermBr->setSuccessor(0, NewBB); 487 } else { 488 // Set each forward successor here when it is created, excluding 489 // backedges. A backward successor is set when the branch is created. 490 // Branches to VPIRBasicBlocks must have the same successors in VPlan as 491 // in the original IR, except when the predecessor is the entry block. 492 // This enables including SCEV and memory runtime check blocks in VPlan. 493 // TODO: Remove exception by modeling the terminator of entry block using 494 // BranchOnCond. 495 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; 496 assert((TermBr && (!TermBr->getSuccessor(idx) || 497 (isa<VPIRBasicBlock>(this) && 498 (TermBr->getSuccessor(idx) == NewBB || 499 PredVPBlock == getPlan()->getEntry())))) && 500 "Trying to reset an existing successor block."); 501 TermBr->setSuccessor(idx, NewBB); 502 } 503 CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}}); 504 } 505 } 506 507 void VPIRBasicBlock::execute(VPTransformState *State) { 508 assert(getHierarchicalSuccessors().size() <= 2 && 509 "VPIRBasicBlock can have at most two successors at the moment!"); 510 State->Builder.SetInsertPoint(IRBB->getTerminator()); 511 State->CFG.PrevBB = IRBB; 512 State->CFG.VPBB2IRBB[this] = IRBB; 513 executeRecipes(State, IRBB); 514 // Create a branch instruction to terminate IRBB if one was not created yet 515 // and is needed. 516 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) { 517 auto *Br = State->Builder.CreateBr(IRBB); 518 Br->setOperand(0, nullptr); 519 IRBB->getTerminator()->eraseFromParent(); 520 } else { 521 assert( 522 (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) && 523 "other blocks must be terminated by a branch"); 524 } 525 526 connectToPredecessors(*State); 527 } 528 529 VPIRBasicBlock *VPIRBasicBlock::clone() { 530 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB); 531 for (VPRecipeBase &R : Recipes) 532 NewBlock->appendRecipe(R.clone()); 533 return NewBlock; 534 } 535 536 void VPBasicBlock::execute(VPTransformState *State) { 537 bool Replica = bool(State->Lane); 538 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. 539 540 if (VPBlockUtils::isHeader(this, State->VPDT)) { 541 // Create and register the new vector loop. 542 Loop *PrevParentLoop = State->CurrentParentLoop; 543 State->CurrentParentLoop = State->LI->AllocateLoop(); 544 545 // Insert the new loop into the loop nest and register the new basic blocks 546 // before calling any utilities such as SCEV that require valid LoopInfo. 547 if (PrevParentLoop) 548 PrevParentLoop->addChildLoop(State->CurrentParentLoop); 549 else 550 State->LI->addTopLevelLoop(State->CurrentParentLoop); 551 } 552 553 auto IsReplicateRegion = [](VPBlockBase *BB) { 554 auto *R = dyn_cast_or_null<VPRegionBlock>(BB); 555 assert((!R || R->isReplicator()) && 556 "only replicate region blocks should remain"); 557 return R; 558 }; 559 // 1. Create an IR basic block. 560 if ((Replica && this == getParent()->getEntry()) || 561 IsReplicateRegion(getSingleHierarchicalPredecessor())) { 562 // Reuse the previous basic block if the current VPBB is either 563 // * the entry to a replicate region, or 564 // * the exit of a replicate region. 565 State->CFG.VPBB2IRBB[this] = NewBB; 566 } else { 567 NewBB = createEmptyBasicBlock(*State); 568 569 State->Builder.SetInsertPoint(NewBB); 570 // Temporarily terminate with unreachable until CFG is rewired. 571 UnreachableInst *Terminator = State->Builder.CreateUnreachable(); 572 State->Builder.SetInsertPoint(Terminator); 573 574 State->CFG.PrevBB = NewBB; 575 State->CFG.VPBB2IRBB[this] = NewBB; 576 connectToPredecessors(*State); 577 } 578 579 // 2. Fill the IR basic block with IR instructions. 580 executeRecipes(State, NewBB); 581 582 // If this block is a latch, update CurrentParentLoop. 583 if (VPBlockUtils::isLatch(this, State->VPDT)) 584 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop(); 585 } 586 587 VPBasicBlock *VPBasicBlock::clone() { 588 auto *NewBlock = getPlan()->createVPBasicBlock(getName()); 589 for (VPRecipeBase &R : *this) 590 NewBlock->appendRecipe(R.clone()); 591 return NewBlock; 592 } 593 594 void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) { 595 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName() 596 << " in BB: " << BB->getName() << '\n'); 597 598 State->CFG.PrevVPBB = this; 599 600 for (VPRecipeBase &Recipe : Recipes) { 601 State->setDebugLocFrom(Recipe.getDebugLoc()); 602 Recipe.execute(*State); 603 } 604 605 LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB); 606 } 607 608 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) { 609 assert((SplitAt == end() || SplitAt->getParent() == this) && 610 "can only split at a position in the same block"); 611 612 // Create new empty block after the block to split. 613 auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split"); 614 VPBlockUtils::insertBlockAfter(SplitBlock, this); 615 616 // Finally, move the recipes starting at SplitAt to new block. 617 for (VPRecipeBase &ToMove : 618 make_early_inc_range(make_range(SplitAt, this->end()))) 619 ToMove.moveBefore(*SplitBlock, SplitBlock->end()); 620 621 return SplitBlock; 622 } 623 624 /// Return the enclosing loop region for region \p P. The templated version is 625 /// used to support both const and non-const block arguments. 626 template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) { 627 if (P && P->isReplicator()) { 628 P = P->getParent(); 629 // Multiple loop regions can be nested, but replicate regions can only be 630 // nested inside a loop region or must be outside any other region. 631 assert((!P || !P->isReplicator()) && "unexpected nested replicate regions"); 632 } 633 return P; 634 } 635 636 VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() { 637 return getEnclosingLoopRegionForRegion(getParent()); 638 } 639 640 const VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() const { 641 return getEnclosingLoopRegionForRegion(getParent()); 642 } 643 644 static bool hasConditionalTerminator(const VPBasicBlock *VPBB) { 645 if (VPBB->empty()) { 646 assert( 647 VPBB->getNumSuccessors() < 2 && 648 "block with multiple successors doesn't have a recipe as terminator"); 649 return false; 650 } 651 652 const VPRecipeBase *R = &VPBB->back(); 653 bool IsSwitch = isa<VPInstruction>(R) && 654 cast<VPInstruction>(R)->getOpcode() == Instruction::Switch; 655 bool IsCondBranch = isa<VPBranchOnMaskRecipe>(R) || 656 match(R, m_BranchOnCond(m_VPValue())) || 657 match(R, m_BranchOnCount(m_VPValue(), m_VPValue())); 658 (void)IsCondBranch; 659 (void)IsSwitch; 660 if (VPBB->getNumSuccessors() == 2 || 661 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) { 662 assert((IsCondBranch || IsSwitch) && 663 "block with multiple successors not terminated by " 664 "conditional branch nor switch recipe"); 665 666 return true; 667 } 668 669 if (VPBB->getNumSuccessors() > 2) { 670 assert(IsSwitch && "block with more than 2 successors not terminated by " 671 "a switch recipe"); 672 return true; 673 } 674 675 assert( 676 !IsCondBranch && 677 "block with 0 or 1 successors terminated by conditional branch recipe"); 678 return false; 679 } 680 681 VPRecipeBase *VPBasicBlock::getTerminator() { 682 if (hasConditionalTerminator(this)) 683 return &back(); 684 return nullptr; 685 } 686 687 const VPRecipeBase *VPBasicBlock::getTerminator() const { 688 if (hasConditionalTerminator(this)) 689 return &back(); 690 return nullptr; 691 } 692 693 bool VPBasicBlock::isExiting() const { 694 return getParent() && getParent()->getExitingBasicBlock() == this; 695 } 696 697 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 698 void VPBlockBase::print(raw_ostream &O) const { 699 VPSlotTracker SlotTracker(getPlan()); 700 print(O, "", SlotTracker); 701 } 702 703 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const { 704 if (getSuccessors().empty()) { 705 O << Indent << "No successors\n"; 706 } else { 707 O << Indent << "Successor(s): "; 708 ListSeparator LS; 709 for (auto *Succ : getSuccessors()) 710 O << LS << Succ->getName(); 711 O << '\n'; 712 } 713 } 714 715 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent, 716 VPSlotTracker &SlotTracker) const { 717 O << Indent << getName() << ":\n"; 718 719 auto RecipeIndent = Indent + " "; 720 for (const VPRecipeBase &Recipe : *this) { 721 Recipe.print(O, RecipeIndent, SlotTracker); 722 O << '\n'; 723 } 724 725 printSuccessors(O, Indent); 726 } 727 #endif 728 729 static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry); 730 731 // Clone the CFG for all nodes reachable from \p Entry, this includes cloning 732 // the blocks and their recipes. Operands of cloned recipes will NOT be updated. 733 // Remapping of operands must be done separately. Returns a pair with the new 734 // entry and exiting blocks of the cloned region. If \p Entry isn't part of a 735 // region, return nullptr for the exiting block. 736 static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) { 737 DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks; 738 VPBlockBase *Exiting = nullptr; 739 bool InRegion = Entry->getParent(); 740 // First, clone blocks reachable from Entry. 741 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) { 742 VPBlockBase *NewBB = BB->clone(); 743 Old2NewVPBlocks[BB] = NewBB; 744 if (InRegion && BB->getNumSuccessors() == 0) { 745 assert(!Exiting && "Multiple exiting blocks?"); 746 Exiting = BB; 747 } 748 } 749 assert((!InRegion || Exiting) && "regions must have a single exiting block"); 750 751 // Second, update the predecessors & successors of the cloned blocks. 752 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) { 753 VPBlockBase *NewBB = Old2NewVPBlocks[BB]; 754 SmallVector<VPBlockBase *> NewPreds; 755 for (VPBlockBase *Pred : BB->getPredecessors()) { 756 NewPreds.push_back(Old2NewVPBlocks[Pred]); 757 } 758 NewBB->setPredecessors(NewPreds); 759 SmallVector<VPBlockBase *> NewSuccs; 760 for (VPBlockBase *Succ : BB->successors()) { 761 NewSuccs.push_back(Old2NewVPBlocks[Succ]); 762 } 763 NewBB->setSuccessors(NewSuccs); 764 } 765 766 #if !defined(NDEBUG) 767 // Verify that the order of predecessors and successors matches in the cloned 768 // version. 769 for (const auto &[OldBB, NewBB] : 770 zip(vp_depth_first_shallow(Entry), 771 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) { 772 for (const auto &[OldPred, NewPred] : 773 zip(OldBB->getPredecessors(), NewBB->getPredecessors())) 774 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors"); 775 776 for (const auto &[OldSucc, NewSucc] : 777 zip(OldBB->successors(), NewBB->successors())) 778 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors"); 779 } 780 #endif 781 782 return std::make_pair(Old2NewVPBlocks[Entry], 783 Exiting ? Old2NewVPBlocks[Exiting] : nullptr); 784 } 785 786 VPRegionBlock *VPRegionBlock::clone() { 787 const auto &[NewEntry, NewExiting] = cloneFrom(getEntry()); 788 auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting, 789 getName(), isReplicator()); 790 for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry)) 791 Block->setParent(NewRegion); 792 return NewRegion; 793 } 794 795 void VPRegionBlock::execute(VPTransformState *State) { 796 assert(isReplicator() && 797 "Loop regions should have been lowered to plain CFG"); 798 assert(!State->Lane && "Replicating a Region with non-null instance."); 799 assert(!State->VF.isScalable() && "VF is assumed to be non scalable."); 800 801 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT( 802 Entry); 803 State->Lane = VPLane(0); 804 for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) { 805 State->Lane = VPLane(Lane, VPLane::Kind::First); 806 // Visit the VPBlocks connected to \p this, starting from it. 807 for (VPBlockBase *Block : RPOT) { 808 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); 809 Block->execute(State); 810 } 811 } 812 813 // Exit replicating mode. 814 State->Lane.reset(); 815 } 816 817 InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) { 818 InstructionCost Cost = 0; 819 for (VPRecipeBase &R : Recipes) 820 Cost += R.cost(VF, Ctx); 821 return Cost; 822 } 823 824 const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const { 825 const VPBlockBase *Pred = nullptr; 826 if (getNumPredecessors() > 0) { 827 Pred = getPredecessors()[Idx]; 828 } else { 829 auto *Region = getParent(); 830 assert(Region && !Region->isReplicator() && Region->getEntry() == this && 831 "must be in the entry block of a non-replicate region"); 832 assert(Idx < 2 && Region->getNumPredecessors() == 1 && 833 "loop region has a single predecessor (preheader), its entry block " 834 "has 2 incoming blocks"); 835 836 // Idx == 0 selects the predecessor of the region, Idx == 1 selects the 837 // region itself whose exiting block feeds the phi across the backedge. 838 Pred = Idx == 0 ? Region->getSinglePredecessor() : Region; 839 } 840 return Pred->getExitingBasicBlock(); 841 } 842 843 InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) { 844 if (!isReplicator()) { 845 InstructionCost Cost = 0; 846 for (VPBlockBase *Block : vp_depth_first_shallow(getEntry())) 847 Cost += Block->cost(VF, Ctx); 848 InstructionCost BackedgeCost = 849 ForceTargetInstructionCost.getNumOccurrences() 850 ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences()) 851 : Ctx.TTI.getCFInstrCost(Instruction::Br, Ctx.CostKind); 852 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF 853 << ": vector loop backedge\n"); 854 Cost += BackedgeCost; 855 return Cost; 856 } 857 858 // Compute the cost of a replicate region. Replicating isn't supported for 859 // scalable vectors, return an invalid cost for them. 860 // TODO: Discard scalable VPlans with replicate recipes earlier after 861 // construction. 862 if (VF.isScalable()) 863 return InstructionCost::getInvalid(); 864 865 // First compute the cost of the conditionally executed recipes, followed by 866 // account for the branching cost, except if the mask is a header mask or 867 // uniform condition. 868 using namespace llvm::VPlanPatternMatch; 869 VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]); 870 InstructionCost ThenCost = Then->cost(VF, Ctx); 871 872 // For the scalar case, we may not always execute the original predicated 873 // block, Thus, scale the block's cost by the probability of executing it. 874 if (VF.isScalar()) 875 return ThenCost / getPredBlockCostDivisor(Ctx.CostKind); 876 877 return ThenCost; 878 } 879 880 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 881 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, 882 VPSlotTracker &SlotTracker) const { 883 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {"; 884 auto NewIndent = Indent + " "; 885 for (auto *BlockBase : vp_depth_first_shallow(Entry)) { 886 O << '\n'; 887 BlockBase->print(O, NewIndent, SlotTracker); 888 } 889 O << Indent << "}\n"; 890 891 printSuccessors(O, Indent); 892 } 893 #endif 894 895 void VPRegionBlock::dissolveToCFGLoop() { 896 auto *Header = cast<VPBasicBlock>(getEntry()); 897 if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) { 898 assert(this == getPlan()->getVectorLoopRegion() && 899 "Canonical IV must be in the entry of the top-level loop region"); 900 auto *ScalarR = VPBuilder(CanIV).createScalarPhi( 901 {CanIV->getStartValue(), CanIV->getBackedgeValue()}, 902 CanIV->getDebugLoc(), "index"); 903 CanIV->replaceAllUsesWith(ScalarR); 904 CanIV->eraseFromParent(); 905 } 906 907 VPBlockBase *Preheader = getSinglePredecessor(); 908 auto *ExitingLatch = cast<VPBasicBlock>(getExiting()); 909 VPBlockBase *Middle = getSingleSuccessor(); 910 VPBlockUtils::disconnectBlocks(Preheader, this); 911 VPBlockUtils::disconnectBlocks(this, Middle); 912 913 for (VPBlockBase *VPB : vp_depth_first_shallow(Entry)) 914 VPB->setParent(getParent()); 915 916 VPBlockUtils::connectBlocks(Preheader, Header); 917 VPBlockUtils::connectBlocks(ExitingLatch, Middle); 918 VPBlockUtils::connectBlocks(ExitingLatch, Header); 919 } 920 921 VPlan::VPlan(Loop *L) { 922 setEntry(createVPIRBasicBlock(L->getLoopPreheader())); 923 ScalarHeader = createVPIRBasicBlock(L->getHeader()); 924 925 SmallVector<BasicBlock *> IRExitBlocks; 926 L->getUniqueExitBlocks(IRExitBlocks); 927 for (BasicBlock *EB : IRExitBlocks) 928 ExitBlocks.push_back(createVPIRBasicBlock(EB)); 929 } 930 931 VPlan::~VPlan() { 932 VPValue DummyValue; 933 934 for (auto *VPB : CreatedBlocks) { 935 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) { 936 // Replace all operands of recipes and all VPValues defined in VPBB with 937 // DummyValue so the block can be deleted. 938 for (VPRecipeBase &R : *VPBB) { 939 for (auto *Def : R.definedValues()) 940 Def->replaceAllUsesWith(&DummyValue); 941 942 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++) 943 R.setOperand(I, &DummyValue); 944 } 945 } 946 delete VPB; 947 } 948 for (VPValue *VPV : getLiveIns()) 949 delete VPV; 950 if (BackedgeTakenCount) 951 delete BackedgeTakenCount; 952 } 953 954 void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, 955 VPTransformState &State) { 956 Type *TCTy = TripCountV->getType(); 957 // Check if the backedge taken count is needed, and if so build it. 958 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { 959 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); 960 auto *TCMO = Builder.CreateSub(TripCountV, ConstantInt::get(TCTy, 1), 961 "trip.count.minus.1"); 962 BackedgeTakenCount->setUnderlyingValue(TCMO); 963 } 964 965 VectorTripCount.setUnderlyingValue(VectorTripCountV); 966 967 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); 968 // FIXME: Model VF * UF computation completely in VPlan. 969 unsigned UF = getUF(); 970 if (VF.getNumUsers()) { 971 Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF); 972 VF.setUnderlyingValue(RuntimeVF); 973 VFxUF.setUnderlyingValue( 974 UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF)) 975 : RuntimeVF); 976 } else { 977 VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF)); 978 } 979 } 980 981 VPIRBasicBlock *VPlan::getExitBlock(BasicBlock *IRBB) const { 982 auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) { 983 return VPIRBB->getIRBasicBlock() == IRBB; 984 }); 985 assert(Iter != getExitBlocks().end() && "no exit block found"); 986 return *Iter; 987 } 988 989 bool VPlan::isExitBlock(VPBlockBase *VPBB) { 990 return is_contained(ExitBlocks, VPBB); 991 } 992 993 /// Generate the code inside the preheader and body of the vectorized loop. 994 /// Assumes a single pre-header basic-block was created for this. Introduce 995 /// additional basic-blocks as needed, and fill them all. 996 void VPlan::execute(VPTransformState *State) { 997 // Initialize CFG state. 998 State->CFG.PrevVPBB = nullptr; 999 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor(); 1000 1001 // Update VPDominatorTree since VPBasicBlock may be removed after State was 1002 // constructed. 1003 State->VPDT.recalculate(*this); 1004 1005 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT. 1006 BasicBlock *VectorPreHeader = State->CFG.PrevBB; 1007 cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr); 1008 State->CFG.DTU.applyUpdates( 1009 {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}}); 1010 1011 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF 1012 << ", UF=" << getUF() << '\n'); 1013 setName("Final VPlan"); 1014 LLVM_DEBUG(dump()); 1015 1016 // Disconnect scalar preheader and scalar header, as the dominator tree edge 1017 // will be updated as part of VPlan execution. This allows keeping the DTU 1018 // logic generic during VPlan execution. 1019 BasicBlock *ScalarPh = State->CFG.ExitBB; 1020 State->CFG.DTU.applyUpdates( 1021 {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}}); 1022 1023 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT( 1024 Entry); 1025 // Generate code for the VPlan, in parts of the vector skeleton, loop body and 1026 // successor blocks including the middle, exit and scalar preheader blocks. 1027 for (VPBlockBase *Block : RPOT) 1028 Block->execute(State); 1029 1030 State->CFG.DTU.flush(); 1031 1032 VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT); 1033 if (!Header) 1034 return; 1035 1036 auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]); 1037 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; 1038 1039 // Fix the latch value of canonical, reduction and first-order recurrences 1040 // phis in the vector loop. 1041 for (VPRecipeBase &R : Header->phis()) { 1042 // Skip phi-like recipes that generate their backedege values themselves. 1043 if (isa<VPWidenPHIRecipe>(&R)) 1044 continue; 1045 1046 if (auto *WidenPhi = dyn_cast<VPWidenPointerInductionRecipe>(&R)) { 1047 assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) && 1048 "recipe generating only scalars should have been replaced"); 1049 auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi)); 1050 PHINode *Phi = cast<PHINode>(GEP->getPointerOperand()); 1051 1052 Phi->setIncomingBlock(1, VectorLatchBB); 1053 1054 // Move the last step to the end of the latch block. This ensures 1055 // consistent placement of all induction updates. 1056 Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1)); 1057 Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator())); 1058 continue; 1059 } 1060 1061 auto *PhiR = cast<VPSingleDefRecipe>(&R); 1062 // VPInstructions currently model scalar Phis only. 1063 bool NeedsScalar = isa<VPInstruction>(PhiR) || 1064 (isa<VPReductionPHIRecipe>(PhiR) && 1065 cast<VPReductionPHIRecipe>(PhiR)->isInLoop()); 1066 1067 Value *Phi = State->get(PhiR, NeedsScalar); 1068 // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does 1069 // not. 1070 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar); 1071 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB); 1072 } 1073 } 1074 1075 InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) { 1076 // For now only return the cost of the vector loop region, ignoring any other 1077 // blocks, like the preheader or middle blocks. 1078 InstructionCost Cost = getVectorLoopRegion()->cost(VF, Ctx); 1079 1080 // If any instructions in the middle block are invalid return invalid. 1081 // TODO: Remove once no VPlans with VF == vscale x 1 and first-order recurrences are created. 1082 if (!getMiddleBlock()->cost(VF, Ctx).isValid()) 1083 return InstructionCost::getInvalid(); 1084 1085 return Cost; 1086 } 1087 1088 VPRegionBlock *VPlan::getVectorLoopRegion() { 1089 // TODO: Cache if possible. 1090 for (VPBlockBase *B : vp_depth_first_shallow(getEntry())) 1091 if (auto *R = dyn_cast<VPRegionBlock>(B)) 1092 return R->isReplicator() ? nullptr : R; 1093 return nullptr; 1094 } 1095 1096 const VPRegionBlock *VPlan::getVectorLoopRegion() const { 1097 for (const VPBlockBase *B : vp_depth_first_shallow(getEntry())) 1098 if (auto *R = dyn_cast<VPRegionBlock>(B)) 1099 return R->isReplicator() ? nullptr : R; 1100 return nullptr; 1101 } 1102 1103 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1104 void VPlan::printLiveIns(raw_ostream &O) const { 1105 VPSlotTracker SlotTracker(this); 1106 1107 if (VF.getNumUsers() > 0) { 1108 O << "\nLive-in "; 1109 VF.printAsOperand(O, SlotTracker); 1110 O << " = VF"; 1111 } 1112 1113 if (VFxUF.getNumUsers() > 0) { 1114 O << "\nLive-in "; 1115 VFxUF.printAsOperand(O, SlotTracker); 1116 O << " = VF * UF"; 1117 } 1118 1119 if (VectorTripCount.getNumUsers() > 0) { 1120 O << "\nLive-in "; 1121 VectorTripCount.printAsOperand(O, SlotTracker); 1122 O << " = vector-trip-count"; 1123 } 1124 1125 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { 1126 O << "\nLive-in "; 1127 BackedgeTakenCount->printAsOperand(O, SlotTracker); 1128 O << " = backedge-taken count"; 1129 } 1130 1131 O << "\n"; 1132 if (TripCount) { 1133 if (TripCount->isLiveIn()) 1134 O << "Live-in "; 1135 TripCount->printAsOperand(O, SlotTracker); 1136 O << " = original trip-count"; 1137 O << "\n"; 1138 } 1139 } 1140 1141 LLVM_DUMP_METHOD 1142 void VPlan::print(raw_ostream &O) const { 1143 VPSlotTracker SlotTracker(this); 1144 1145 O << "VPlan '" << getName() << "' {"; 1146 1147 printLiveIns(O); 1148 1149 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>> 1150 RPOT(getEntry()); 1151 for (const VPBlockBase *Block : RPOT) { 1152 O << '\n'; 1153 Block->print(O, "", SlotTracker); 1154 } 1155 1156 O << "}\n"; 1157 } 1158 1159 std::string VPlan::getName() const { 1160 std::string Out; 1161 raw_string_ostream RSO(Out); 1162 RSO << Name << " for "; 1163 if (!VFs.empty()) { 1164 RSO << "VF={" << VFs[0]; 1165 for (ElementCount VF : drop_begin(VFs)) 1166 RSO << "," << VF; 1167 RSO << "},"; 1168 } 1169 1170 if (UFs.empty()) { 1171 RSO << "UF>=1"; 1172 } else { 1173 RSO << "UF={" << UFs[0]; 1174 for (unsigned UF : drop_begin(UFs)) 1175 RSO << "," << UF; 1176 RSO << "}"; 1177 } 1178 1179 return Out; 1180 } 1181 1182 LLVM_DUMP_METHOD 1183 void VPlan::printDOT(raw_ostream &O) const { 1184 VPlanPrinter Printer(O, *this); 1185 Printer.dump(); 1186 } 1187 1188 LLVM_DUMP_METHOD 1189 void VPlan::dump() const { print(dbgs()); } 1190 #endif 1191 1192 static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, 1193 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) { 1194 // Update the operands of all cloned recipes starting at NewEntry. This 1195 // traverses all reachable blocks. This is done in two steps, to handle cycles 1196 // in PHI recipes. 1197 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> 1198 OldDeepRPOT(Entry); 1199 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> 1200 NewDeepRPOT(NewEntry); 1201 // First, collect all mappings from old to new VPValues defined by cloned 1202 // recipes. 1203 for (const auto &[OldBB, NewBB] : 1204 zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT), 1205 VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) { 1206 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() && 1207 "blocks must have the same number of recipes"); 1208 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) { 1209 assert(OldR.getNumOperands() == NewR.getNumOperands() && 1210 "recipes must have the same number of operands"); 1211 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() && 1212 "recipes must define the same number of operands"); 1213 for (const auto &[OldV, NewV] : 1214 zip(OldR.definedValues(), NewR.definedValues())) 1215 Old2NewVPValues[OldV] = NewV; 1216 } 1217 } 1218 1219 // Update all operands to use cloned VPValues. 1220 for (VPBasicBlock *NewBB : 1221 VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) { 1222 for (VPRecipeBase &NewR : *NewBB) 1223 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) { 1224 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I)); 1225 NewR.setOperand(I, NewOp); 1226 } 1227 } 1228 } 1229 1230 VPlan *VPlan::duplicate() { 1231 unsigned NumBlocksBeforeCloning = CreatedBlocks.size(); 1232 // Clone blocks. 1233 const auto &[NewEntry, __] = cloneFrom(Entry); 1234 1235 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock(); 1236 VPIRBasicBlock *NewScalarHeader = nullptr; 1237 if (getScalarHeader()->getNumPredecessors() == 0) { 1238 NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB); 1239 } else { 1240 NewScalarHeader = cast<VPIRBasicBlock>(*find_if( 1241 vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) { 1242 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB); 1243 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB; 1244 })); 1245 } 1246 // Create VPlan, clone live-ins and remap operands in the cloned blocks. 1247 auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader); 1248 DenseMap<VPValue *, VPValue *> Old2NewVPValues; 1249 for (VPValue *OldLiveIn : getLiveIns()) { 1250 Old2NewVPValues[OldLiveIn] = 1251 NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue()); 1252 } 1253 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount; 1254 Old2NewVPValues[&VF] = &NewPlan->VF; 1255 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF; 1256 if (BackedgeTakenCount) { 1257 NewPlan->BackedgeTakenCount = new VPValue(); 1258 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount; 1259 } 1260 if (TripCount && TripCount->isLiveIn()) 1261 Old2NewVPValues[TripCount] = 1262 NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue()); 1263 // else NewTripCount will be created and inserted into Old2NewVPValues when 1264 // TripCount is cloned. In any case NewPlan->TripCount is updated below. 1265 1266 remapOperands(Entry, NewEntry, Old2NewVPValues); 1267 1268 // Initialize remaining fields of cloned VPlan. 1269 NewPlan->VFs = VFs; 1270 NewPlan->UFs = UFs; 1271 // TODO: Adjust names. 1272 NewPlan->Name = Name; 1273 if (TripCount) { 1274 assert(Old2NewVPValues.contains(TripCount) && 1275 "TripCount must have been added to Old2NewVPValues"); 1276 NewPlan->TripCount = Old2NewVPValues[TripCount]; 1277 } 1278 1279 // Transfer all cloned blocks (the second half of all current blocks) from 1280 // current to new VPlan. 1281 unsigned NumBlocksAfterCloning = CreatedBlocks.size(); 1282 for (unsigned I : 1283 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning)) 1284 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]); 1285 CreatedBlocks.truncate(NumBlocksBeforeCloning); 1286 1287 // Update ExitBlocks of the new plan. 1288 for (VPBlockBase *VPB : NewPlan->CreatedBlocks) { 1289 if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(VPB) && 1290 VPB != NewScalarHeader) 1291 NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB)); 1292 } 1293 1294 return NewPlan; 1295 } 1296 1297 VPIRBasicBlock *VPlan::createEmptyVPIRBasicBlock(BasicBlock *IRBB) { 1298 auto *VPIRBB = new VPIRBasicBlock(IRBB); 1299 CreatedBlocks.push_back(VPIRBB); 1300 return VPIRBB; 1301 } 1302 1303 VPIRBasicBlock *VPlan::createVPIRBasicBlock(BasicBlock *IRBB) { 1304 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB); 1305 for (Instruction &I : 1306 make_range(IRBB->begin(), IRBB->getTerminator()->getIterator())) 1307 VPIRBB->appendRecipe(VPIRInstruction::create(I)); 1308 return VPIRBB; 1309 } 1310 1311 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1312 1313 Twine VPlanPrinter::getUID(const VPBlockBase *Block) { 1314 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") + 1315 Twine(getOrCreateBID(Block)); 1316 } 1317 1318 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { 1319 const std::string &Name = Block->getName(); 1320 if (!Name.empty()) 1321 return Name; 1322 return "VPB" + Twine(getOrCreateBID(Block)); 1323 } 1324 1325 void VPlanPrinter::dump() { 1326 Depth = 1; 1327 bumpIndent(0); 1328 OS << "digraph VPlan {\n"; 1329 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; 1330 if (!Plan.getName().empty()) 1331 OS << "\\n" << DOT::EscapeString(Plan.getName()); 1332 1333 { 1334 // Print live-ins. 1335 std::string Str; 1336 raw_string_ostream SS(Str); 1337 Plan.printLiveIns(SS); 1338 SmallVector<StringRef, 0> Lines; 1339 StringRef(Str).rtrim('\n').split(Lines, "\n"); 1340 for (auto Line : Lines) 1341 OS << DOT::EscapeString(Line.str()) << "\\n"; 1342 } 1343 1344 OS << "\"]\n"; 1345 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n"; 1346 OS << "edge [fontname=Courier, fontsize=30]\n"; 1347 OS << "compound=true\n"; 1348 1349 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry())) 1350 dumpBlock(Block); 1351 1352 OS << "}\n"; 1353 } 1354 1355 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { 1356 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block)) 1357 dumpBasicBlock(BasicBlock); 1358 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 1359 dumpRegion(Region); 1360 else 1361 llvm_unreachable("Unsupported kind of VPBlock."); 1362 } 1363 1364 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, 1365 bool Hidden, const Twine &Label) { 1366 // Due to "dot" we print an edge between two regions as an edge between the 1367 // exiting basic block and the entry basic of the respective regions. 1368 const VPBlockBase *Tail = From->getExitingBasicBlock(); 1369 const VPBlockBase *Head = To->getEntryBasicBlock(); 1370 OS << Indent << getUID(Tail) << " -> " << getUID(Head); 1371 OS << " [ label=\"" << Label << '\"'; 1372 if (Tail != From) 1373 OS << " ltail=" << getUID(From); 1374 if (Head != To) 1375 OS << " lhead=" << getUID(To); 1376 if (Hidden) 1377 OS << "; splines=none"; 1378 OS << "]\n"; 1379 } 1380 1381 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { 1382 auto &Successors = Block->getSuccessors(); 1383 if (Successors.size() == 1) 1384 drawEdge(Block, Successors.front(), false, ""); 1385 else if (Successors.size() == 2) { 1386 drawEdge(Block, Successors.front(), false, "T"); 1387 drawEdge(Block, Successors.back(), false, "F"); 1388 } else { 1389 unsigned SuccessorNumber = 0; 1390 for (auto *Successor : Successors) 1391 drawEdge(Block, Successor, false, Twine(SuccessorNumber++)); 1392 } 1393 } 1394 1395 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { 1396 // Implement dot-formatted dump by performing plain-text dump into the 1397 // temporary storage followed by some post-processing. 1398 OS << Indent << getUID(BasicBlock) << " [label =\n"; 1399 bumpIndent(1); 1400 std::string Str; 1401 raw_string_ostream SS(Str); 1402 // Use no indentation as we need to wrap the lines into quotes ourselves. 1403 BasicBlock->print(SS, "", SlotTracker); 1404 1405 // We need to process each line of the output separately, so split 1406 // single-string plain-text dump. 1407 SmallVector<StringRef, 0> Lines; 1408 StringRef(Str).rtrim('\n').split(Lines, "\n"); 1409 1410 auto EmitLine = [&](StringRef Line, StringRef Suffix) { 1411 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix; 1412 }; 1413 1414 // Don't need the "+" after the last line. 1415 for (auto Line : make_range(Lines.begin(), Lines.end() - 1)) 1416 EmitLine(Line, " +\n"); 1417 EmitLine(Lines.back(), "\n"); 1418 1419 bumpIndent(-1); 1420 OS << Indent << "]\n"; 1421 1422 dumpEdges(BasicBlock); 1423 } 1424 1425 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { 1426 OS << Indent << "subgraph " << getUID(Region) << " {\n"; 1427 bumpIndent(1); 1428 OS << Indent << "fontname=Courier\n" 1429 << Indent << "label=\"" 1430 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ") 1431 << DOT::EscapeString(Region->getName()) << "\"\n"; 1432 // Dump the blocks of the region. 1433 assert(Region->getEntry() && "Region contains no inner blocks."); 1434 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry())) 1435 dumpBlock(Block); 1436 bumpIndent(-1); 1437 OS << Indent << "}\n"; 1438 dumpEdges(Region); 1439 } 1440 1441 #endif 1442 1443 /// Returns true if there is a vector loop region and \p VPV is defined in a 1444 /// loop region. 1445 static bool isDefinedInsideLoopRegions(const VPValue *VPV) { 1446 const VPRecipeBase *DefR = VPV->getDefiningRecipe(); 1447 return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() || 1448 DefR->getParent()->getEnclosingLoopRegion()); 1449 } 1450 1451 bool VPValue::isDefinedOutsideLoopRegions() const { 1452 return !isDefinedInsideLoopRegions(this); 1453 } 1454 void VPValue::replaceAllUsesWith(VPValue *New) { 1455 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; }); 1456 } 1457 1458 void VPValue::replaceUsesWithIf( 1459 VPValue *New, 1460 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) { 1461 // Note that this early exit is required for correctness; the implementation 1462 // below relies on the number of users for this VPValue to decrease, which 1463 // isn't the case if this == New. 1464 if (this == New) 1465 return; 1466 1467 for (unsigned J = 0; J < getNumUsers();) { 1468 VPUser *User = Users[J]; 1469 bool RemovedUser = false; 1470 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) { 1471 if (User->getOperand(I) != this || !ShouldReplace(*User, I)) 1472 continue; 1473 1474 RemovedUser = true; 1475 User->setOperand(I, New); 1476 } 1477 // If a user got removed after updating the current user, the next user to 1478 // update will be moved to the current position, so we only need to 1479 // increment the index if the number of users did not change. 1480 if (!RemovedUser) 1481 J++; 1482 } 1483 } 1484 1485 void VPUser::replaceUsesOfWith(VPValue *From, VPValue *To) { 1486 for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) { 1487 if (getOperand(Idx) == From) 1488 setOperand(Idx, To); 1489 } 1490 } 1491 1492 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1493 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const { 1494 OS << Tracker.getOrCreateName(this); 1495 } 1496 1497 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const { 1498 interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) { 1499 Op->printAsOperand(O, SlotTracker); 1500 }); 1501 } 1502 #endif 1503 1504 void VPSlotTracker::assignName(const VPValue *V) { 1505 assert(!VPValue2Name.contains(V) && "VPValue already has a name!"); 1506 auto *UV = V->getUnderlyingValue(); 1507 auto *VPI = dyn_cast_or_null<VPInstruction>(V->getDefiningRecipe()); 1508 if (!UV && !(VPI && !VPI->getName().empty())) { 1509 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str(); 1510 NextSlot++; 1511 return; 1512 } 1513 1514 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by 1515 // appending ".Number" to the name if there are multiple uses. 1516 std::string Name; 1517 if (UV) 1518 Name = getName(UV); 1519 else 1520 Name = VPI->getName(); 1521 1522 assert(!Name.empty() && "Name cannot be empty."); 1523 StringRef Prefix = UV ? "ir<" : "vp<%"; 1524 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str(); 1525 1526 // First assign the base name for V. 1527 const auto &[A, _] = VPValue2Name.insert({V, BaseName}); 1528 // Integer or FP constants with different types will result in he same string 1529 // due to stripping types. 1530 if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV)) 1531 return; 1532 1533 // If it is already used by C > 0 other VPValues, increase the version counter 1534 // C and use it for V. 1535 const auto &[C, UseInserted] = BaseName2Version.insert({BaseName, 0}); 1536 if (!UseInserted) { 1537 C->second++; 1538 A->second = (BaseName + Twine(".") + Twine(C->second)).str(); 1539 } 1540 } 1541 1542 void VPSlotTracker::assignNames(const VPlan &Plan) { 1543 if (Plan.VF.getNumUsers() > 0) 1544 assignName(&Plan.VF); 1545 if (Plan.VFxUF.getNumUsers() > 0) 1546 assignName(&Plan.VFxUF); 1547 assignName(&Plan.VectorTripCount); 1548 if (Plan.BackedgeTakenCount) 1549 assignName(Plan.BackedgeTakenCount); 1550 for (VPValue *LI : Plan.getLiveIns()) 1551 assignName(LI); 1552 1553 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>> 1554 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry())); 1555 for (const VPBasicBlock *VPBB : 1556 VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT)) 1557 assignNames(VPBB); 1558 } 1559 1560 void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) { 1561 for (const VPRecipeBase &Recipe : *VPBB) 1562 for (VPValue *Def : Recipe.definedValues()) 1563 assignName(Def); 1564 } 1565 1566 std::string VPSlotTracker::getName(const Value *V) { 1567 std::string Name; 1568 raw_string_ostream S(Name); 1569 if (V->hasName() || !isa<Instruction>(V)) { 1570 V->printAsOperand(S, false); 1571 return Name; 1572 } 1573 1574 if (!MST) { 1575 // Lazily create the ModuleSlotTracker when we first hit an unnamed 1576 // instruction. 1577 auto *I = cast<Instruction>(V); 1578 // This check is required to support unit tests with incomplete IR. 1579 if (I->getParent()) { 1580 MST = std::make_unique<ModuleSlotTracker>(I->getModule()); 1581 MST->incorporateFunction(*I->getFunction()); 1582 } else { 1583 MST = std::make_unique<ModuleSlotTracker>(nullptr); 1584 } 1585 } 1586 V->printAsOperand(S, false, *MST); 1587 return Name; 1588 } 1589 1590 std::string VPSlotTracker::getOrCreateName(const VPValue *V) const { 1591 std::string Name = VPValue2Name.lookup(V); 1592 if (!Name.empty()) 1593 return Name; 1594 1595 // If no name was assigned, no VPlan was provided when creating the slot 1596 // tracker or it is not reachable from the provided VPlan. This can happen, 1597 // e.g. when trying to print a recipe that has not been inserted into a VPlan 1598 // in a debugger. 1599 // TODO: Update VPSlotTracker constructor to assign names to recipes & 1600 // VPValues not associated with a VPlan, instead of constructing names ad-hoc 1601 // here. 1602 const VPRecipeBase *DefR = V->getDefiningRecipe(); 1603 (void)DefR; 1604 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) && 1605 "VPValue defined by a recipe in a VPlan?"); 1606 1607 // Use the underlying value's name, if there is one. 1608 if (auto *UV = V->getUnderlyingValue()) { 1609 std::string Name; 1610 raw_string_ostream S(Name); 1611 UV->printAsOperand(S, false); 1612 return (Twine("ir<") + Name + ">").str(); 1613 } 1614 1615 return "<badref>"; 1616 } 1617 1618 bool LoopVectorizationPlanner::getDecisionAndClampRange( 1619 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) { 1620 assert(!Range.isEmpty() && "Trying to test an empty VF range."); 1621 bool PredicateAtRangeStart = Predicate(Range.Start); 1622 1623 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End)) 1624 if (Predicate(TmpVF) != PredicateAtRangeStart) { 1625 Range.End = TmpVF; 1626 break; 1627 } 1628 1629 return PredicateAtRangeStart; 1630 } 1631 1632 /// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF, 1633 /// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range 1634 /// of VF's starting at a given VF and extending it as much as possible. Each 1635 /// vectorization decision can potentially shorten this sub-range during 1636 /// buildVPlan(). 1637 void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF, 1638 ElementCount MaxVF) { 1639 auto MaxVFTimes2 = MaxVF * 2; 1640 for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) { 1641 VFRange SubRange = {VF, MaxVFTimes2}; 1642 if (auto Plan = tryToBuildVPlan(SubRange)) { 1643 VPlanTransforms::optimize(*Plan); 1644 // Update the name of the latch of the top-level vector loop region region 1645 // after optimizations which includes block folding. 1646 Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch"); 1647 VPlans.push_back(std::move(Plan)); 1648 } 1649 VF = SubRange.End; 1650 } 1651 } 1652 1653 VPlan &LoopVectorizationPlanner::getPlanFor(ElementCount VF) const { 1654 assert(count_if(VPlans, 1655 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) == 1656 1 && 1657 "Multiple VPlans for VF."); 1658 1659 for (const VPlanPtr &Plan : VPlans) { 1660 if (Plan->hasVF(VF)) 1661 return *Plan.get(); 1662 } 1663 llvm_unreachable("No plan found!"); 1664 } 1665 1666 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1667 void LoopVectorizationPlanner::printPlans(raw_ostream &O) { 1668 if (VPlans.empty()) { 1669 O << "LV: No VPlans built.\n"; 1670 return; 1671 } 1672 for (const auto &Plan : VPlans) 1673 if (PrintVPlansInDotFormat) 1674 Plan->printDOT(O); 1675 else 1676 Plan->print(O); 1677 } 1678 #endif 1679 1680 TargetTransformInfo::OperandValueInfo 1681 VPCostContext::getOperandInfo(VPValue *V) const { 1682 if (!V->isLiveIn()) 1683 return {}; 1684 1685 return TTI::getOperandInfo(V->getLiveInIRValue()); 1686 } 1687