1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This is the LLVM vectorization plan. It represents a candidate for 11 /// vectorization, allowing to plan and optimize how to vectorize a given loop 12 /// before generating LLVM-IR. 13 /// The vectorizer uses vectorization plans to estimate the costs of potential 14 /// candidates and if profitable to execute the desired plan, generating vector 15 /// LLVM-IR code. 16 /// 17 //===----------------------------------------------------------------------===// 18 19 #include "VPlan.h" 20 #include "VPlanCFG.h" 21 #include "VPlanDominatorTree.h" 22 #include "llvm/ADT/DepthFirstIterator.h" 23 #include "llvm/ADT/PostOrderIterator.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/ADT/StringExtras.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/Analysis/LoopInfo.h" 29 #include "llvm/IR/BasicBlock.h" 30 #include "llvm/IR/CFG.h" 31 #include "llvm/IR/IRBuilder.h" 32 #include "llvm/IR/Instruction.h" 33 #include "llvm/IR/Instructions.h" 34 #include "llvm/IR/Type.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/CommandLine.h" 38 #include "llvm/Support/Debug.h" 39 #include "llvm/Support/GenericDomTreeConstruction.h" 40 #include "llvm/Support/GraphWriter.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 43 #include "llvm/Transforms/Utils/LoopVersioning.h" 44 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 45 #include <cassert> 46 #include <string> 47 #include <vector> 48 49 using namespace llvm; 50 51 namespace llvm { 52 extern cl::opt<bool> EnableVPlanNativePath; 53 } 54 55 #define DEBUG_TYPE "vplan" 56 57 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 58 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { 59 const VPInstruction *Instr = dyn_cast<VPInstruction>(&V); 60 VPSlotTracker SlotTracker( 61 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); 62 V.print(OS, SlotTracker); 63 return OS; 64 } 65 #endif 66 67 Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder, 68 const ElementCount &VF) const { 69 switch (LaneKind) { 70 case VPLane::Kind::ScalableLast: 71 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane 72 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF), 73 Builder.getInt32(VF.getKnownMinValue() - Lane)); 74 case VPLane::Kind::First: 75 return Builder.getInt32(Lane); 76 } 77 llvm_unreachable("Unknown lane kind"); 78 } 79 80 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def) 81 : SubclassID(SC), UnderlyingVal(UV), Def(Def) { 82 if (Def) 83 Def->addDefinedValue(this); 84 } 85 86 VPValue::~VPValue() { 87 assert(Users.empty() && "trying to delete a VPValue with remaining users"); 88 if (Def) 89 Def->removeDefinedValue(this); 90 } 91 92 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 93 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const { 94 if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def)) 95 R->print(OS, "", SlotTracker); 96 else 97 printAsOperand(OS, SlotTracker); 98 } 99 100 void VPValue::dump() const { 101 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def); 102 VPSlotTracker SlotTracker( 103 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); 104 print(dbgs(), SlotTracker); 105 dbgs() << "\n"; 106 } 107 108 void VPDef::dump() const { 109 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this); 110 VPSlotTracker SlotTracker( 111 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); 112 print(dbgs(), "", SlotTracker); 113 dbgs() << "\n"; 114 } 115 #endif 116 117 VPRecipeBase *VPValue::getDefiningRecipe() { 118 return cast_or_null<VPRecipeBase>(Def); 119 } 120 121 const VPRecipeBase *VPValue::getDefiningRecipe() const { 122 return cast_or_null<VPRecipeBase>(Def); 123 } 124 125 // Get the top-most entry block of \p Start. This is the entry block of the 126 // containing VPlan. This function is templated to support both const and non-const blocks 127 template <typename T> static T *getPlanEntry(T *Start) { 128 T *Next = Start; 129 T *Current = Start; 130 while ((Next = Next->getParent())) 131 Current = Next; 132 133 SmallSetVector<T *, 8> WorkList; 134 WorkList.insert(Current); 135 136 for (unsigned i = 0; i < WorkList.size(); i++) { 137 T *Current = WorkList[i]; 138 if (Current->getNumPredecessors() == 0) 139 return Current; 140 auto &Predecessors = Current->getPredecessors(); 141 WorkList.insert(Predecessors.begin(), Predecessors.end()); 142 } 143 144 llvm_unreachable("VPlan without any entry node without predecessors"); 145 } 146 147 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; } 148 149 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; } 150 151 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly. 152 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { 153 const VPBlockBase *Block = this; 154 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 155 Block = Region->getEntry(); 156 return cast<VPBasicBlock>(Block); 157 } 158 159 VPBasicBlock *VPBlockBase::getEntryBasicBlock() { 160 VPBlockBase *Block = this; 161 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 162 Block = Region->getEntry(); 163 return cast<VPBasicBlock>(Block); 164 } 165 166 void VPBlockBase::setPlan(VPlan *ParentPlan) { 167 assert( 168 (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && 169 "Can only set plan on its entry or preheader block."); 170 Plan = ParentPlan; 171 } 172 173 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. 174 const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const { 175 const VPBlockBase *Block = this; 176 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 177 Block = Region->getExiting(); 178 return cast<VPBasicBlock>(Block); 179 } 180 181 VPBasicBlock *VPBlockBase::getExitingBasicBlock() { 182 VPBlockBase *Block = this; 183 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 184 Block = Region->getExiting(); 185 return cast<VPBasicBlock>(Block); 186 } 187 188 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { 189 if (!Successors.empty() || !Parent) 190 return this; 191 assert(Parent->getExiting() == this && 192 "Block w/o successors not the exiting block of its parent."); 193 return Parent->getEnclosingBlockWithSuccessors(); 194 } 195 196 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { 197 if (!Predecessors.empty() || !Parent) 198 return this; 199 assert(Parent->getEntry() == this && 200 "Block w/o predecessors not the entry of its parent."); 201 return Parent->getEnclosingBlockWithPredecessors(); 202 } 203 204 void VPBlockBase::deleteCFG(VPBlockBase *Entry) { 205 for (VPBlockBase *Block : to_vector(vp_depth_first_shallow(Entry))) 206 delete Block; 207 } 208 209 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() { 210 iterator It = begin(); 211 while (It != end() && It->isPhi()) 212 It++; 213 return It; 214 } 215 216 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) { 217 if (Def->isLiveIn()) 218 return Def->getLiveInIRValue(); 219 220 if (hasScalarValue(Def, Instance)) { 221 return Data 222 .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)]; 223 } 224 225 assert(hasVectorValue(Def, Instance.Part)); 226 auto *VecPart = Data.PerPartOutput[Def][Instance.Part]; 227 if (!VecPart->getType()->isVectorTy()) { 228 assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar"); 229 return VecPart; 230 } 231 // TODO: Cache created scalar values. 232 Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF); 233 auto *Extract = Builder.CreateExtractElement(VecPart, Lane); 234 // set(Def, Extract, Instance); 235 return Extract; 236 } 237 BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) { 238 VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion(); 239 return VPBB2IRBB[LoopRegion->getPreheaderVPBB()]; 240 } 241 242 void VPTransformState::addNewMetadata(Instruction *To, 243 const Instruction *Orig) { 244 // If the loop was versioned with memchecks, add the corresponding no-alias 245 // metadata. 246 if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig))) 247 LVer->annotateInstWithNoAlias(To, Orig); 248 } 249 250 void VPTransformState::addMetadata(Instruction *To, Instruction *From) { 251 // No source instruction to transfer metadata from? 252 if (!From) 253 return; 254 255 propagateMetadata(To, From); 256 addNewMetadata(To, From); 257 } 258 259 void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) { 260 // No source instruction to transfer metadata from? 261 if (!From) 262 return; 263 264 for (Value *V : To) { 265 if (Instruction *I = dyn_cast<Instruction>(V)) 266 addMetadata(I, From); 267 } 268 } 269 270 void VPTransformState::setDebugLocFromInst(const Value *V) { 271 const Instruction *Inst = dyn_cast<Instruction>(V); 272 if (!Inst) { 273 Builder.SetCurrentDebugLocation(DebugLoc()); 274 return; 275 } 276 277 const DILocation *DIL = Inst->getDebugLoc(); 278 // When a FSDiscriminator is enabled, we don't need to add the multiply 279 // factors to the discriminators. 280 if (DIL && Inst->getFunction()->shouldEmitDebugInfoForProfiling() && 281 !Inst->isDebugOrPseudoInst() && !EnableFSDiscriminator) { 282 // FIXME: For scalable vectors, assume vscale=1. 283 auto NewDIL = 284 DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); 285 if (NewDIL) 286 Builder.SetCurrentDebugLocation(*NewDIL); 287 else 288 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: " 289 << DIL->getFilename() << " Line: " << DIL->getLine()); 290 } else 291 Builder.SetCurrentDebugLocation(DIL); 292 } 293 294 BasicBlock * 295 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { 296 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks. 297 // Pred stands for Predessor. Prev stands for Previous - last visited/created. 298 BasicBlock *PrevBB = CFG.PrevBB; 299 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(), 300 PrevBB->getParent(), CFG.ExitBB); 301 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n'); 302 303 // Hook up the new basic block to its predecessors. 304 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { 305 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); 306 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); 307 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; 308 309 assert(PredBB && "Predecessor basic-block not found building successor."); 310 auto *PredBBTerminator = PredBB->getTerminator(); 311 LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); 312 313 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator); 314 if (isa<UnreachableInst>(PredBBTerminator)) { 315 assert(PredVPSuccessors.size() == 1 && 316 "Predecessor ending w/o branch must have single successor."); 317 DebugLoc DL = PredBBTerminator->getDebugLoc(); 318 PredBBTerminator->eraseFromParent(); 319 auto *Br = BranchInst::Create(NewBB, PredBB); 320 Br->setDebugLoc(DL); 321 } else if (TermBr && !TermBr->isConditional()) { 322 TermBr->setSuccessor(0, NewBB); 323 } else { 324 // Set each forward successor here when it is created, excluding 325 // backedges. A backward successor is set when the branch is created. 326 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; 327 assert(!TermBr->getSuccessor(idx) && 328 "Trying to reset an existing successor block."); 329 TermBr->setSuccessor(idx, NewBB); 330 } 331 } 332 return NewBB; 333 } 334 335 void VPBasicBlock::execute(VPTransformState *State) { 336 bool Replica = State->Instance && !State->Instance->isFirstIteration(); 337 VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; 338 VPBlockBase *SingleHPred = nullptr; 339 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. 340 341 auto IsLoopRegion = [](VPBlockBase *BB) { 342 auto *R = dyn_cast<VPRegionBlock>(BB); 343 return R && !R->isReplicator(); 344 }; 345 346 // 1. Create an IR basic block, or reuse the last one or ExitBB if possible. 347 if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) { 348 // ExitBB can be re-used for the exit block of the Plan. 349 NewBB = State->CFG.ExitBB; 350 State->CFG.PrevBB = NewBB; 351 352 // Update the branch instruction in the predecessor to branch to ExitBB. 353 VPBlockBase *PredVPB = getSingleHierarchicalPredecessor(); 354 VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock(); 355 assert(PredVPB->getSingleSuccessor() == this && 356 "predecessor must have the current block as only successor"); 357 BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB]; 358 // The Exit block of a loop is always set to be successor 0 of the Exiting 359 // block. 360 cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB); 361 } else if (PrevVPBB && /* A */ 362 !((SingleHPred = getSingleHierarchicalPredecessor()) && 363 SingleHPred->getExitingBasicBlock() == PrevVPBB && 364 PrevVPBB->getSingleHierarchicalSuccessor() && 365 (SingleHPred->getParent() == getEnclosingLoopRegion() && 366 !IsLoopRegion(SingleHPred))) && /* B */ 367 !(Replica && getPredecessors().empty())) { /* C */ 368 // The last IR basic block is reused, as an optimization, in three cases: 369 // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null; 370 // B. when the current VPBB has a single (hierarchical) predecessor which 371 // is PrevVPBB and the latter has a single (hierarchical) successor which 372 // both are in the same non-replicator region; and 373 // C. when the current VPBB is an entry of a region replica - where PrevVPBB 374 // is the exiting VPBB of this region from a previous instance, or the 375 // predecessor of this region. 376 377 NewBB = createEmptyBasicBlock(State->CFG); 378 State->Builder.SetInsertPoint(NewBB); 379 // Temporarily terminate with unreachable until CFG is rewired. 380 UnreachableInst *Terminator = State->Builder.CreateUnreachable(); 381 // Register NewBB in its loop. In innermost loops its the same for all 382 // BB's. 383 if (State->CurrentVectorLoop) 384 State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI); 385 State->Builder.SetInsertPoint(Terminator); 386 State->CFG.PrevBB = NewBB; 387 } 388 389 // 2. Fill the IR basic block with IR instructions. 390 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() 391 << " in BB:" << NewBB->getName() << '\n'); 392 393 State->CFG.VPBB2IRBB[this] = NewBB; 394 State->CFG.PrevVPBB = this; 395 396 for (VPRecipeBase &Recipe : Recipes) 397 Recipe.execute(*State); 398 399 LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB); 400 } 401 402 void VPBasicBlock::dropAllReferences(VPValue *NewValue) { 403 for (VPRecipeBase &R : Recipes) { 404 for (auto *Def : R.definedValues()) 405 Def->replaceAllUsesWith(NewValue); 406 407 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++) 408 R.setOperand(I, NewValue); 409 } 410 } 411 412 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) { 413 assert((SplitAt == end() || SplitAt->getParent() == this) && 414 "can only split at a position in the same block"); 415 416 SmallVector<VPBlockBase *, 2> Succs(successors()); 417 // First, disconnect the current block from its successors. 418 for (VPBlockBase *Succ : Succs) 419 VPBlockUtils::disconnectBlocks(this, Succ); 420 421 // Create new empty block after the block to split. 422 auto *SplitBlock = new VPBasicBlock(getName() + ".split"); 423 VPBlockUtils::insertBlockAfter(SplitBlock, this); 424 425 // Add successors for block to split to new block. 426 for (VPBlockBase *Succ : Succs) 427 VPBlockUtils::connectBlocks(SplitBlock, Succ); 428 429 // Finally, move the recipes starting at SplitAt to new block. 430 for (VPRecipeBase &ToMove : 431 make_early_inc_range(make_range(SplitAt, this->end()))) 432 ToMove.moveBefore(*SplitBlock, SplitBlock->end()); 433 434 return SplitBlock; 435 } 436 437 VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() { 438 VPRegionBlock *P = getParent(); 439 if (P && P->isReplicator()) { 440 P = P->getParent(); 441 assert(!cast<VPRegionBlock>(P)->isReplicator() && 442 "unexpected nested replicate regions"); 443 } 444 return P; 445 } 446 447 static bool hasConditionalTerminator(const VPBasicBlock *VPBB) { 448 if (VPBB->empty()) { 449 assert( 450 VPBB->getNumSuccessors() < 2 && 451 "block with multiple successors doesn't have a recipe as terminator"); 452 return false; 453 } 454 455 const VPRecipeBase *R = &VPBB->back(); 456 auto *VPI = dyn_cast<VPInstruction>(R); 457 bool IsCondBranch = 458 isa<VPBranchOnMaskRecipe>(R) || 459 (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond || 460 VPI->getOpcode() == VPInstruction::BranchOnCount)); 461 (void)IsCondBranch; 462 463 if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) { 464 assert(IsCondBranch && "block with multiple successors not terminated by " 465 "conditional branch recipe"); 466 467 return true; 468 } 469 470 assert( 471 !IsCondBranch && 472 "block with 0 or 1 successors terminated by conditional branch recipe"); 473 return false; 474 } 475 476 VPRecipeBase *VPBasicBlock::getTerminator() { 477 if (hasConditionalTerminator(this)) 478 return &back(); 479 return nullptr; 480 } 481 482 const VPRecipeBase *VPBasicBlock::getTerminator() const { 483 if (hasConditionalTerminator(this)) 484 return &back(); 485 return nullptr; 486 } 487 488 bool VPBasicBlock::isExiting() const { 489 return getParent()->getExitingBasicBlock() == this; 490 } 491 492 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 493 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const { 494 if (getSuccessors().empty()) { 495 O << Indent << "No successors\n"; 496 } else { 497 O << Indent << "Successor(s): "; 498 ListSeparator LS; 499 for (auto *Succ : getSuccessors()) 500 O << LS << Succ->getName(); 501 O << '\n'; 502 } 503 } 504 505 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent, 506 VPSlotTracker &SlotTracker) const { 507 O << Indent << getName() << ":\n"; 508 509 auto RecipeIndent = Indent + " "; 510 for (const VPRecipeBase &Recipe : *this) { 511 Recipe.print(O, RecipeIndent, SlotTracker); 512 O << '\n'; 513 } 514 515 printSuccessors(O, Indent); 516 } 517 #endif 518 519 void VPRegionBlock::dropAllReferences(VPValue *NewValue) { 520 for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) 521 // Drop all references in VPBasicBlocks and replace all uses with 522 // DummyValue. 523 Block->dropAllReferences(NewValue); 524 } 525 526 void VPRegionBlock::execute(VPTransformState *State) { 527 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> 528 RPOT(Entry); 529 530 if (!isReplicator()) { 531 // Create and register the new vector loop. 532 Loop *PrevLoop = State->CurrentVectorLoop; 533 State->CurrentVectorLoop = State->LI->AllocateLoop(); 534 BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()]; 535 Loop *ParentLoop = State->LI->getLoopFor(VectorPH); 536 537 // Insert the new loop into the loop nest and register the new basic blocks 538 // before calling any utilities such as SCEV that require valid LoopInfo. 539 if (ParentLoop) 540 ParentLoop->addChildLoop(State->CurrentVectorLoop); 541 else 542 State->LI->addTopLevelLoop(State->CurrentVectorLoop); 543 544 // Visit the VPBlocks connected to "this", starting from it. 545 for (VPBlockBase *Block : RPOT) { 546 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); 547 Block->execute(State); 548 } 549 550 State->CurrentVectorLoop = PrevLoop; 551 return; 552 } 553 554 assert(!State->Instance && "Replicating a Region with non-null instance."); 555 556 // Enter replicating mode. 557 State->Instance = VPIteration(0, 0); 558 559 for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) { 560 State->Instance->Part = Part; 561 assert(!State->VF.isScalable() && "VF is assumed to be non scalable."); 562 for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF; 563 ++Lane) { 564 State->Instance->Lane = VPLane(Lane, VPLane::Kind::First); 565 // Visit the VPBlocks connected to \p this, starting from it. 566 for (VPBlockBase *Block : RPOT) { 567 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); 568 Block->execute(State); 569 } 570 } 571 } 572 573 // Exit replicating mode. 574 State->Instance.reset(); 575 } 576 577 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 578 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, 579 VPSlotTracker &SlotTracker) const { 580 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {"; 581 auto NewIndent = Indent + " "; 582 for (auto *BlockBase : vp_depth_first_shallow(Entry)) { 583 O << '\n'; 584 BlockBase->print(O, NewIndent, SlotTracker); 585 } 586 O << Indent << "}\n"; 587 588 printSuccessors(O, Indent); 589 } 590 #endif 591 592 VPlan::~VPlan() { 593 for (auto &KV : LiveOuts) 594 delete KV.second; 595 LiveOuts.clear(); 596 597 if (Entry) { 598 VPValue DummyValue; 599 for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) 600 Block->dropAllReferences(&DummyValue); 601 602 VPBlockBase::deleteCFG(Entry); 603 604 Preheader->dropAllReferences(&DummyValue); 605 delete Preheader; 606 } 607 for (VPValue *VPV : VPLiveInsToFree) 608 delete VPV; 609 if (BackedgeTakenCount) 610 delete BackedgeTakenCount; 611 } 612 613 VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE) { 614 VPBasicBlock *Preheader = new VPBasicBlock("ph"); 615 VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); 616 auto Plan = std::make_unique<VPlan>(Preheader, VecPreheader); 617 Plan->TripCount = 618 vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE); 619 return Plan; 620 } 621 622 VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() { 623 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); 624 for (VPRecipeBase &R : Header->phis()) { 625 if (isa<VPActiveLaneMaskPHIRecipe>(&R)) 626 return cast<VPActiveLaneMaskPHIRecipe>(&R); 627 } 628 return nullptr; 629 } 630 631 void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, 632 Value *CanonicalIVStartValue, 633 VPTransformState &State, 634 bool IsEpilogueVectorization) { 635 // Check if the backedge taken count is needed, and if so build it. 636 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { 637 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); 638 auto *TCMO = Builder.CreateSub(TripCountV, 639 ConstantInt::get(TripCountV->getType(), 1), 640 "trip.count.minus.1"); 641 auto VF = State.VF; 642 Value *VTCMO = 643 VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast"); 644 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) 645 State.set(BackedgeTakenCount, VTCMO, Part); 646 } 647 648 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) 649 State.set(&VectorTripCount, VectorTripCountV, Part); 650 651 // When vectorizing the epilogue loop, the canonical induction start value 652 // needs to be changed from zero to the value after the main vector loop. 653 // FIXME: Improve modeling for canonical IV start values in the epilogue loop. 654 if (CanonicalIVStartValue) { 655 VPValue *VPV = getVPValueOrAddLiveIn(CanonicalIVStartValue); 656 auto *IV = getCanonicalIV(); 657 assert(all_of(IV->users(), 658 [](const VPUser *U) { 659 if (isa<VPScalarIVStepsRecipe>(U) || 660 isa<VPDerivedIVRecipe>(U)) 661 return true; 662 auto *VPI = cast<VPInstruction>(U); 663 return VPI->getOpcode() == 664 VPInstruction::CanonicalIVIncrement || 665 VPI->getOpcode() == 666 VPInstruction::CanonicalIVIncrementNUW; 667 }) && 668 "the canonical IV should only be used by its increments or " 669 "ScalarIVSteps when resetting the start value"); 670 IV->setOperand(0, VPV); 671 } 672 } 673 674 /// Generate the code inside the preheader and body of the vectorized loop. 675 /// Assumes a single pre-header basic-block was created for this. Introduce 676 /// additional basic-blocks as needed, and fill them all. 677 void VPlan::execute(VPTransformState *State) { 678 // Set the reverse mapping from VPValues to Values for code generation. 679 for (auto &Entry : Value2VPValue) 680 State->VPValue2Value[Entry.second] = Entry.first; 681 682 // Initialize CFG state. 683 State->CFG.PrevVPBB = nullptr; 684 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor(); 685 BasicBlock *VectorPreHeader = State->CFG.PrevBB; 686 State->Builder.SetInsertPoint(VectorPreHeader->getTerminator()); 687 688 // Generate code in the loop pre-header and body. 689 for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) 690 Block->execute(State); 691 692 VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock(); 693 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; 694 695 // Fix the latch value of canonical, reduction and first-order recurrences 696 // phis in the vector loop. 697 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); 698 for (VPRecipeBase &R : Header->phis()) { 699 // Skip phi-like recipes that generate their backedege values themselves. 700 if (isa<VPWidenPHIRecipe>(&R)) 701 continue; 702 703 if (isa<VPWidenPointerInductionRecipe>(&R) || 704 isa<VPWidenIntOrFpInductionRecipe>(&R)) { 705 PHINode *Phi = nullptr; 706 if (isa<VPWidenIntOrFpInductionRecipe>(&R)) { 707 Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0)); 708 } else { 709 auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R); 710 // TODO: Split off the case that all users of a pointer phi are scalar 711 // from the VPWidenPointerInductionRecipe. 712 if (WidenPhi->onlyScalarsGenerated(State->VF)) 713 continue; 714 715 auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0)); 716 Phi = cast<PHINode>(GEP->getPointerOperand()); 717 } 718 719 Phi->setIncomingBlock(1, VectorLatchBB); 720 721 // Move the last step to the end of the latch block. This ensures 722 // consistent placement of all induction updates. 723 Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1)); 724 Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode()); 725 continue; 726 } 727 728 auto *PhiR = cast<VPHeaderPHIRecipe>(&R); 729 // For canonical IV, first-order recurrences and in-order reduction phis, 730 // only a single part is generated, which provides the last part from the 731 // previous iteration. For non-ordered reductions all UF parts are 732 // generated. 733 bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) || 734 isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) || 735 (isa<VPReductionPHIRecipe>(PhiR) && 736 cast<VPReductionPHIRecipe>(PhiR)->isOrdered()); 737 unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF; 738 739 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { 740 Value *Phi = State->get(PhiR, Part); 741 Value *Val = State->get(PhiR->getBackedgeValue(), 742 SinglePartNeeded ? State->UF - 1 : Part); 743 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB); 744 } 745 } 746 747 // We do not attempt to preserve DT for outer loop vectorization currently. 748 if (!EnableVPlanNativePath) { 749 BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header]; 750 State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader); 751 updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB, 752 State->CFG.ExitBB); 753 } 754 } 755 756 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 757 LLVM_DUMP_METHOD 758 void VPlan::print(raw_ostream &O) const { 759 VPSlotTracker SlotTracker(this); 760 761 O << "VPlan '" << getName() << "' {"; 762 763 if (VectorTripCount.getNumUsers() > 0) { 764 O << "\nLive-in "; 765 VectorTripCount.printAsOperand(O, SlotTracker); 766 O << " = vector-trip-count"; 767 } 768 769 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { 770 O << "\nLive-in "; 771 BackedgeTakenCount->printAsOperand(O, SlotTracker); 772 O << " = backedge-taken count"; 773 } 774 775 O << "\n"; 776 if (TripCount->isLiveIn()) 777 O << "Live-in "; 778 TripCount->printAsOperand(O, SlotTracker); 779 O << " = original trip-count"; 780 O << "\n"; 781 782 if (!getPreheader()->empty()) { 783 O << "\n"; 784 getPreheader()->print(O, "", SlotTracker); 785 } 786 787 for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) { 788 O << '\n'; 789 Block->print(O, "", SlotTracker); 790 } 791 792 if (!LiveOuts.empty()) 793 O << "\n"; 794 for (const auto &KV : LiveOuts) { 795 KV.second->print(O, SlotTracker); 796 } 797 798 O << "}\n"; 799 } 800 801 std::string VPlan::getName() const { 802 std::string Out; 803 raw_string_ostream RSO(Out); 804 RSO << Name << " for "; 805 if (!VFs.empty()) { 806 RSO << "VF={" << VFs[0]; 807 for (ElementCount VF : drop_begin(VFs)) 808 RSO << "," << VF; 809 RSO << "},"; 810 } 811 812 if (UFs.empty()) { 813 RSO << "UF>=1"; 814 } else { 815 RSO << "UF={" << UFs[0]; 816 for (unsigned UF : drop_begin(UFs)) 817 RSO << "," << UF; 818 RSO << "}"; 819 } 820 821 return Out; 822 } 823 824 LLVM_DUMP_METHOD 825 void VPlan::printDOT(raw_ostream &O) const { 826 VPlanPrinter Printer(O, *this); 827 Printer.dump(); 828 } 829 830 LLVM_DUMP_METHOD 831 void VPlan::dump() const { print(dbgs()); } 832 #endif 833 834 void VPlan::addLiveOut(PHINode *PN, VPValue *V) { 835 assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists"); 836 LiveOuts.insert({PN, new VPLiveOut(PN, V)}); 837 } 838 839 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB, 840 BasicBlock *LoopLatchBB, 841 BasicBlock *LoopExitBB) { 842 // The vector body may be more than a single basic-block by this point. 843 // Update the dominator tree information inside the vector body by propagating 844 // it from header to latch, expecting only triangular control-flow, if any. 845 BasicBlock *PostDomSucc = nullptr; 846 for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) { 847 // Get the list of successors of this block. 848 std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB)); 849 assert(Succs.size() <= 2 && 850 "Basic block in vector loop has more than 2 successors."); 851 PostDomSucc = Succs[0]; 852 if (Succs.size() == 1) { 853 assert(PostDomSucc->getSinglePredecessor() && 854 "PostDom successor has more than one predecessor."); 855 DT->addNewBlock(PostDomSucc, BB); 856 continue; 857 } 858 BasicBlock *InterimSucc = Succs[1]; 859 if (PostDomSucc->getSingleSuccessor() == InterimSucc) { 860 PostDomSucc = Succs[1]; 861 InterimSucc = Succs[0]; 862 } 863 assert(InterimSucc->getSingleSuccessor() == PostDomSucc && 864 "One successor of a basic block does not lead to the other."); 865 assert(InterimSucc->getSinglePredecessor() && 866 "Interim successor has more than one predecessor."); 867 assert(PostDomSucc->hasNPredecessors(2) && 868 "PostDom successor has more than two predecessors."); 869 DT->addNewBlock(InterimSucc, BB); 870 DT->addNewBlock(PostDomSucc, BB); 871 } 872 // Latch block is a new dominator for the loop exit. 873 DT->changeImmediateDominator(LoopExitBB, LoopLatchBB); 874 assert(DT->verify(DominatorTree::VerificationLevel::Fast)); 875 } 876 877 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 878 879 Twine VPlanPrinter::getUID(const VPBlockBase *Block) { 880 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") + 881 Twine(getOrCreateBID(Block)); 882 } 883 884 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { 885 const std::string &Name = Block->getName(); 886 if (!Name.empty()) 887 return Name; 888 return "VPB" + Twine(getOrCreateBID(Block)); 889 } 890 891 void VPlanPrinter::dump() { 892 Depth = 1; 893 bumpIndent(0); 894 OS << "digraph VPlan {\n"; 895 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; 896 if (!Plan.getName().empty()) 897 OS << "\\n" << DOT::EscapeString(Plan.getName()); 898 if (Plan.BackedgeTakenCount) { 899 OS << ", where:\\n"; 900 Plan.BackedgeTakenCount->print(OS, SlotTracker); 901 OS << " := BackedgeTakenCount"; 902 } 903 OS << "\"]\n"; 904 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n"; 905 OS << "edge [fontname=Courier, fontsize=30]\n"; 906 OS << "compound=true\n"; 907 908 dumpBlock(Plan.getPreheader()); 909 910 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry())) 911 dumpBlock(Block); 912 913 OS << "}\n"; 914 } 915 916 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { 917 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block)) 918 dumpBasicBlock(BasicBlock); 919 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 920 dumpRegion(Region); 921 else 922 llvm_unreachable("Unsupported kind of VPBlock."); 923 } 924 925 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, 926 bool Hidden, const Twine &Label) { 927 // Due to "dot" we print an edge between two regions as an edge between the 928 // exiting basic block and the entry basic of the respective regions. 929 const VPBlockBase *Tail = From->getExitingBasicBlock(); 930 const VPBlockBase *Head = To->getEntryBasicBlock(); 931 OS << Indent << getUID(Tail) << " -> " << getUID(Head); 932 OS << " [ label=\"" << Label << '\"'; 933 if (Tail != From) 934 OS << " ltail=" << getUID(From); 935 if (Head != To) 936 OS << " lhead=" << getUID(To); 937 if (Hidden) 938 OS << "; splines=none"; 939 OS << "]\n"; 940 } 941 942 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { 943 auto &Successors = Block->getSuccessors(); 944 if (Successors.size() == 1) 945 drawEdge(Block, Successors.front(), false, ""); 946 else if (Successors.size() == 2) { 947 drawEdge(Block, Successors.front(), false, "T"); 948 drawEdge(Block, Successors.back(), false, "F"); 949 } else { 950 unsigned SuccessorNumber = 0; 951 for (auto *Successor : Successors) 952 drawEdge(Block, Successor, false, Twine(SuccessorNumber++)); 953 } 954 } 955 956 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { 957 // Implement dot-formatted dump by performing plain-text dump into the 958 // temporary storage followed by some post-processing. 959 OS << Indent << getUID(BasicBlock) << " [label =\n"; 960 bumpIndent(1); 961 std::string Str; 962 raw_string_ostream SS(Str); 963 // Use no indentation as we need to wrap the lines into quotes ourselves. 964 BasicBlock->print(SS, "", SlotTracker); 965 966 // We need to process each line of the output separately, so split 967 // single-string plain-text dump. 968 SmallVector<StringRef, 0> Lines; 969 StringRef(Str).rtrim('\n').split(Lines, "\n"); 970 971 auto EmitLine = [&](StringRef Line, StringRef Suffix) { 972 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix; 973 }; 974 975 // Don't need the "+" after the last line. 976 for (auto Line : make_range(Lines.begin(), Lines.end() - 1)) 977 EmitLine(Line, " +\n"); 978 EmitLine(Lines.back(), "\n"); 979 980 bumpIndent(-1); 981 OS << Indent << "]\n"; 982 983 dumpEdges(BasicBlock); 984 } 985 986 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { 987 OS << Indent << "subgraph " << getUID(Region) << " {\n"; 988 bumpIndent(1); 989 OS << Indent << "fontname=Courier\n" 990 << Indent << "label=\"" 991 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ") 992 << DOT::EscapeString(Region->getName()) << "\"\n"; 993 // Dump the blocks of the region. 994 assert(Region->getEntry() && "Region contains no inner blocks."); 995 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry())) 996 dumpBlock(Block); 997 bumpIndent(-1); 998 OS << Indent << "}\n"; 999 dumpEdges(Region); 1000 } 1001 1002 void VPlanIngredient::print(raw_ostream &O) const { 1003 if (auto *Inst = dyn_cast<Instruction>(V)) { 1004 if (!Inst->getType()->isVoidTy()) { 1005 Inst->printAsOperand(O, false); 1006 O << " = "; 1007 } 1008 O << Inst->getOpcodeName() << " "; 1009 unsigned E = Inst->getNumOperands(); 1010 if (E > 0) { 1011 Inst->getOperand(0)->printAsOperand(O, false); 1012 for (unsigned I = 1; I < E; ++I) 1013 Inst->getOperand(I)->printAsOperand(O << ", ", false); 1014 } 1015 } else // !Inst 1016 V->printAsOperand(O, false); 1017 } 1018 1019 #endif 1020 1021 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); 1022 1023 void VPValue::replaceAllUsesWith(VPValue *New) { 1024 for (unsigned J = 0; J < getNumUsers();) { 1025 VPUser *User = Users[J]; 1026 unsigned NumUsers = getNumUsers(); 1027 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) 1028 if (User->getOperand(I) == this) 1029 User->setOperand(I, New); 1030 // If a user got removed after updating the current user, the next user to 1031 // update will be moved to the current position, so we only need to 1032 // increment the index if the number of users did not change. 1033 if (NumUsers == getNumUsers()) 1034 J++; 1035 } 1036 } 1037 1038 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1039 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const { 1040 if (const Value *UV = getUnderlyingValue()) { 1041 OS << "ir<"; 1042 UV->printAsOperand(OS, false); 1043 OS << ">"; 1044 return; 1045 } 1046 1047 unsigned Slot = Tracker.getSlot(this); 1048 if (Slot == unsigned(-1)) 1049 OS << "<badref>"; 1050 else 1051 OS << "vp<%" << Tracker.getSlot(this) << ">"; 1052 } 1053 1054 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const { 1055 interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) { 1056 Op->printAsOperand(O, SlotTracker); 1057 }); 1058 } 1059 #endif 1060 1061 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region, 1062 Old2NewTy &Old2New, 1063 InterleavedAccessInfo &IAI) { 1064 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> 1065 RPOT(Region->getEntry()); 1066 for (VPBlockBase *Base : RPOT) { 1067 visitBlock(Base, Old2New, IAI); 1068 } 1069 } 1070 1071 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, 1072 InterleavedAccessInfo &IAI) { 1073 if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) { 1074 for (VPRecipeBase &VPI : *VPBB) { 1075 if (isa<VPHeaderPHIRecipe>(&VPI)) 1076 continue; 1077 assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions"); 1078 auto *VPInst = cast<VPInstruction>(&VPI); 1079 1080 auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue()); 1081 if (!Inst) 1082 continue; 1083 auto *IG = IAI.getInterleaveGroup(Inst); 1084 if (!IG) 1085 continue; 1086 1087 auto NewIGIter = Old2New.find(IG); 1088 if (NewIGIter == Old2New.end()) 1089 Old2New[IG] = new InterleaveGroup<VPInstruction>( 1090 IG->getFactor(), IG->isReverse(), IG->getAlign()); 1091 1092 if (Inst == IG->getInsertPos()) 1093 Old2New[IG]->setInsertPos(VPInst); 1094 1095 InterleaveGroupMap[VPInst] = Old2New[IG]; 1096 InterleaveGroupMap[VPInst]->insertMember( 1097 VPInst, IG->getIndex(Inst), 1098 Align(IG->isReverse() ? (-1) * int(IG->getFactor()) 1099 : IG->getFactor())); 1100 } 1101 } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 1102 visitRegion(Region, Old2New, IAI); 1103 else 1104 llvm_unreachable("Unsupported kind of VPBlock."); 1105 } 1106 1107 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan, 1108 InterleavedAccessInfo &IAI) { 1109 Old2NewTy Old2New; 1110 visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI); 1111 } 1112 1113 void VPSlotTracker::assignSlot(const VPValue *V) { 1114 assert(!Slots.contains(V) && "VPValue already has a slot!"); 1115 Slots[V] = NextSlot++; 1116 } 1117 1118 void VPSlotTracker::assignSlots(const VPlan &Plan) { 1119 assignSlot(&Plan.VectorTripCount); 1120 if (Plan.BackedgeTakenCount) 1121 assignSlot(Plan.BackedgeTakenCount); 1122 assignSlots(Plan.getPreheader()); 1123 1124 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>> 1125 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry())); 1126 for (const VPBasicBlock *VPBB : 1127 VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT)) 1128 assignSlots(VPBB); 1129 } 1130 1131 void VPSlotTracker::assignSlots(const VPBasicBlock *VPBB) { 1132 for (const VPRecipeBase &Recipe : *VPBB) 1133 for (VPValue *Def : Recipe.definedValues()) 1134 assignSlot(Def); 1135 } 1136 1137 bool vputils::onlyFirstLaneUsed(VPValue *Def) { 1138 return all_of(Def->users(), 1139 [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); }); 1140 } 1141 1142 VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, 1143 ScalarEvolution &SE) { 1144 if (auto *Expanded = Plan.getSCEVExpansion(Expr)) 1145 return Expanded; 1146 VPValue *Expanded = nullptr; 1147 if (auto *E = dyn_cast<SCEVConstant>(Expr)) 1148 Expanded = Plan.getVPValueOrAddLiveIn(E->getValue()); 1149 else if (auto *E = dyn_cast<SCEVUnknown>(Expr)) 1150 Expanded = Plan.getVPValueOrAddLiveIn(E->getValue()); 1151 else { 1152 Expanded = new VPExpandSCEVRecipe(Expr, SE); 1153 Plan.getPreheader()->appendRecipe(Expanded->getDefiningRecipe()); 1154 } 1155 Plan.addSCEVExpansion(Expr, Expanded); 1156 return Expanded; 1157 } 1158