//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>

using namespace llvm;

using VectorParts = SmallVector<Value *, 2>;

extern cl::opt<bool> EnableVPlanNativePath;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}
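
// Recipes not handled explicitly in the switches above and below hit the
// conservative default: an unknown recipe is assumed to read and write
// memory and to have side effects.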

bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPWidenIntOrFpInductionSC:
  case VPWidenPointerInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC:
  case VPScalarIVStepsSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (Plan.isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   State.Builder.GetInsertBlock());
}

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  Parent = &BB;
  BB.getRecipeList().insert(I, this);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

void VPInstruction::generateInstruction(VPTransformState &State,
                                        unsigned Part) {
  IRBuilderBase &Builder = State.Builder;
  Builder.SetCurrentDebugLocation(DL);

  if (Instruction::isBinaryOp(getOpcode())) {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    Value *V =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    State.set(this, V, Part);
    return;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    Value *V = Builder.CreateNot(A, Name);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ICmpULE: {
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *V = Builder.CreateICmpULE(IV, TC, Name);
    State.set(this, V, Part);
    break;
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
    State.set(this, V, Part);
    break;
  }
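  // Note on semantics: llvm.get.active.lane.mask(%base, %n) yields an
  // <VF x i1> mask whose lane j is (%base + j) < %n (unsigned), predicating
  // this part's lanes against the scalar trip count.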
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop trip count.
    Value *ScalarTC = State.get(getOperand(1), Part);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    Instruction *Call = Builder.CreateIntrinsic(
        Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
        {VIVElem0, ScalarTC}, nullptr, Name);
    State.set(this, Call, Part);
    break;
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy()) {
      State.set(this, PartMinus1, Part);
    } else {
      Value *V2 = State.get(getOperand(1), Part);
      State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
                Part);
    }
    break;
  }
  case VPInstruction::CanonicalIVIncrement:
  case VPInstruction::CanonicalIVIncrementNUW: {
    Value *Next = nullptr;
    if (Part == 0) {
      bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
      auto *Phi = State.get(getOperand(0), 0);
      // The loop step is equal to the vectorization factor (num of SIMD
      // elements) times the unroll factor (num of SIMD instructions).
      Value *Step =
          createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
      Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
    } else {
      Next = State.get(this, 0);
    }

    State.set(this, Next, Part);
    break;
  }

  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::CanonicalIVIncrementForPartNUW: {
    bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW;
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0) {
      State.set(this, IV, Part);
      break;
    }

    // The canonical IV is incremented by the vectorization factor (num of
    // SIMD elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false);
    State.set(this, Next, Part);
    break;
  }
  case VPInstruction::BranchOnCond: {
    if (Part != 0)
      break;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to the backward destination for exiting blocks
    // now and to the forward destination(s) later when they are created.
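    // For an exiting block, the interim IR is of the form (illustrative):
    //   br i1 %cond, label <tbd>, label %vector.body
    // with the forward destination patched in as successor 0 once it exists.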
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      break;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to the backward destination (the header) now and
    // to the forward destination (the exit/middle block) later when it is
    // created. Note that CreateCondBr expects a valid BB as its first
    // argument, so we set that successor to nullptr afterwards.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(FMF);
  for (unsigned Part = 0; Part < State.UF; ++Part)
    generateInstruction(State, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::ICmpULE:
    O << "icmp ule";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::CanonicalIVIncrement:
    O << "VF * UF + ";
    break;
  case VPInstruction::CanonicalIVIncrementNUW:
    O << "VF * UF +(nuw) ";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part + ";
    break;
  case VPInstruction::CanonicalIVIncrementForPartNUW:
    O << "VF * Part +(nuw) ";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count ";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  O << FMF;

  for (const VPValue *Operand : operands()) {
    O << " ";
    Operand->printAsOperand(O, SlotTracker);
  }

  if (DL) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
  // Make sure the VPInstruction is a floating-point operation.
  assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
          Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
          Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
          Opcode == Instruction::FCmp) &&
         "this op can't take fast-math flags");
  FMF = FMFNew;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  auto *CI = cast<CallInst>(getUnderlyingInstr());
  if (CI->getType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CI->getCalledFunction()->getName() << "(";
  printOperands(O, SlotTracker);
  O << ")";
}

void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
  O << (InvariantCond ? " (condition is loop invariant)" : "");
}
#endif

void VPWidenSelectRecipe::execute(VPTransformState &State) {
  auto &I = *cast<SelectInst>(getUnderlyingInstr());
  State.setDebugLocFromInst(&I);

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
  auto *InvarCond =
      InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    State.addMetadata(Sel, &I);
  }
}

void VPWidenRecipe::execute(VPTransformState &State) {
  auto &I = *cast<Instruction>(getUnderlyingValue());
  auto &Builder = State.Builder;
  switch (I.getOpcode()) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    State.setDebugLocFromInst(&I);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);

      if (auto *VecOp = dyn_cast<Instruction>(V)) {
        VecOp->copyIRFlags(&I);

        // If the instruction is vectorized and was in a basic block that
        // needed predication, we can't propagate poison-generating flags
        // (nuw/nsw, exact, etc.). The control flow has been linearized and
        // the instruction is no longer guarded by the predicate, which could
        // cause the flag properties to no longer hold.
        if (State.MayGeneratePoisonRecipes.contains(this))
          VecOp->dropPoisonGeneratingFlags();
      }

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, &I);
    }

    break;
  }
  case Instruction::Freeze: {
    State.setDebugLocFromInst(&I);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = (I.getOpcode() == Instruction::FCmp);
    auto *Cmp = cast<CmpInst>(&I);
    State.setDebugLocFromInst(Cmp);
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        Builder.setFastMathFlags(Cmp->getFastMathFlags());
        C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, &I);
    }

    break;
  }

  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    auto *CI = cast<CastInst>(&I);
    State.setDebugLocFromInst(CI);

    // Vectorize casts.
    Type *DestTy = (State.VF.isScalar())
                       ? CI->getType()
                       : VectorType::get(CI->getType(), State.VF);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
      State.set(this, Cast, Part);
      State.addMetadata(Cast, &I);
    }
    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
  printOperands(O, SlotTracker);
}

void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    O << "\\l\"";
    O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\" ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
  auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
  return StartC && StartC->isZero() && StepC && StepC->isOne();
}

VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
  return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
}

bool VPScalarIVStepsRecipe::isCanonical() const {
  auto *CanIV = getCanonicalIV();
  // The start value of the steps-recipe must match the start value of the
  // canonical induction, and it must step by 1.
  if (CanIV->getStartValue() != getStartValue())
    return false;
  auto *StepVPV = getStepValue();
  if (StepVPV->getDef())
    return false;
  auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
  return StepC && StepC->isOne();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}
#endif

void VPWidenGEPRecipe::execute(VPTransformState &State) {
  auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
  // Construct a vector GEP by widening the operands of the scalar GEP as
  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
  // results in a vector of pointers when at least one operand of the GEP
  // is vector-typed. Thus, to keep the representation compact, we only use
  // vector-typed operands for loop-varying values.

  if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
    // If we are vectorizing, but the GEP has only loop-invariant operands,
    // the GEP we build (by only using vector-typed operands for
    // loop-varying values) would be a scalar pointer. Thus, to ensure we
    // produce a vector of pointers, we need to either arbitrarily pick an
    // operand to broadcast, or broadcast a clone of the original GEP.
    // Here, we broadcast a clone of the original.
    //
    // TODO: If at some point we decide to scalarize instructions having
    //       loop-invariant operands, this special case will no longer be
    //       required. We would add the scalarization decision to
    //       collectLoopScalars() and teach getVectorValue() to broadcast
    //       the lane-zero scalar value.
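    // For example (illustrative): a GEP whose pointer and indices are all
    // loop-invariant yields one scalar clone whose result is splatted into a
    // <VF x ptr> vector, identical in every lane and reused for each part.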
    auto *Clone = State.Builder.Insert(GEP->clone());
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
      State.set(this, EntryPart, Part);
      State.addMetadata(EntryPart, GEP);
    }
  } else {
    // If the GEP has at least one loop-varying operand, we are sure to
    // produce a vector of pointers. But if we are only unrolling, we want
    // to produce a scalar GEP for each unroll part. Thus, the GEP we
    // produce with the code below will be scalar (if VF == 1) or vector
    // (otherwise). Note that for the unroll-only case, we still maintain
    // values in the vector mapping with initVector, as we do for other
    // instructions.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // The pointer operand of the new GEP. If it's loop-invariant, we
      // won't broadcast it.
      auto *Ptr = IsPtrLoopInvariant
                      ? State.get(getOperand(0), VPIteration(0, 0))
                      : State.get(getOperand(0), Part);

      // Collect all the indices for the new GEP. If any index is
      // loop-invariant, we won't broadcast it.
      SmallVector<Value *, 4> Indices;
      for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
        VPValue *Operand = getOperand(I);
        if (IsIndexLoopInvariant[I - 1])
          Indices.push_back(State.get(Operand, VPIteration(0, 0)));
        else
          Indices.push_back(State.get(Operand, Part));
      }

      // If the GEP instruction is vectorized and was in a basic block that
      // needed predication, we can't propagate the poison-generating
      // 'inbounds' flag. The control flow has been linearized and the GEP is
      // no longer guarded by the predicate, which could cause the 'inbounds'
      // property to no longer hold.
      bool IsInBounds =
          GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;

      // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
      // but it should be a vector, otherwise.
      auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
                                             Indices, "", IsInBounds);
      assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
             "NewGEP is not a pointer vector");
      State.set(this, NewGEP, Part);
      State.addMetadata(NewGEP, GEP);
    }
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-GEP ";
  O << (IsPtrLoopInvariant ? "Inv" : "Var");
  size_t IndicesNumber = IsIndexLoopInvariant.size();
  for (size_t I = 0; I < IndicesNumber; ++I)
    O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr ";
  printOperands(O, SlotTracker);
}
#endif

void VPBlendRecipe::execute(VPTransformState &State) {
  State.setDebugLocFromInst(Phi);
  // We know that all PHIs in non-header blocks are converted into
  // selects, so we don't have to worry about the insertion order and we
  // can just use the builder.
  // At this point we generate the predication tree. There may be
  // duplications since this is a simple recursive scan, but future
  // optimizations will clean it up.

  unsigned NumIncoming = getNumIncomingValues();

  // Generate a sequence of selects of the form:
  // SELECT(Mask3, In3,
  //        SELECT(Mask2, In2,
  //               SELECT(Mask1, In1,
  //                      In0)))
  // Note that Mask0 is never used: lanes for which no path reaches this phi
  // are essentially undef and are taken from In0.
  VectorParts Entry(State.UF);
  for (unsigned In = 0; In < NumIncoming; ++In) {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // We might have single edge PHIs (blocks) - use an identity
      // 'select' for the first PHI operand.
      Value *In0 = State.get(getIncomingValue(In), Part);
      if (In == 0)
        Entry[Part] = In0; // Initialize with the first incoming value.
      else {
        // Select between the current value and the previous incoming edge
        // based on the incoming mask.
        Value *Cond = State.get(getMask(In), Part);
        Entry[Part] =
            State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
      }
    }
  }
  for (unsigned Part = 0; Part < State.UF; ++Part)
    State.set(this, Entry[Part], Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  Phi->printAsOperand(O, false);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}

void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  if (getCondOp()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
  if (RdxDesc->IntermediateStore)
    O << " (with final reduction value stored in invariant address sunk "
         "outside of loop)";
}

void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
"CLONE " : "REPLICATE "); 847 848 if (!getUnderlyingInstr()->getType()->isVoidTy()) { 849 printAsOperand(O, SlotTracker); 850 O << " = "; 851 } 852 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) { 853 O << "call @" << CB->getCalledFunction()->getName() << "("; 854 interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), 855 O, [&O, &SlotTracker](VPValue *Op) { 856 Op->printAsOperand(O, SlotTracker); 857 }); 858 O << ")"; 859 } else { 860 O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " "; 861 printOperands(O, SlotTracker); 862 } 863 864 if (AlsoPack) 865 O << " (S->V)"; 866 } 867 #endif 868 869 void VPBranchOnMaskRecipe::execute(VPTransformState &State) { 870 assert(State.Instance && "Branch on Mask works only on single instance."); 871 872 unsigned Part = State.Instance->Part; 873 unsigned Lane = State.Instance->Lane.getKnownLane(); 874 875 Value *ConditionBit = nullptr; 876 VPValue *BlockInMask = getMask(); 877 if (BlockInMask) { 878 ConditionBit = State.get(BlockInMask, Part); 879 if (ConditionBit->getType()->isVectorTy()) 880 ConditionBit = State.Builder.CreateExtractElement( 881 ConditionBit, State.Builder.getInt32(Lane)); 882 } else // Block in mask is all-one. 883 ConditionBit = State.Builder.getTrue(); 884 885 // Replace the temporary unreachable terminator with a new conditional branch, 886 // whose two destinations will be set later when they are created. 887 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator(); 888 assert(isa<UnreachableInst>(CurrentTerminator) && 889 "Expected to replace unreachable terminator with conditional branch."); 890 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit); 891 CondBr->setSuccessor(0, nullptr); 892 ReplaceInstWithInst(CurrentTerminator, CondBr); 893 } 894 895 void VPPredInstPHIRecipe::execute(VPTransformState &State) { 896 assert(State.Instance && "Predicated instruction PHI works per instance."); 897 Instruction *ScalarPredInst = 898 cast<Instruction>(State.get(getOperand(0), *State.Instance)); 899 BasicBlock *PredicatedBB = ScalarPredInst->getParent(); 900 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); 901 assert(PredicatingBB && "Predicated block has no single predecessor."); 902 assert(isa<VPReplicateRecipe>(getOperand(0)) && 903 "operand must be VPReplicateRecipe"); 904 905 // By current pack/unpack logic we need to generate only a single phi node: if 906 // a vector value for the predicated instruction exists at this point it means 907 // the instruction has vector users only, and a phi for the vector value is 908 // needed. In this case the recipe of the predicated instruction is marked to 909 // also do that packing, thereby "hoisting" the insert-element sequence. 910 // Otherwise, a phi node for the scalar value is needed. 911 unsigned Part = State.Instance->Part; 912 if (State.hasVectorValue(getOperand(0), Part)) { 913 Value *VectorValue = State.get(getOperand(0), Part); 914 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue); 915 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); 916 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. 917 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. 918 if (State.hasVectorValue(this, Part)) 919 State.reset(this, VPhi, Part); 920 else 921 State.set(this, VPhi, Part); 922 // NOTE: Currently we need to update the value of the operand, so the next 923 // predicated iteration inserts its generated value in the correct vector. 
    State.reset(getOperand(0), VPhi, Part);
  } else {
    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
                     PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Instance))
      State.reset(this, Phi, *State.Instance);
    else
      State.set(this, Phi, *State.Instance);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), Phi, *State.Instance);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}

void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";

  if (!isStore()) {
    getVPSingleValue()->printAsOperand(O, SlotTracker);
    O << " = ";
  }
  O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";

  printOperands(O, SlotTracker);
}
#endif

void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *Start = getStartValue()->getLiveInIRValue();
  PHINode *EntryPart = PHINode::Create(
      Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(DL);
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION";
}
#endif

bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
  return IsScalarAfterVectorization &&
         (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
#endif

void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());

  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
#endif
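
// For example, with VF = 4 and UF = 2 the recipe below produces, per part:
//   vec.iv (part 0) = <iv+0, iv+1, iv+2, iv+3>
//   vec.iv (part 1) = <iv+4, iv+5, iv+6, iv+7>
// computed as splat(iv) + (VF * Part + stepvector).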

void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(
      VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif
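
// For example, for an unordered fadd reduction with VF = 4, the phi for part
// 0 starts at <StartV, Iden, Iden, Iden> (Iden being the fadd identity) and
// the phis for the remaining parts start at splat(Iden), so the start value
// is folded in exactly once.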

void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart =
        PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  // Reductions do not have to start at zero. They can start with
  // any loop-invariant value.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
    // MinMax reductions have the start value as their identity.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPWidenPHIRecipe::execute(VPTransformState &State) {
  assert(EnableVPlanNativePath &&
         "Non-native vplans are not expected to have VPWidenPHIRecipes.");

  // Currently we enter here in the VPlan-native path for non-induction
  // PHIs where all control flow is uniform. We simply widen these PHIs.
  // Create a vector phi with no operands - the vector phi operands will be
  // set at the end of vector code generation.
  VPBasicBlock *Parent = getParent();
  VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
  unsigned StartIdx = 0;
  // For phis in header blocks of loop regions, use the index of the value
  // coming from the preheader.
  if (LoopRegion->getEntryBasicBlock() == Parent) {
    for (unsigned I = 0; I < getNumOperands(); ++I) {
      if (getIncomingBlock(I) ==
          LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
        StartIdx = I;
    }
  }
  Value *Op0 = State.get(getOperand(StartIdx), 0);
  Type *VecTy = Op0->getType();
  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
  State.set(this, VecPhi, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan, print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant
  // incoming values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
// remove VPActiveLaneMaskPHIRecipe.
void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *StartMask = State.get(getOperand(0), Part);
    PHINode *EntryPart =
        State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
    EntryPart->addIncoming(StartMask, VectorPH);
    EntryPart->setDebugLoc(DL);
    State.set(this, EntryPart, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif