1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains implementations for different VPlan recipes. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "VPlan.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Analysis/IVDescriptors.h" 19 #include "llvm/IR/BasicBlock.h" 20 #include "llvm/IR/IRBuilder.h" 21 #include "llvm/IR/Instruction.h" 22 #include "llvm/IR/Instructions.h" 23 #include "llvm/IR/Type.h" 24 #include "llvm/IR/Value.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 30 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 31 #include <cassert> 32 33 using namespace llvm; 34 35 using VectorParts = SmallVector<Value *, 2>; 36 37 extern cl::opt<bool> EnableVPlanNativePath; 38 39 #define LV_NAME "loop-vectorize" 40 #define DEBUG_TYPE LV_NAME 41 42 bool VPRecipeBase::mayWriteToMemory() const { 43 switch (getVPDefID()) { 44 case VPWidenMemoryInstructionSC: { 45 return cast<VPWidenMemoryInstructionRecipe>(this)->isStore(); 46 } 47 case VPReplicateSC: 48 case VPWidenCallSC: 49 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 50 ->mayWriteToMemory(); 51 case VPBranchOnMaskSC: 52 case VPScalarIVStepsSC: 53 return false; 54 case VPWidenIntOrFpInductionSC: 55 case VPWidenCanonicalIVSC: 56 case VPWidenPHISC: 57 case VPBlendSC: 58 case VPWidenSC: 59 case VPWidenGEPSC: 60 case VPReductionSC: 61 case VPWidenSelectSC: { 62 const Instruction *I = 63 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 64 (void)I; 65 assert((!I || !I->mayWriteToMemory()) && 66 "underlying instruction may write to memory"); 67 return false; 68 } 69 default: 70 return true; 71 } 72 } 73 74 bool VPRecipeBase::mayReadFromMemory() const { 75 switch (getVPDefID()) { 76 case VPWidenMemoryInstructionSC: { 77 return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore(); 78 } 79 case VPReplicateSC: 80 case VPWidenCallSC: 81 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 82 ->mayReadFromMemory(); 83 case VPBranchOnMaskSC: 84 case VPScalarIVStepsSC: 85 return false; 86 case VPWidenIntOrFpInductionSC: 87 case VPWidenCanonicalIVSC: 88 case VPWidenPHISC: 89 case VPBlendSC: 90 case VPWidenSC: 91 case VPWidenGEPSC: 92 case VPReductionSC: 93 case VPWidenSelectSC: { 94 const Instruction *I = 95 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 96 (void)I; 97 assert((!I || !I->mayReadFromMemory()) && 98 "underlying instruction may read from memory"); 99 return false; 100 } 101 default: 102 return true; 103 } 104 } 105 106 bool VPRecipeBase::mayHaveSideEffects() const { 107 switch (getVPDefID()) { 108 case VPDerivedIVSC: 109 case VPPredInstPHISC: 110 return false; 111 case VPWidenIntOrFpInductionSC: 112 case VPWidenPointerInductionSC: 113 case VPWidenCanonicalIVSC: 114 case VPWidenPHISC: 115 case VPBlendSC: 116 case VPWidenSC: 117 case VPWidenGEPSC: 118 case VPReductionSC: 119 case VPWidenSelectSC: 120 case VPScalarIVStepsSC: { 121 const Instruction *I = 122 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 123 (void)I; 124 assert((!I || !I->mayHaveSideEffects()) && 125 "underlying instruction has side-effects"); 126 return false; 127 } 128 case VPReplicateSC: { 129 auto *R = cast<VPReplicateRecipe>(this); 130 return R->getUnderlyingInstr()->mayHaveSideEffects(); 131 } 132 default: 133 return true; 134 } 135 } 136 137 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { 138 auto Lane = VPLane::getLastLaneForVF(State.VF); 139 VPValue *ExitValue = getOperand(0); 140 if (vputils::isUniformAfterVectorization(ExitValue)) 141 Lane = VPLane::getFirstLane(); 142 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)), 143 State.Builder.GetInsertBlock()); 144 } 145 146 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { 147 assert(!Parent && "Recipe already in some VPBasicBlock"); 148 assert(InsertPos->getParent() && 149 "Insertion position not in any VPBasicBlock"); 150 Parent = InsertPos->getParent(); 151 Parent->getRecipeList().insert(InsertPos->getIterator(), this); 152 } 153 154 void VPRecipeBase::insertBefore(VPBasicBlock &BB, 155 iplist<VPRecipeBase>::iterator I) { 156 assert(!Parent && "Recipe already in some VPBasicBlock"); 157 assert(I == BB.end() || I->getParent() == &BB); 158 Parent = &BB; 159 BB.getRecipeList().insert(I, this); 160 } 161 162 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { 163 assert(!Parent && "Recipe already in some VPBasicBlock"); 164 assert(InsertPos->getParent() && 165 "Insertion position not in any VPBasicBlock"); 166 Parent = InsertPos->getParent(); 167 Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); 168 } 169 170 void VPRecipeBase::removeFromParent() { 171 assert(getParent() && "Recipe not in any VPBasicBlock"); 172 getParent()->getRecipeList().remove(getIterator()); 173 Parent = nullptr; 174 } 175 176 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { 177 assert(getParent() && "Recipe not in any VPBasicBlock"); 178 return getParent()->getRecipeList().erase(getIterator()); 179 } 180 181 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { 182 removeFromParent(); 183 insertAfter(InsertPos); 184 } 185 186 void VPRecipeBase::moveBefore(VPBasicBlock &BB, 187 iplist<VPRecipeBase>::iterator I) { 188 removeFromParent(); 189 insertBefore(BB, I); 190 } 191 192 void VPInstruction::generateInstruction(VPTransformState &State, 193 unsigned Part) { 194 IRBuilderBase &Builder = State.Builder; 195 Builder.SetCurrentDebugLocation(DL); 196 197 if (Instruction::isBinaryOp(getOpcode())) { 198 Value *A = State.get(getOperand(0), Part); 199 Value *B = State.get(getOperand(1), Part); 200 Value *V = 201 Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name); 202 State.set(this, V, Part); 203 return; 204 } 205 206 switch (getOpcode()) { 207 case VPInstruction::Not: { 208 Value *A = State.get(getOperand(0), Part); 209 Value *V = Builder.CreateNot(A, Name); 210 State.set(this, V, Part); 211 break; 212 } 213 case VPInstruction::ICmpULE: { 214 Value *IV = State.get(getOperand(0), Part); 215 Value *TC = State.get(getOperand(1), Part); 216 Value *V = Builder.CreateICmpULE(IV, TC, Name); 217 State.set(this, V, Part); 218 break; 219 } 220 case Instruction::Select: { 221 Value *Cond = State.get(getOperand(0), Part); 222 Value *Op1 = State.get(getOperand(1), Part); 223 Value *Op2 = State.get(getOperand(2), Part); 224 Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name); 225 State.set(this, V, Part); 226 break; 227 } 228 case VPInstruction::ActiveLaneMask: { 229 // Get first lane of vector induction variable. 230 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); 231 // Get the original loop tripcount. 232 Value *ScalarTC = State.get(getOperand(1), Part); 233 234 auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); 235 auto *PredTy = VectorType::get(Int1Ty, State.VF); 236 Instruction *Call = Builder.CreateIntrinsic( 237 Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()}, 238 {VIVElem0, ScalarTC}, nullptr, Name); 239 State.set(this, Call, Part); 240 break; 241 } 242 case VPInstruction::FirstOrderRecurrenceSplice: { 243 // Generate code to combine the previous and current values in vector v3. 244 // 245 // vector.ph: 246 // v_init = vector(..., ..., ..., a[-1]) 247 // br vector.body 248 // 249 // vector.body 250 // i = phi [0, vector.ph], [i+4, vector.body] 251 // v1 = phi [v_init, vector.ph], [v2, vector.body] 252 // v2 = a[i, i+1, i+2, i+3]; 253 // v3 = vector(v1(3), v2(0, 1, 2)) 254 255 // For the first part, use the recurrence phi (v1), otherwise v2. 256 auto *V1 = State.get(getOperand(0), 0); 257 Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1); 258 if (!PartMinus1->getType()->isVectorTy()) { 259 State.set(this, PartMinus1, Part); 260 } else { 261 Value *V2 = State.get(getOperand(1), Part); 262 State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name), 263 Part); 264 } 265 break; 266 } 267 case VPInstruction::CanonicalIVIncrement: 268 case VPInstruction::CanonicalIVIncrementNUW: { 269 Value *Next = nullptr; 270 if (Part == 0) { 271 bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW; 272 auto *Phi = State.get(getOperand(0), 0); 273 // The loop step is equal to the vectorization factor (num of SIMD 274 // elements) times the unroll factor (num of SIMD instructions). 275 Value *Step = 276 createStepForVF(Builder, Phi->getType(), State.VF, State.UF); 277 Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false); 278 } else { 279 Next = State.get(this, 0); 280 } 281 282 State.set(this, Next, Part); 283 break; 284 } 285 286 case VPInstruction::CanonicalIVIncrementForPart: 287 case VPInstruction::CanonicalIVIncrementForPartNUW: { 288 bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW; 289 auto *IV = State.get(getOperand(0), VPIteration(0, 0)); 290 if (Part == 0) { 291 State.set(this, IV, Part); 292 break; 293 } 294 295 // The canonical IV is incremented by the vectorization factor (num of SIMD 296 // elements) times the unroll part. 297 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part); 298 Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false); 299 State.set(this, Next, Part); 300 break; 301 } 302 case VPInstruction::BranchOnCond: { 303 if (Part != 0) 304 break; 305 306 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0)); 307 VPRegionBlock *ParentRegion = getParent()->getParent(); 308 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock(); 309 310 // Replace the temporary unreachable terminator with a new conditional 311 // branch, hooking it up to backward destination for exiting blocks now and 312 // to forward destination(s) later when they are created. 313 BranchInst *CondBr = 314 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr); 315 316 if (getParent()->isExiting()) 317 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]); 318 319 CondBr->setSuccessor(0, nullptr); 320 Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 321 break; 322 } 323 case VPInstruction::BranchOnCount: { 324 if (Part != 0) 325 break; 326 // First create the compare. 327 Value *IV = State.get(getOperand(0), Part); 328 Value *TC = State.get(getOperand(1), Part); 329 Value *Cond = Builder.CreateICmpEQ(IV, TC); 330 331 // Now create the branch. 332 auto *Plan = getParent()->getPlan(); 333 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); 334 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock(); 335 336 // Replace the temporary unreachable terminator with a new conditional 337 // branch, hooking it up to backward destination (the header) now and to the 338 // forward destination (the exit/middle block) later when it is created. 339 // Note that CreateCondBr expects a valid BB as first argument, so we need 340 // to set it to nullptr later. 341 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), 342 State.CFG.VPBB2IRBB[Header]); 343 CondBr->setSuccessor(0, nullptr); 344 Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 345 break; 346 } 347 default: 348 llvm_unreachable("Unsupported opcode for instruction"); 349 } 350 } 351 352 void VPInstruction::execute(VPTransformState &State) { 353 assert(!State.Instance && "VPInstruction executing an Instance"); 354 IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); 355 State.Builder.setFastMathFlags(FMF); 356 for (unsigned Part = 0; Part < State.UF; ++Part) 357 generateInstruction(State, Part); 358 } 359 360 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 361 void VPInstruction::dump() const { 362 VPSlotTracker SlotTracker(getParent()->getPlan()); 363 print(dbgs(), "", SlotTracker); 364 } 365 366 void VPInstruction::print(raw_ostream &O, const Twine &Indent, 367 VPSlotTracker &SlotTracker) const { 368 O << Indent << "EMIT "; 369 370 if (hasResult()) { 371 printAsOperand(O, SlotTracker); 372 O << " = "; 373 } 374 375 switch (getOpcode()) { 376 case VPInstruction::Not: 377 O << "not"; 378 break; 379 case VPInstruction::ICmpULE: 380 O << "icmp ule"; 381 break; 382 case VPInstruction::SLPLoad: 383 O << "combined load"; 384 break; 385 case VPInstruction::SLPStore: 386 O << "combined store"; 387 break; 388 case VPInstruction::ActiveLaneMask: 389 O << "active lane mask"; 390 break; 391 case VPInstruction::FirstOrderRecurrenceSplice: 392 O << "first-order splice"; 393 break; 394 case VPInstruction::CanonicalIVIncrement: 395 O << "VF * UF + "; 396 break; 397 case VPInstruction::CanonicalIVIncrementNUW: 398 O << "VF * UF +(nuw) "; 399 break; 400 case VPInstruction::BranchOnCond: 401 O << "branch-on-cond"; 402 break; 403 case VPInstruction::CanonicalIVIncrementForPart: 404 O << "VF * Part + "; 405 break; 406 case VPInstruction::CanonicalIVIncrementForPartNUW: 407 O << "VF * Part +(nuw) "; 408 break; 409 case VPInstruction::BranchOnCount: 410 O << "branch-on-count "; 411 break; 412 default: 413 O << Instruction::getOpcodeName(getOpcode()); 414 } 415 416 O << FMF; 417 418 for (const VPValue *Operand : operands()) { 419 O << " "; 420 Operand->printAsOperand(O, SlotTracker); 421 } 422 423 if (DL) { 424 O << ", !dbg "; 425 DL.print(O); 426 } 427 } 428 #endif 429 430 void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) { 431 // Make sure the VPInstruction is a floating-point operation. 432 assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul || 433 Opcode == Instruction::FNeg || Opcode == Instruction::FSub || 434 Opcode == Instruction::FDiv || Opcode == Instruction::FRem || 435 Opcode == Instruction::FCmp) && 436 "this op can't take fast-math flags"); 437 FMF = FMFNew; 438 } 439 440 void VPWidenCallRecipe::execute(VPTransformState &State) { 441 auto &CI = *cast<CallInst>(getUnderlyingInstr()); 442 assert(!isa<DbgInfoIntrinsic>(CI) && 443 "DbgInfoIntrinsic should have been dropped during VPlan construction"); 444 State.setDebugLocFromInst(&CI); 445 446 SmallVector<Type *, 4> Tys; 447 for (Value *ArgOperand : CI.args()) 448 Tys.push_back( 449 ToVectorTy(ArgOperand->getType(), State.VF.getKnownMinValue())); 450 451 for (unsigned Part = 0; Part < State.UF; ++Part) { 452 SmallVector<Type *, 2> TysForDecl = {CI.getType()}; 453 SmallVector<Value *, 4> Args; 454 for (const auto &I : enumerate(operands())) { 455 // Some intrinsics have a scalar argument - don't replace it with a 456 // vector. 457 Value *Arg; 458 if (VectorIntrinsicID == Intrinsic::not_intrinsic || 459 !isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())) 460 Arg = State.get(I.value(), Part); 461 else 462 Arg = State.get(I.value(), VPIteration(0, 0)); 463 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) 464 TysForDecl.push_back(Arg->getType()); 465 Args.push_back(Arg); 466 } 467 468 Function *VectorF; 469 if (VectorIntrinsicID != Intrinsic::not_intrinsic) { 470 // Use vector version of the intrinsic. 471 if (State.VF.isVector()) 472 TysForDecl[0] = 473 VectorType::get(CI.getType()->getScalarType(), State.VF); 474 Module *M = State.Builder.GetInsertBlock()->getModule(); 475 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl); 476 assert(VectorF && "Can't retrieve vector intrinsic."); 477 } else { 478 // Use vector version of the function call. 479 const VFShape Shape = VFShape::get(CI, State.VF, false /*HasGlobalPred*/); 480 #ifndef NDEBUG 481 assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr && 482 "Can't create vector function."); 483 #endif 484 VectorF = VFDatabase(CI).getVectorizedFunction(Shape); 485 } 486 SmallVector<OperandBundleDef, 1> OpBundles; 487 CI.getOperandBundlesAsDefs(OpBundles); 488 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles); 489 490 if (isa<FPMathOperator>(V)) 491 V->copyFastMathFlags(&CI); 492 493 State.set(this, V, Part); 494 State.addMetadata(V, &CI); 495 } 496 } 497 498 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 499 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, 500 VPSlotTracker &SlotTracker) const { 501 O << Indent << "WIDEN-CALL "; 502 503 auto *CI = cast<CallInst>(getUnderlyingInstr()); 504 if (CI->getType()->isVoidTy()) 505 O << "void "; 506 else { 507 printAsOperand(O, SlotTracker); 508 O << " = "; 509 } 510 511 O << "call @" << CI->getCalledFunction()->getName() << "("; 512 printOperands(O, SlotTracker); 513 O << ")"; 514 515 if (VectorIntrinsicID) 516 O << " (using vector intrinsic)"; 517 else 518 O << " (using library function)"; 519 } 520 521 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, 522 VPSlotTracker &SlotTracker) const { 523 O << Indent << "WIDEN-SELECT "; 524 printAsOperand(O, SlotTracker); 525 O << " = select "; 526 getOperand(0)->printAsOperand(O, SlotTracker); 527 O << ", "; 528 getOperand(1)->printAsOperand(O, SlotTracker); 529 O << ", "; 530 getOperand(2)->printAsOperand(O, SlotTracker); 531 O << (InvariantCond ? " (condition is loop invariant)" : ""); 532 } 533 #endif 534 535 void VPWidenSelectRecipe::execute(VPTransformState &State) { 536 auto &I = *cast<SelectInst>(getUnderlyingInstr()); 537 State.setDebugLocFromInst(&I); 538 539 // The condition can be loop invariant but still defined inside the 540 // loop. This means that we can't just use the original 'cond' value. 541 // We have to take the 'vectorized' value and pick the first lane. 542 // Instcombine will make this a no-op. 543 auto *InvarCond = 544 InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr; 545 546 for (unsigned Part = 0; Part < State.UF; ++Part) { 547 Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part); 548 Value *Op0 = State.get(getOperand(1), Part); 549 Value *Op1 = State.get(getOperand(2), Part); 550 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); 551 State.set(this, Sel, Part); 552 State.addMetadata(Sel, &I); 553 } 554 } 555 556 void VPWidenRecipe::execute(VPTransformState &State) { 557 auto &I = *cast<Instruction>(getUnderlyingValue()); 558 auto &Builder = State.Builder; 559 switch (I.getOpcode()) { 560 case Instruction::Call: 561 case Instruction::Br: 562 case Instruction::PHI: 563 case Instruction::GetElementPtr: 564 case Instruction::Select: 565 llvm_unreachable("This instruction is handled by a different recipe."); 566 case Instruction::UDiv: 567 case Instruction::SDiv: 568 case Instruction::SRem: 569 case Instruction::URem: 570 case Instruction::Add: 571 case Instruction::FAdd: 572 case Instruction::Sub: 573 case Instruction::FSub: 574 case Instruction::FNeg: 575 case Instruction::Mul: 576 case Instruction::FMul: 577 case Instruction::FDiv: 578 case Instruction::FRem: 579 case Instruction::Shl: 580 case Instruction::LShr: 581 case Instruction::AShr: 582 case Instruction::And: 583 case Instruction::Or: 584 case Instruction::Xor: { 585 // Just widen unops and binops. 586 State.setDebugLocFromInst(&I); 587 588 for (unsigned Part = 0; Part < State.UF; ++Part) { 589 SmallVector<Value *, 2> Ops; 590 for (VPValue *VPOp : operands()) 591 Ops.push_back(State.get(VPOp, Part)); 592 593 Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops); 594 595 if (auto *VecOp = dyn_cast<Instruction>(V)) { 596 VecOp->copyIRFlags(&I); 597 598 // If the instruction is vectorized and was in a basic block that needed 599 // predication, we can't propagate poison-generating flags (nuw/nsw, 600 // exact, etc.). The control flow has been linearized and the 601 // instruction is no longer guarded by the predicate, which could make 602 // the flag properties to no longer hold. 603 if (State.MayGeneratePoisonRecipes.contains(this)) 604 VecOp->dropPoisonGeneratingFlags(); 605 } 606 607 // Use this vector value for all users of the original instruction. 608 State.set(this, V, Part); 609 State.addMetadata(V, &I); 610 } 611 612 break; 613 } 614 case Instruction::Freeze: { 615 State.setDebugLocFromInst(&I); 616 617 for (unsigned Part = 0; Part < State.UF; ++Part) { 618 Value *Op = State.get(getOperand(0), Part); 619 620 Value *Freeze = Builder.CreateFreeze(Op); 621 State.set(this, Freeze, Part); 622 } 623 break; 624 } 625 case Instruction::ICmp: 626 case Instruction::FCmp: { 627 // Widen compares. Generate vector compares. 628 bool FCmp = (I.getOpcode() == Instruction::FCmp); 629 auto *Cmp = cast<CmpInst>(&I); 630 State.setDebugLocFromInst(Cmp); 631 for (unsigned Part = 0; Part < State.UF; ++Part) { 632 Value *A = State.get(getOperand(0), Part); 633 Value *B = State.get(getOperand(1), Part); 634 Value *C = nullptr; 635 if (FCmp) { 636 // Propagate fast math flags. 637 IRBuilder<>::FastMathFlagGuard FMFG(Builder); 638 Builder.setFastMathFlags(Cmp->getFastMathFlags()); 639 C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); 640 } else { 641 C = Builder.CreateICmp(Cmp->getPredicate(), A, B); 642 } 643 State.set(this, C, Part); 644 State.addMetadata(C, &I); 645 } 646 647 break; 648 } 649 650 case Instruction::ZExt: 651 case Instruction::SExt: 652 case Instruction::FPToUI: 653 case Instruction::FPToSI: 654 case Instruction::FPExt: 655 case Instruction::PtrToInt: 656 case Instruction::IntToPtr: 657 case Instruction::SIToFP: 658 case Instruction::UIToFP: 659 case Instruction::Trunc: 660 case Instruction::FPTrunc: 661 case Instruction::BitCast: { 662 auto *CI = cast<CastInst>(&I); 663 State.setDebugLocFromInst(CI); 664 665 /// Vectorize casts. 666 Type *DestTy = (State.VF.isScalar()) 667 ? CI->getType() 668 : VectorType::get(CI->getType(), State.VF); 669 670 for (unsigned Part = 0; Part < State.UF; ++Part) { 671 Value *A = State.get(getOperand(0), Part); 672 Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); 673 State.set(this, Cast, Part); 674 State.addMetadata(Cast, &I); 675 } 676 break; 677 } 678 default: 679 // This instruction is not vectorized by simple widening. 680 LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); 681 llvm_unreachable("Unhandled instruction!"); 682 } // end of switch. 683 } 684 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 685 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, 686 VPSlotTracker &SlotTracker) const { 687 O << Indent << "WIDEN "; 688 printAsOperand(O, SlotTracker); 689 const Instruction *UI = getUnderlyingInstr(); 690 O << " = " << UI->getOpcodeName() << " "; 691 if (auto *Cmp = dyn_cast<CmpInst>(UI)) 692 O << CmpInst::getPredicateName(Cmp->getPredicate()) << " "; 693 printOperands(O, SlotTracker); 694 } 695 696 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, 697 VPSlotTracker &SlotTracker) const { 698 O << Indent << "WIDEN-INDUCTION"; 699 if (getTruncInst()) { 700 O << "\\l\""; 701 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; 702 O << " +\n" << Indent << "\" "; 703 getVPValue(0)->printAsOperand(O, SlotTracker); 704 } else 705 O << " " << VPlanIngredient(IV); 706 707 O << ", "; 708 getStepValue()->printAsOperand(O, SlotTracker); 709 } 710 #endif 711 712 bool VPWidenIntOrFpInductionRecipe::isCanonical() const { 713 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); 714 auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep()); 715 return StartC && StartC->isZero() && StepC && StepC->isOne(); 716 } 717 718 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 719 void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent, 720 VPSlotTracker &SlotTracker) const { 721 O << Indent; 722 printAsOperand(O, SlotTracker); 723 O << Indent << "= DERIVED-IV "; 724 getStartValue()->printAsOperand(O, SlotTracker); 725 O << " + "; 726 getCanonicalIV()->printAsOperand(O, SlotTracker); 727 O << " * "; 728 getStepValue()->printAsOperand(O, SlotTracker); 729 730 if (IndDesc.getStep()->getType() != ResultTy) 731 O << " (truncated to " << *ResultTy << ")"; 732 } 733 #endif 734 735 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 736 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, 737 VPSlotTracker &SlotTracker) const { 738 O << Indent; 739 printAsOperand(O, SlotTracker); 740 O << Indent << "= SCALAR-STEPS "; 741 printOperands(O, SlotTracker); 742 } 743 #endif 744 745 void VPWidenGEPRecipe::execute(VPTransformState &State) { 746 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr()); 747 // Construct a vector GEP by widening the operands of the scalar GEP as 748 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP 749 // results in a vector of pointers when at least one operand of the GEP 750 // is vector-typed. Thus, to keep the representation compact, we only use 751 // vector-typed operands for loop-varying values. 752 753 if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) { 754 // If we are vectorizing, but the GEP has only loop-invariant operands, 755 // the GEP we build (by only using vector-typed operands for 756 // loop-varying values) would be a scalar pointer. Thus, to ensure we 757 // produce a vector of pointers, we need to either arbitrarily pick an 758 // operand to broadcast, or broadcast a clone of the original GEP. 759 // Here, we broadcast a clone of the original. 760 // 761 // TODO: If at some point we decide to scalarize instructions having 762 // loop-invariant operands, this special case will no longer be 763 // required. We would add the scalarization decision to 764 // collectLoopScalars() and teach getVectorValue() to broadcast 765 // the lane-zero scalar value. 766 auto *Clone = State.Builder.Insert(GEP->clone()); 767 for (unsigned Part = 0; Part < State.UF; ++Part) { 768 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone); 769 State.set(this, EntryPart, Part); 770 State.addMetadata(EntryPart, GEP); 771 } 772 } else { 773 // If the GEP has at least one loop-varying operand, we are sure to 774 // produce a vector of pointers. But if we are only unrolling, we want 775 // to produce a scalar GEP for each unroll part. Thus, the GEP we 776 // produce with the code below will be scalar (if VF == 1) or vector 777 // (otherwise). Note that for the unroll-only case, we still maintain 778 // values in the vector mapping with initVector, as we do for other 779 // instructions. 780 for (unsigned Part = 0; Part < State.UF; ++Part) { 781 // The pointer operand of the new GEP. If it's loop-invariant, we 782 // won't broadcast it. 783 auto *Ptr = IsPtrLoopInvariant 784 ? State.get(getOperand(0), VPIteration(0, 0)) 785 : State.get(getOperand(0), Part); 786 787 // Collect all the indices for the new GEP. If any index is 788 // loop-invariant, we won't broadcast it. 789 SmallVector<Value *, 4> Indices; 790 for (unsigned I = 1, E = getNumOperands(); I < E; I++) { 791 VPValue *Operand = getOperand(I); 792 if (IsIndexLoopInvariant[I - 1]) 793 Indices.push_back(State.get(Operand, VPIteration(0, 0))); 794 else 795 Indices.push_back(State.get(Operand, Part)); 796 } 797 798 // If the GEP instruction is vectorized and was in a basic block that 799 // needed predication, we can't propagate the poison-generating 'inbounds' 800 // flag. The control flow has been linearized and the GEP is no longer 801 // guarded by the predicate, which could make the 'inbounds' properties to 802 // no longer hold. 803 bool IsInBounds = 804 GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0; 805 806 // Create the new GEP. Note that this GEP may be a scalar if VF == 1, 807 // but it should be a vector, otherwise. 808 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr, 809 Indices, "", IsInBounds); 810 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && 811 "NewGEP is not a pointer vector"); 812 State.set(this, NewGEP, Part); 813 State.addMetadata(NewGEP, GEP); 814 } 815 } 816 } 817 818 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 819 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, 820 VPSlotTracker &SlotTracker) const { 821 O << Indent << "WIDEN-GEP "; 822 O << (IsPtrLoopInvariant ? "Inv" : "Var"); 823 size_t IndicesNumber = IsIndexLoopInvariant.size(); 824 for (size_t I = 0; I < IndicesNumber; ++I) 825 O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]"; 826 827 O << " "; 828 printAsOperand(O, SlotTracker); 829 O << " = getelementptr "; 830 printOperands(O, SlotTracker); 831 } 832 #endif 833 834 void VPBlendRecipe::execute(VPTransformState &State) { 835 State.setDebugLocFromInst(Phi); 836 // We know that all PHIs in non-header blocks are converted into 837 // selects, so we don't have to worry about the insertion order and we 838 // can just use the builder. 839 // At this point we generate the predication tree. There may be 840 // duplications since this is a simple recursive scan, but future 841 // optimizations will clean it up. 842 843 unsigned NumIncoming = getNumIncomingValues(); 844 845 // Generate a sequence of selects of the form: 846 // SELECT(Mask3, In3, 847 // SELECT(Mask2, In2, 848 // SELECT(Mask1, In1, 849 // In0))) 850 // Note that Mask0 is never used: lanes for which no path reaches this phi and 851 // are essentially undef are taken from In0. 852 VectorParts Entry(State.UF); 853 for (unsigned In = 0; In < NumIncoming; ++In) { 854 for (unsigned Part = 0; Part < State.UF; ++Part) { 855 // We might have single edge PHIs (blocks) - use an identity 856 // 'select' for the first PHI operand. 857 Value *In0 = State.get(getIncomingValue(In), Part); 858 if (In == 0) 859 Entry[Part] = In0; // Initialize with the first incoming value. 860 else { 861 // Select between the current value and the previous incoming edge 862 // based on the incoming mask. 863 Value *Cond = State.get(getMask(In), Part); 864 Entry[Part] = 865 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi"); 866 } 867 } 868 } 869 for (unsigned Part = 0; Part < State.UF; ++Part) 870 State.set(this, Entry[Part], Part); 871 } 872 873 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 874 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, 875 VPSlotTracker &SlotTracker) const { 876 O << Indent << "BLEND "; 877 Phi->printAsOperand(O, false); 878 O << " ="; 879 if (getNumIncomingValues() == 1) { 880 // Not a User of any mask: not really blending, this is a 881 // single-predecessor phi. 882 O << " "; 883 getIncomingValue(0)->printAsOperand(O, SlotTracker); 884 } else { 885 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { 886 O << " "; 887 getIncomingValue(I)->printAsOperand(O, SlotTracker); 888 O << "/"; 889 getMask(I)->printAsOperand(O, SlotTracker); 890 } 891 } 892 } 893 894 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, 895 VPSlotTracker &SlotTracker) const { 896 O << Indent << "REDUCE "; 897 printAsOperand(O, SlotTracker); 898 O << " = "; 899 getChainOp()->printAsOperand(O, SlotTracker); 900 O << " +"; 901 if (isa<FPMathOperator>(getUnderlyingInstr())) 902 O << getUnderlyingInstr()->getFastMathFlags(); 903 O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " ("; 904 getVecOp()->printAsOperand(O, SlotTracker); 905 if (getCondOp()) { 906 O << ", "; 907 getCondOp()->printAsOperand(O, SlotTracker); 908 } 909 O << ")"; 910 if (RdxDesc->IntermediateStore) 911 O << " (with final reduction value stored in invariant address sank " 912 "outside of loop)"; 913 } 914 915 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, 916 VPSlotTracker &SlotTracker) const { 917 O << Indent << (IsUniform ? "CLONE " : "REPLICATE "); 918 919 if (!getUnderlyingInstr()->getType()->isVoidTy()) { 920 printAsOperand(O, SlotTracker); 921 O << " = "; 922 } 923 if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) { 924 O << "call @" << CB->getCalledFunction()->getName() << "("; 925 interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), 926 O, [&O, &SlotTracker](VPValue *Op) { 927 Op->printAsOperand(O, SlotTracker); 928 }); 929 O << ")"; 930 } else { 931 O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " "; 932 printOperands(O, SlotTracker); 933 } 934 935 if (AlsoPack) 936 O << " (S->V)"; 937 } 938 #endif 939 940 void VPBranchOnMaskRecipe::execute(VPTransformState &State) { 941 assert(State.Instance && "Branch on Mask works only on single instance."); 942 943 unsigned Part = State.Instance->Part; 944 unsigned Lane = State.Instance->Lane.getKnownLane(); 945 946 Value *ConditionBit = nullptr; 947 VPValue *BlockInMask = getMask(); 948 if (BlockInMask) { 949 ConditionBit = State.get(BlockInMask, Part); 950 if (ConditionBit->getType()->isVectorTy()) 951 ConditionBit = State.Builder.CreateExtractElement( 952 ConditionBit, State.Builder.getInt32(Lane)); 953 } else // Block in mask is all-one. 954 ConditionBit = State.Builder.getTrue(); 955 956 // Replace the temporary unreachable terminator with a new conditional branch, 957 // whose two destinations will be set later when they are created. 958 auto *CurrentTerminator = State.CFG.PrevBB->getTerminator(); 959 assert(isa<UnreachableInst>(CurrentTerminator) && 960 "Expected to replace unreachable terminator with conditional branch."); 961 auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit); 962 CondBr->setSuccessor(0, nullptr); 963 ReplaceInstWithInst(CurrentTerminator, CondBr); 964 } 965 966 void VPPredInstPHIRecipe::execute(VPTransformState &State) { 967 assert(State.Instance && "Predicated instruction PHI works per instance."); 968 Instruction *ScalarPredInst = 969 cast<Instruction>(State.get(getOperand(0), *State.Instance)); 970 BasicBlock *PredicatedBB = ScalarPredInst->getParent(); 971 BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); 972 assert(PredicatingBB && "Predicated block has no single predecessor."); 973 assert(isa<VPReplicateRecipe>(getOperand(0)) && 974 "operand must be VPReplicateRecipe"); 975 976 // By current pack/unpack logic we need to generate only a single phi node: if 977 // a vector value for the predicated instruction exists at this point it means 978 // the instruction has vector users only, and a phi for the vector value is 979 // needed. In this case the recipe of the predicated instruction is marked to 980 // also do that packing, thereby "hoisting" the insert-element sequence. 981 // Otherwise, a phi node for the scalar value is needed. 982 unsigned Part = State.Instance->Part; 983 if (State.hasVectorValue(getOperand(0), Part)) { 984 Value *VectorValue = State.get(getOperand(0), Part); 985 InsertElementInst *IEI = cast<InsertElementInst>(VectorValue); 986 PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); 987 VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. 988 VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. 989 if (State.hasVectorValue(this, Part)) 990 State.reset(this, VPhi, Part); 991 else 992 State.set(this, VPhi, Part); 993 // NOTE: Currently we need to update the value of the operand, so the next 994 // predicated iteration inserts its generated value in the correct vector. 995 State.reset(getOperand(0), VPhi, Part); 996 } else { 997 Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType(); 998 PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); 999 Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), 1000 PredicatingBB); 1001 Phi->addIncoming(ScalarPredInst, PredicatedBB); 1002 if (State.hasScalarValue(this, *State.Instance)) 1003 State.reset(this, Phi, *State.Instance); 1004 else 1005 State.set(this, Phi, *State.Instance); 1006 // NOTE: Currently we need to update the value of the operand, so the next 1007 // predicated iteration inserts its generated value in the correct vector. 1008 State.reset(getOperand(0), Phi, *State.Instance); 1009 } 1010 } 1011 1012 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1013 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, 1014 VPSlotTracker &SlotTracker) const { 1015 O << Indent << "PHI-PREDICATED-INSTRUCTION "; 1016 printAsOperand(O, SlotTracker); 1017 O << " = "; 1018 printOperands(O, SlotTracker); 1019 } 1020 1021 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, 1022 VPSlotTracker &SlotTracker) const { 1023 O << Indent << "WIDEN "; 1024 1025 if (!isStore()) { 1026 getVPSingleValue()->printAsOperand(O, SlotTracker); 1027 O << " = "; 1028 } 1029 O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " "; 1030 1031 printOperands(O, SlotTracker); 1032 } 1033 #endif 1034 1035 void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) { 1036 Value *Start = getStartValue()->getLiveInIRValue(); 1037 PHINode *EntryPart = PHINode::Create( 1038 Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt()); 1039 1040 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 1041 EntryPart->addIncoming(Start, VectorPH); 1042 EntryPart->setDebugLoc(DL); 1043 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) 1044 State.set(this, EntryPart, Part); 1045 } 1046 1047 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1048 void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, 1049 VPSlotTracker &SlotTracker) const { 1050 O << Indent << "EMIT "; 1051 printAsOperand(O, SlotTracker); 1052 O << " = CANONICAL-INDUCTION"; 1053 } 1054 #endif 1055 1056 bool VPCanonicalIVPHIRecipe::isCanonical(const InductionDescriptor &ID, 1057 Type *Ty) const { 1058 if (Ty != getScalarType()) 1059 return false; 1060 // The start value of ID must match the start value of this canonical 1061 // induction. 1062 if (getStartValue()->getLiveInIRValue() != ID.getStartValue()) 1063 return false; 1064 1065 ConstantInt *Step = ID.getConstIntStepValue(); 1066 // ID must also be incremented by one. IK_IntInduction always increment the 1067 // induction by Step, but the binary op may not be set. 1068 return ID.getKind() == InductionDescriptor::IK_IntInduction && Step && 1069 Step->isOne(); 1070 } 1071 1072 bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) { 1073 return IsScalarAfterVectorization && 1074 (!VF.isScalable() || vputils::onlyFirstLaneUsed(this)); 1075 } 1076 1077 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1078 void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, 1079 VPSlotTracker &SlotTracker) const { 1080 O << Indent << "EMIT "; 1081 printAsOperand(O, SlotTracker); 1082 O << " = WIDEN-POINTER-INDUCTION "; 1083 getStartValue()->printAsOperand(O, SlotTracker); 1084 O << ", " << *IndDesc.getStep(); 1085 } 1086 #endif 1087 1088 void VPExpandSCEVRecipe::execute(VPTransformState &State) { 1089 assert(!State.Instance && "cannot be used in per-lane"); 1090 const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout(); 1091 SCEVExpander Exp(SE, DL, "induction"); 1092 1093 Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), 1094 &*State.Builder.GetInsertPoint()); 1095 1096 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) 1097 State.set(this, Res, Part); 1098 } 1099 1100 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1101 void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent, 1102 VPSlotTracker &SlotTracker) const { 1103 O << Indent << "EMIT "; 1104 getVPSingleValue()->printAsOperand(O, SlotTracker); 1105 O << " = EXPAND SCEV " << *Expr; 1106 } 1107 #endif 1108 1109 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { 1110 Value *CanonicalIV = State.get(getOperand(0), 0); 1111 Type *STy = CanonicalIV->getType(); 1112 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); 1113 ElementCount VF = State.VF; 1114 Value *VStart = VF.isScalar() 1115 ? CanonicalIV 1116 : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast"); 1117 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { 1118 Value *VStep = createStepForVF(Builder, STy, VF, Part); 1119 if (VF.isVector()) { 1120 VStep = Builder.CreateVectorSplat(VF, VStep); 1121 VStep = 1122 Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType())); 1123 } 1124 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv"); 1125 State.set(this, CanonicalVectorIV, Part); 1126 } 1127 } 1128 1129 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1130 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, 1131 VPSlotTracker &SlotTracker) const { 1132 O << Indent << "EMIT "; 1133 printAsOperand(O, SlotTracker); 1134 O << " = WIDEN-CANONICAL-INDUCTION "; 1135 printOperands(O, SlotTracker); 1136 } 1137 #endif 1138 1139 void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { 1140 auto &Builder = State.Builder; 1141 // Create a vector from the initial value. 1142 auto *VectorInit = getStartValue()->getLiveInIRValue(); 1143 1144 Type *VecTy = State.VF.isScalar() 1145 ? VectorInit->getType() 1146 : VectorType::get(VectorInit->getType(), State.VF); 1147 1148 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 1149 if (State.VF.isVector()) { 1150 auto *IdxTy = Builder.getInt32Ty(); 1151 auto *One = ConstantInt::get(IdxTy, 1); 1152 IRBuilder<>::InsertPointGuard Guard(Builder); 1153 Builder.SetInsertPoint(VectorPH->getTerminator()); 1154 auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); 1155 auto *LastIdx = Builder.CreateSub(RuntimeVF, One); 1156 VectorInit = Builder.CreateInsertElement( 1157 PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init"); 1158 } 1159 1160 // Create a phi node for the new recurrence. 1161 PHINode *EntryPart = PHINode::Create( 1162 VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt()); 1163 EntryPart->addIncoming(VectorInit, VectorPH); 1164 State.set(this, EntryPart, 0); 1165 } 1166 1167 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1168 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, 1169 VPSlotTracker &SlotTracker) const { 1170 O << Indent << "FIRST-ORDER-RECURRENCE-PHI "; 1171 printAsOperand(O, SlotTracker); 1172 O << " = phi "; 1173 printOperands(O, SlotTracker); 1174 } 1175 #endif 1176 1177 void VPReductionPHIRecipe::execute(VPTransformState &State) { 1178 PHINode *PN = cast<PHINode>(getUnderlyingValue()); 1179 auto &Builder = State.Builder; 1180 1181 // In order to support recurrences we need to be able to vectorize Phi nodes. 1182 // Phi nodes have cycles, so we need to vectorize them in two stages. This is 1183 // stage #1: We create a new vector PHI node with no incoming edges. We'll use 1184 // this value when we vectorize all of the instructions that use the PHI. 1185 bool ScalarPHI = State.VF.isScalar() || IsInLoop; 1186 Type *VecTy = 1187 ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF); 1188 1189 BasicBlock *HeaderBB = State.CFG.PrevBB; 1190 assert(State.CurrentVectorLoop->getHeader() == HeaderBB && 1191 "recipe must be in the vector loop header"); 1192 unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF; 1193 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { 1194 Value *EntryPart = 1195 PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt()); 1196 State.set(this, EntryPart, Part); 1197 } 1198 1199 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 1200 1201 // Reductions do not have to start at zero. They can start with 1202 // any loop invariant values. 1203 VPValue *StartVPV = getStartValue(); 1204 Value *StartV = StartVPV->getLiveInIRValue(); 1205 1206 Value *Iden = nullptr; 1207 RecurKind RK = RdxDesc.getRecurrenceKind(); 1208 if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || 1209 RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) { 1210 // MinMax reduction have the start value as their identify. 1211 if (ScalarPHI) { 1212 Iden = StartV; 1213 } else { 1214 IRBuilderBase::InsertPointGuard IPBuilder(Builder); 1215 Builder.SetInsertPoint(VectorPH->getTerminator()); 1216 StartV = Iden = 1217 Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); 1218 } 1219 } else { 1220 Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(), 1221 RdxDesc.getFastMathFlags()); 1222 1223 if (!ScalarPHI) { 1224 Iden = Builder.CreateVectorSplat(State.VF, Iden); 1225 IRBuilderBase::InsertPointGuard IPBuilder(Builder); 1226 Builder.SetInsertPoint(VectorPH->getTerminator()); 1227 Constant *Zero = Builder.getInt32(0); 1228 StartV = Builder.CreateInsertElement(Iden, StartV, Zero); 1229 } 1230 } 1231 1232 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { 1233 Value *EntryPart = State.get(this, Part); 1234 // Make sure to add the reduction start value only to the 1235 // first unroll part. 1236 Value *StartVal = (Part == 0) ? StartV : Iden; 1237 cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH); 1238 } 1239 } 1240 1241 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1242 void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, 1243 VPSlotTracker &SlotTracker) const { 1244 O << Indent << "WIDEN-REDUCTION-PHI "; 1245 1246 printAsOperand(O, SlotTracker); 1247 O << " = phi "; 1248 printOperands(O, SlotTracker); 1249 } 1250 #endif 1251 1252 void VPWidenPHIRecipe::execute(VPTransformState &State) { 1253 assert(EnableVPlanNativePath && 1254 "Non-native vplans are not expected to have VPWidenPHIRecipes."); 1255 1256 // Currently we enter here in the VPlan-native path for non-induction 1257 // PHIs where all control flow is uniform. We simply widen these PHIs. 1258 // Create a vector phi with no operands - the vector phi operands will be 1259 // set at the end of vector code generation. 1260 VPBasicBlock *Parent = getParent(); 1261 VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion(); 1262 unsigned StartIdx = 0; 1263 // For phis in header blocks of loop regions, use the index of the value 1264 // coming from the preheader. 1265 if (LoopRegion->getEntryBasicBlock() == Parent) { 1266 for (unsigned I = 0; I < getNumOperands(); ++I) { 1267 if (getIncomingBlock(I) == 1268 LoopRegion->getSinglePredecessor()->getExitingBasicBlock()) 1269 StartIdx = I; 1270 } 1271 } 1272 Value *Op0 = State.get(getOperand(StartIdx), 0); 1273 Type *VecTy = Op0->getType(); 1274 Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi"); 1275 State.set(this, VecPhi, 0); 1276 } 1277 1278 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1279 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, 1280 VPSlotTracker &SlotTracker) const { 1281 O << Indent << "WIDEN-PHI "; 1282 1283 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue()); 1284 // Unless all incoming values are modeled in VPlan print the original PHI 1285 // directly. 1286 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming 1287 // values as VPValues. 1288 if (getNumOperands() != OriginalPhi->getNumOperands()) { 1289 O << VPlanIngredient(OriginalPhi); 1290 return; 1291 } 1292 1293 printAsOperand(O, SlotTracker); 1294 O << " = phi "; 1295 printOperands(O, SlotTracker); 1296 } 1297 #endif 1298 1299 // TODO: It would be good to use the existing VPWidenPHIRecipe instead and 1300 // remove VPActiveLaneMaskPHIRecipe. 1301 void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) { 1302 BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); 1303 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { 1304 Value *StartMask = State.get(getOperand(0), Part); 1305 PHINode *EntryPart = 1306 State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask"); 1307 EntryPart->addIncoming(StartMask, VectorPH); 1308 EntryPart->setDebugLoc(DL); 1309 State.set(this, EntryPart, Part); 1310 } 1311 } 1312 1313 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1314 void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent, 1315 VPSlotTracker &SlotTracker) const { 1316 O << Indent << "ACTIVE-LANE-MASK-PHI "; 1317 1318 printAsOperand(O, SlotTracker); 1319 O << " = phi "; 1320 printOperands(O, SlotTracker); 1321 } 1322 #endif 1323