1 //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains implementations for different VPlan recipes. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "VPlan.h" 15 #include "llvm/ADT/STLExtras.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Analysis/IVDescriptors.h" 19 #include "llvm/IR/BasicBlock.h" 20 #include "llvm/IR/IRBuilder.h" 21 #include "llvm/IR/Instruction.h" 22 #include "llvm/IR/Instructions.h" 23 #include "llvm/IR/Type.h" 24 #include "llvm/IR/Value.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 30 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 31 #include <cassert> 32 33 using namespace llvm; 34 35 using VectorParts = SmallVector<Value *, 2>; 36 37 namespace llvm { 38 extern cl::opt<bool> EnableVPlanNativePath; 39 } 40 41 #define LV_NAME "loop-vectorize" 42 #define DEBUG_TYPE LV_NAME 43 44 bool VPRecipeBase::mayWriteToMemory() const { 45 switch (getVPDefID()) { 46 case VPWidenMemoryInstructionSC: { 47 return cast<VPWidenMemoryInstructionRecipe>(this)->isStore(); 48 } 49 case VPReplicateSC: 50 case VPWidenCallSC: 51 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 52 ->mayWriteToMemory(); 53 case VPBranchOnMaskSC: 54 case VPScalarIVStepsSC: 55 case VPPredInstPHISC: 56 return false; 57 case VPBlendSC: 58 case VPReductionSC: 59 case VPWidenCanonicalIVSC: 60 case VPWidenCastSC: 61 case VPWidenGEPSC: 62 case 
VPWidenIntOrFpInductionSC: 63 case VPWidenPHISC: 64 case VPWidenSC: 65 case VPWidenSelectSC: { 66 const Instruction *I = 67 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 68 (void)I; 69 assert((!I || !I->mayWriteToMemory()) && 70 "underlying instruction may write to memory"); 71 return false; 72 } 73 default: 74 return true; 75 } 76 } 77 78 bool VPRecipeBase::mayReadFromMemory() const { 79 switch (getVPDefID()) { 80 case VPWidenMemoryInstructionSC: { 81 return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore(); 82 } 83 case VPReplicateSC: 84 case VPWidenCallSC: 85 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 86 ->mayReadFromMemory(); 87 case VPBranchOnMaskSC: 88 case VPScalarIVStepsSC: 89 case VPPredInstPHISC: 90 return false; 91 case VPBlendSC: 92 case VPReductionSC: 93 case VPWidenCanonicalIVSC: 94 case VPWidenCastSC: 95 case VPWidenGEPSC: 96 case VPWidenIntOrFpInductionSC: 97 case VPWidenPHISC: 98 case VPWidenSC: 99 case VPWidenSelectSC: { 100 const Instruction *I = 101 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 102 (void)I; 103 assert((!I || !I->mayReadFromMemory()) && 104 "underlying instruction may read from memory"); 105 return false; 106 } 107 default: 108 return true; 109 } 110 } 111 112 bool VPRecipeBase::mayHaveSideEffects() const { 113 switch (getVPDefID()) { 114 case VPDerivedIVSC: 115 case VPPredInstPHISC: 116 return false; 117 case VPWidenCallSC: 118 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) 119 ->mayHaveSideEffects(); 120 case VPBlendSC: 121 case VPReductionSC: 122 case VPScalarIVStepsSC: 123 case VPWidenCanonicalIVSC: 124 case VPWidenCastSC: 125 case VPWidenGEPSC: 126 case VPWidenIntOrFpInductionSC: 127 case VPWidenPHISC: 128 case VPWidenPointerInductionSC: 129 case VPWidenSC: 130 case VPWidenSelectSC: { 131 const Instruction *I = 132 dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); 133 (void)I; 134 assert((!I || 
!I->mayHaveSideEffects()) && 135 "underlying instruction has side-effects"); 136 return false; 137 } 138 case VPWidenMemoryInstructionSC: 139 assert(cast<VPWidenMemoryInstructionRecipe>(this) 140 ->getIngredient() 141 .mayHaveSideEffects() == mayWriteToMemory() && 142 "mayHaveSideffects result for ingredient differs from this " 143 "implementation"); 144 return mayWriteToMemory(); 145 case VPReplicateSC: { 146 auto *R = cast<VPReplicateRecipe>(this); 147 return R->getUnderlyingInstr()->mayHaveSideEffects(); 148 } 149 default: 150 return true; 151 } 152 } 153 154 void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { 155 auto Lane = VPLane::getLastLaneForVF(State.VF); 156 VPValue *ExitValue = getOperand(0); 157 if (vputils::isUniformAfterVectorization(ExitValue)) 158 Lane = VPLane::getFirstLane(); 159 Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)), 160 State.Builder.GetInsertBlock()); 161 } 162 163 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 164 void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const { 165 O << "Live-out "; 166 getPhi()->printAsOperand(O); 167 O << " = "; 168 getOperand(0)->printAsOperand(O, SlotTracker); 169 O << "\n"; 170 } 171 #endif 172 173 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { 174 assert(!Parent && "Recipe already in some VPBasicBlock"); 175 assert(InsertPos->getParent() && 176 "Insertion position not in any VPBasicBlock"); 177 Parent = InsertPos->getParent(); 178 Parent->getRecipeList().insert(InsertPos->getIterator(), this); 179 } 180 181 void VPRecipeBase::insertBefore(VPBasicBlock &BB, 182 iplist<VPRecipeBase>::iterator I) { 183 assert(!Parent && "Recipe already in some VPBasicBlock"); 184 assert(I == BB.end() || I->getParent() == &BB); 185 Parent = &BB; 186 BB.getRecipeList().insert(I, this); 187 } 188 189 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { 190 assert(!Parent && "Recipe already in some VPBasicBlock"); 191 assert(InsertPos->getParent() && 192 
"Insertion position not in any VPBasicBlock"); 193 Parent = InsertPos->getParent(); 194 Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); 195 } 196 197 void VPRecipeBase::removeFromParent() { 198 assert(getParent() && "Recipe not in any VPBasicBlock"); 199 getParent()->getRecipeList().remove(getIterator()); 200 Parent = nullptr; 201 } 202 203 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { 204 assert(getParent() && "Recipe not in any VPBasicBlock"); 205 return getParent()->getRecipeList().erase(getIterator()); 206 } 207 208 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { 209 removeFromParent(); 210 insertAfter(InsertPos); 211 } 212 213 void VPRecipeBase::moveBefore(VPBasicBlock &BB, 214 iplist<VPRecipeBase>::iterator I) { 215 removeFromParent(); 216 insertBefore(BB, I); 217 } 218 219 Value *VPInstruction::generateInstruction(VPTransformState &State, 220 unsigned Part) { 221 IRBuilderBase &Builder = State.Builder; 222 Builder.SetCurrentDebugLocation(DL); 223 224 if (Instruction::isBinaryOp(getOpcode())) { 225 Value *A = State.get(getOperand(0), Part); 226 Value *B = State.get(getOperand(1), Part); 227 return Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name); 228 } 229 230 switch (getOpcode()) { 231 case VPInstruction::Not: { 232 Value *A = State.get(getOperand(0), Part); 233 return Builder.CreateNot(A, Name); 234 } 235 case VPInstruction::ICmpULE: { 236 Value *IV = State.get(getOperand(0), Part); 237 Value *TC = State.get(getOperand(1), Part); 238 return Builder.CreateICmpULE(IV, TC, Name); 239 } 240 case Instruction::Select: { 241 Value *Cond = State.get(getOperand(0), Part); 242 Value *Op1 = State.get(getOperand(1), Part); 243 Value *Op2 = State.get(getOperand(2), Part); 244 return Builder.CreateSelect(Cond, Op1, Op2, Name); 245 } 246 case VPInstruction::ActiveLaneMask: { 247 // Get first lane of vector induction variable. 
248 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); 249 // Get the original loop tripcount. 250 Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0)); 251 252 auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); 253 auto *PredTy = VectorType::get(Int1Ty, State.VF); 254 return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, 255 {PredTy, ScalarTC->getType()}, 256 {VIVElem0, ScalarTC}, nullptr, Name); 257 } 258 case VPInstruction::FirstOrderRecurrenceSplice: { 259 // Generate code to combine the previous and current values in vector v3. 260 // 261 // vector.ph: 262 // v_init = vector(..., ..., ..., a[-1]) 263 // br vector.body 264 // 265 // vector.body 266 // i = phi [0, vector.ph], [i+4, vector.body] 267 // v1 = phi [v_init, vector.ph], [v2, vector.body] 268 // v2 = a[i, i+1, i+2, i+3]; 269 // v3 = vector(v1(3), v2(0, 1, 2)) 270 271 // For the first part, use the recurrence phi (v1), otherwise v2. 272 auto *V1 = State.get(getOperand(0), 0); 273 Value *PartMinus1 = Part == 0 ? 
V1 : State.get(getOperand(1), Part - 1); 274 if (!PartMinus1->getType()->isVectorTy()) 275 return PartMinus1; 276 Value *V2 = State.get(getOperand(1), Part); 277 return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name); 278 } 279 case VPInstruction::CalculateTripCountMinusVF: { 280 Value *ScalarTC = State.get(getOperand(0), {0, 0}); 281 Value *Step = 282 createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF); 283 Value *Sub = Builder.CreateSub(ScalarTC, Step); 284 Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); 285 Value *Zero = ConstantInt::get(ScalarTC->getType(), 0); 286 return Builder.CreateSelect(Cmp, Sub, Zero); 287 } 288 case VPInstruction::CanonicalIVIncrement: 289 case VPInstruction::CanonicalIVIncrementNUW: { 290 if (Part == 0) { 291 bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW; 292 auto *Phi = State.get(getOperand(0), 0); 293 // The loop step is equal to the vectorization factor (num of SIMD 294 // elements) times the unroll factor (num of SIMD instructions). 295 Value *Step = 296 createStepForVF(Builder, Phi->getType(), State.VF, State.UF); 297 return Builder.CreateAdd(Phi, Step, Name, IsNUW, false); 298 } 299 return State.get(this, 0); 300 } 301 302 case VPInstruction::CanonicalIVIncrementForPart: 303 case VPInstruction::CanonicalIVIncrementForPartNUW: { 304 bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW; 305 auto *IV = State.get(getOperand(0), VPIteration(0, 0)); 306 if (Part == 0) 307 return IV; 308 309 // The canonical IV is incremented by the vectorization factor (num of SIMD 310 // elements) times the unroll part. 
311 Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part); 312 return Builder.CreateAdd(IV, Step, Name, IsNUW, false); 313 } 314 case VPInstruction::BranchOnCond: { 315 if (Part != 0) 316 return nullptr; 317 318 Value *Cond = State.get(getOperand(0), VPIteration(Part, 0)); 319 VPRegionBlock *ParentRegion = getParent()->getParent(); 320 VPBasicBlock *Header = ParentRegion->getEntryBasicBlock(); 321 322 // Replace the temporary unreachable terminator with a new conditional 323 // branch, hooking it up to backward destination for exiting blocks now and 324 // to forward destination(s) later when they are created. 325 BranchInst *CondBr = 326 Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr); 327 328 if (getParent()->isExiting()) 329 CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]); 330 331 CondBr->setSuccessor(0, nullptr); 332 Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 333 return CondBr; 334 } 335 case VPInstruction::BranchOnCount: { 336 if (Part != 0) 337 return nullptr; 338 // First create the compare. 339 Value *IV = State.get(getOperand(0), Part); 340 Value *TC = State.get(getOperand(1), Part); 341 Value *Cond = Builder.CreateICmpEQ(IV, TC); 342 343 // Now create the branch. 344 auto *Plan = getParent()->getPlan(); 345 VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); 346 VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock(); 347 348 // Replace the temporary unreachable terminator with a new conditional 349 // branch, hooking it up to backward destination (the header) now and to the 350 // forward destination (the exit/middle block) later when it is created. 351 // Note that CreateCondBr expects a valid BB as first argument, so we need 352 // to set it to nullptr later. 
353 BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), 354 State.CFG.VPBB2IRBB[Header]); 355 CondBr->setSuccessor(0, nullptr); 356 Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); 357 return CondBr; 358 } 359 default: 360 llvm_unreachable("Unsupported opcode for instruction"); 361 } 362 } 363 364 void VPInstruction::execute(VPTransformState &State) { 365 assert(!State.Instance && "VPInstruction executing an Instance"); 366 IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); 367 State.Builder.setFastMathFlags(FMF); 368 for (unsigned Part = 0; Part < State.UF; ++Part) { 369 Value *GeneratedValue = generateInstruction(State, Part); 370 if (!hasResult()) 371 continue; 372 assert(GeneratedValue && "generateInstruction must produce a value"); 373 State.set(this, GeneratedValue, Part); 374 } 375 } 376 377 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 378 void VPInstruction::dump() const { 379 VPSlotTracker SlotTracker(getParent()->getPlan()); 380 print(dbgs(), "", SlotTracker); 381 } 382 383 void VPInstruction::print(raw_ostream &O, const Twine &Indent, 384 VPSlotTracker &SlotTracker) const { 385 O << Indent << "EMIT "; 386 387 if (hasResult()) { 388 printAsOperand(O, SlotTracker); 389 O << " = "; 390 } 391 392 switch (getOpcode()) { 393 case VPInstruction::Not: 394 O << "not"; 395 break; 396 case VPInstruction::ICmpULE: 397 O << "icmp ule"; 398 break; 399 case VPInstruction::SLPLoad: 400 O << "combined load"; 401 break; 402 case VPInstruction::SLPStore: 403 O << "combined store"; 404 break; 405 case VPInstruction::ActiveLaneMask: 406 O << "active lane mask"; 407 break; 408 case VPInstruction::FirstOrderRecurrenceSplice: 409 O << "first-order splice"; 410 break; 411 case VPInstruction::CanonicalIVIncrement: 412 O << "VF * UF + "; 413 break; 414 case VPInstruction::CanonicalIVIncrementNUW: 415 O << "VF * UF +(nuw) "; 416 break; 417 case VPInstruction::BranchOnCond: 418 O << "branch-on-cond"; 419 break; 420 case 
VPInstruction::CalculateTripCountMinusVF: 421 O << "TC > VF ? TC - VF : 0"; 422 break; 423 case VPInstruction::CanonicalIVIncrementForPart: 424 O << "VF * Part + "; 425 break; 426 case VPInstruction::CanonicalIVIncrementForPartNUW: 427 O << "VF * Part +(nuw) "; 428 break; 429 case VPInstruction::BranchOnCount: 430 O << "branch-on-count "; 431 break; 432 default: 433 O << Instruction::getOpcodeName(getOpcode()); 434 } 435 436 O << FMF; 437 438 for (const VPValue *Operand : operands()) { 439 O << " "; 440 Operand->printAsOperand(O, SlotTracker); 441 } 442 443 if (DL) { 444 O << ", !dbg "; 445 DL.print(O); 446 } 447 } 448 #endif 449 450 void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) { 451 // Make sure the VPInstruction is a floating-point operation. 452 assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul || 453 Opcode == Instruction::FNeg || Opcode == Instruction::FSub || 454 Opcode == Instruction::FDiv || Opcode == Instruction::FRem || 455 Opcode == Instruction::FCmp) && 456 "this op can't take fast-math flags"); 457 FMF = FMFNew; 458 } 459 460 void VPWidenCallRecipe::execute(VPTransformState &State) { 461 assert(State.VF.isVector() && "not widening"); 462 auto &CI = *cast<CallInst>(getUnderlyingInstr()); 463 assert(!isa<DbgInfoIntrinsic>(CI) && 464 "DbgInfoIntrinsic should have been dropped during VPlan construction"); 465 State.setDebugLocFromInst(&CI); 466 467 for (unsigned Part = 0; Part < State.UF; ++Part) { 468 SmallVector<Type *, 2> TysForDecl; 469 // Add return type if intrinsic is overloaded on it. 470 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) { 471 TysForDecl.push_back( 472 VectorType::get(CI.getType()->getScalarType(), State.VF)); 473 } 474 SmallVector<Value *, 4> Args; 475 for (const auto &I : enumerate(operands())) { 476 // Some intrinsics have a scalar argument - don't replace it with a 477 // vector. 
478 Value *Arg; 479 if (VectorIntrinsicID == Intrinsic::not_intrinsic || 480 !isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())) 481 Arg = State.get(I.value(), Part); 482 else 483 Arg = State.get(I.value(), VPIteration(0, 0)); 484 if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index())) 485 TysForDecl.push_back(Arg->getType()); 486 Args.push_back(Arg); 487 } 488 489 Function *VectorF; 490 if (VectorIntrinsicID != Intrinsic::not_intrinsic) { 491 // Use vector version of the intrinsic. 492 Module *M = State.Builder.GetInsertBlock()->getModule(); 493 VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl); 494 assert(VectorF && "Can't retrieve vector intrinsic."); 495 } else { 496 #ifndef NDEBUG 497 assert(Variant != nullptr && "Can't create vector function."); 498 #endif 499 VectorF = Variant; 500 } 501 502 SmallVector<OperandBundleDef, 1> OpBundles; 503 CI.getOperandBundlesAsDefs(OpBundles); 504 CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles); 505 506 if (isa<FPMathOperator>(V)) 507 V->copyFastMathFlags(&CI); 508 509 State.set(this, V, Part); 510 State.addMetadata(V, &CI); 511 } 512 } 513 514 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 515 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, 516 VPSlotTracker &SlotTracker) const { 517 O << Indent << "WIDEN-CALL "; 518 519 auto *CI = cast<CallInst>(getUnderlyingInstr()); 520 if (CI->getType()->isVoidTy()) 521 O << "void "; 522 else { 523 printAsOperand(O, SlotTracker); 524 O << " = "; 525 } 526 527 O << "call @" << CI->getCalledFunction()->getName() << "("; 528 printOperands(O, SlotTracker); 529 O << ")"; 530 531 if (VectorIntrinsicID) 532 O << " (using vector intrinsic)"; 533 else { 534 O << " (using library function"; 535 if (Variant->hasName()) 536 O << ": " << Variant->getName(); 537 O << ")"; 538 } 539 } 540 541 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, 542 VPSlotTracker &SlotTracker) const { 543 O << 
Indent << "WIDEN-SELECT "; 544 printAsOperand(O, SlotTracker); 545 O << " = select "; 546 getOperand(0)->printAsOperand(O, SlotTracker); 547 O << ", "; 548 getOperand(1)->printAsOperand(O, SlotTracker); 549 O << ", "; 550 getOperand(2)->printAsOperand(O, SlotTracker); 551 O << (isInvariantCond() ? " (condition is loop invariant)" : ""); 552 } 553 #endif 554 555 void VPWidenSelectRecipe::execute(VPTransformState &State) { 556 auto &I = *cast<SelectInst>(getUnderlyingInstr()); 557 State.setDebugLocFromInst(&I); 558 559 // The condition can be loop invariant but still defined inside the 560 // loop. This means that we can't just use the original 'cond' value. 561 // We have to take the 'vectorized' value and pick the first lane. 562 // Instcombine will make this a no-op. 563 auto *InvarCond = 564 isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr; 565 566 for (unsigned Part = 0; Part < State.UF; ++Part) { 567 Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part); 568 Value *Op0 = State.get(getOperand(1), Part); 569 Value *Op1 = State.get(getOperand(2), Part); 570 Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); 571 State.set(this, Sel, Part); 572 State.addMetadata(Sel, &I); 573 } 574 } 575 576 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 577 void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const { 578 switch (OpType) { 579 case OperationType::PossiblyExactOp: 580 if (ExactFlags.IsExact) 581 O << " exact"; 582 break; 583 case OperationType::OverflowingBinOp: 584 if (WrapFlags.HasNUW) 585 O << " nuw"; 586 if (WrapFlags.HasNSW) 587 O << " nsw"; 588 break; 589 case OperationType::FPMathOp: 590 getFastMathFlags().print(O); 591 break; 592 case OperationType::GEPOp: 593 if (GEPFlags.IsInBounds) 594 O << " inbounds"; 595 break; 596 case OperationType::Other: 597 break; 598 } 599 O << " "; 600 } 601 #endif 602 603 void VPWidenRecipe::execute(VPTransformState &State) { 604 auto &I = *cast<Instruction>(getUnderlyingValue()); 605 
auto &Builder = State.Builder; 606 switch (I.getOpcode()) { 607 case Instruction::Call: 608 case Instruction::Br: 609 case Instruction::PHI: 610 case Instruction::GetElementPtr: 611 case Instruction::Select: 612 llvm_unreachable("This instruction is handled by a different recipe."); 613 case Instruction::UDiv: 614 case Instruction::SDiv: 615 case Instruction::SRem: 616 case Instruction::URem: 617 case Instruction::Add: 618 case Instruction::FAdd: 619 case Instruction::Sub: 620 case Instruction::FSub: 621 case Instruction::FNeg: 622 case Instruction::Mul: 623 case Instruction::FMul: 624 case Instruction::FDiv: 625 case Instruction::FRem: 626 case Instruction::Shl: 627 case Instruction::LShr: 628 case Instruction::AShr: 629 case Instruction::And: 630 case Instruction::Or: 631 case Instruction::Xor: { 632 // Just widen unops and binops. 633 State.setDebugLocFromInst(&I); 634 635 for (unsigned Part = 0; Part < State.UF; ++Part) { 636 SmallVector<Value *, 2> Ops; 637 for (VPValue *VPOp : operands()) 638 Ops.push_back(State.get(VPOp, Part)); 639 640 Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops); 641 642 if (auto *VecOp = dyn_cast<Instruction>(V)) 643 setFlags(VecOp); 644 645 // Use this vector value for all users of the original instruction. 646 State.set(this, V, Part); 647 State.addMetadata(V, &I); 648 } 649 650 break; 651 } 652 case Instruction::Freeze: { 653 State.setDebugLocFromInst(&I); 654 655 for (unsigned Part = 0; Part < State.UF; ++Part) { 656 Value *Op = State.get(getOperand(0), Part); 657 658 Value *Freeze = Builder.CreateFreeze(Op); 659 State.set(this, Freeze, Part); 660 } 661 break; 662 } 663 case Instruction::ICmp: 664 case Instruction::FCmp: { 665 // Widen compares. Generate vector compares. 
666 bool FCmp = (I.getOpcode() == Instruction::FCmp); 667 auto *Cmp = cast<CmpInst>(&I); 668 State.setDebugLocFromInst(Cmp); 669 for (unsigned Part = 0; Part < State.UF; ++Part) { 670 Value *A = State.get(getOperand(0), Part); 671 Value *B = State.get(getOperand(1), Part); 672 Value *C = nullptr; 673 if (FCmp) { 674 // Propagate fast math flags. 675 IRBuilder<>::FastMathFlagGuard FMFG(Builder); 676 Builder.setFastMathFlags(Cmp->getFastMathFlags()); 677 C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); 678 } else { 679 C = Builder.CreateICmp(Cmp->getPredicate(), A, B); 680 } 681 State.set(this, C, Part); 682 State.addMetadata(C, &I); 683 } 684 685 break; 686 } 687 default: 688 // This instruction is not vectorized by simple widening. 689 LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); 690 llvm_unreachable("Unhandled instruction!"); 691 } // end of switch. 692 } 693 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 694 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, 695 VPSlotTracker &SlotTracker) const { 696 O << Indent << "WIDEN "; 697 printAsOperand(O, SlotTracker); 698 const Instruction *UI = getUnderlyingInstr(); 699 O << " = " << UI->getOpcodeName(); 700 printFlags(O); 701 if (auto *Cmp = dyn_cast<CmpInst>(UI)) 702 O << Cmp->getPredicate() << " "; 703 printOperands(O, SlotTracker); 704 } 705 #endif 706 707 void VPWidenCastRecipe::execute(VPTransformState &State) { 708 auto *I = cast_or_null<Instruction>(getUnderlyingValue()); 709 if (I) 710 State.setDebugLocFromInst(I); 711 auto &Builder = State.Builder; 712 /// Vectorize casts. 
713 assert(State.VF.isVector() && "Not vectorizing?"); 714 Type *DestTy = VectorType::get(getResultType(), State.VF); 715 716 for (unsigned Part = 0; Part < State.UF; ++Part) { 717 Value *A = State.get(getOperand(0), Part); 718 Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy); 719 State.set(this, Cast, Part); 720 State.addMetadata(Cast, I); 721 } 722 } 723 724 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 725 void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent, 726 VPSlotTracker &SlotTracker) const { 727 O << Indent << "WIDEN-CAST "; 728 printAsOperand(O, SlotTracker); 729 O << " = " << Instruction::getOpcodeName(Opcode) << " "; 730 printOperands(O, SlotTracker); 731 O << " to " << *getResultType(); 732 } 733 734 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, 735 VPSlotTracker &SlotTracker) const { 736 O << Indent << "WIDEN-INDUCTION"; 737 if (getTruncInst()) { 738 O << "\\l\""; 739 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; 740 O << " +\n" << Indent << "\" "; 741 getVPValue(0)->printAsOperand(O, SlotTracker); 742 } else 743 O << " " << VPlanIngredient(IV); 744 745 O << ", "; 746 getStepValue()->printAsOperand(O, SlotTracker); 747 } 748 #endif 749 750 bool VPWidenIntOrFpInductionRecipe::isCanonical() const { 751 // The step may be defined by a recipe in the preheader (e.g. if it requires 752 // SCEV expansion), but for the canonical induction the step is required to be 753 // 1, which is represented as live-in. 
754 if (getStepValue()->getDefiningRecipe()) 755 return false; 756 auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue()); 757 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); 758 return StartC && StartC->isZero() && StepC && StepC->isOne(); 759 } 760 761 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 762 void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent, 763 VPSlotTracker &SlotTracker) const { 764 O << Indent; 765 printAsOperand(O, SlotTracker); 766 O << Indent << "= DERIVED-IV "; 767 getStartValue()->printAsOperand(O, SlotTracker); 768 O << " + "; 769 getCanonicalIV()->printAsOperand(O, SlotTracker); 770 O << " * "; 771 getStepValue()->printAsOperand(O, SlotTracker); 772 773 if (IndDesc.getStep()->getType() != ResultTy) 774 O << " (truncated to " << *ResultTy << ")"; 775 } 776 #endif 777 778 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 779 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, 780 VPSlotTracker &SlotTracker) const { 781 O << Indent; 782 printAsOperand(O, SlotTracker); 783 O << Indent << "= SCALAR-STEPS "; 784 printOperands(O, SlotTracker); 785 } 786 #endif 787 788 void VPWidenGEPRecipe::execute(VPTransformState &State) { 789 assert(State.VF.isVector() && "not widening"); 790 auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr()); 791 // Construct a vector GEP by widening the operands of the scalar GEP as 792 // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP 793 // results in a vector of pointers when at least one operand of the GEP 794 // is vector-typed. Thus, to keep the representation compact, we only use 795 // vector-typed operands for loop-varying values. 796 797 if (areAllOperandsInvariant()) { 798 // If we are vectorizing, but the GEP has only loop-invariant operands, 799 // the GEP we build (by only using vector-typed operands for 800 // loop-varying values) would be a scalar pointer. 
Thus, to ensure we 801 // produce a vector of pointers, we need to either arbitrarily pick an 802 // operand to broadcast, or broadcast a clone of the original GEP. 803 // Here, we broadcast a clone of the original. 804 // 805 // TODO: If at some point we decide to scalarize instructions having 806 // loop-invariant operands, this special case will no longer be 807 // required. We would add the scalarization decision to 808 // collectLoopScalars() and teach getVectorValue() to broadcast 809 // the lane-zero scalar value. 810 SmallVector<Value *> Ops; 811 for (unsigned I = 0, E = getNumOperands(); I != E; I++) 812 Ops.push_back(State.get(getOperand(I), VPIteration(0, 0))); 813 814 auto *NewGEP = 815 State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0], 816 ArrayRef(Ops).drop_front(), "", isInBounds()); 817 for (unsigned Part = 0; Part < State.UF; ++Part) { 818 Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP); 819 State.set(this, EntryPart, Part); 820 State.addMetadata(EntryPart, GEP); 821 } 822 } else { 823 // If the GEP has at least one loop-varying operand, we are sure to 824 // produce a vector of pointers. But if we are only unrolling, we want 825 // to produce a scalar GEP for each unroll part. Thus, the GEP we 826 // produce with the code below will be scalar (if VF == 1) or vector 827 // (otherwise). Note that for the unroll-only case, we still maintain 828 // values in the vector mapping with initVector, as we do for other 829 // instructions. 830 for (unsigned Part = 0; Part < State.UF; ++Part) { 831 // The pointer operand of the new GEP. If it's loop-invariant, we 832 // won't broadcast it. 833 auto *Ptr = isPointerLoopInvariant() 834 ? State.get(getOperand(0), VPIteration(0, 0)) 835 : State.get(getOperand(0), Part); 836 837 // Collect all the indices for the new GEP. If any index is 838 // loop-invariant, we won't broadcast it. 
839 SmallVector<Value *, 4> Indices; 840 for (unsigned I = 1, E = getNumOperands(); I < E; I++) { 841 VPValue *Operand = getOperand(I); 842 if (isIndexLoopInvariant(I - 1)) 843 Indices.push_back(State.get(Operand, VPIteration(0, 0))); 844 else 845 Indices.push_back(State.get(Operand, Part)); 846 } 847 848 // Create the new GEP. Note that this GEP may be a scalar if VF == 1, 849 // but it should be a vector, otherwise. 850 auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr, 851 Indices, "", isInBounds()); 852 assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && 853 "NewGEP is not a pointer vector"); 854 State.set(this, NewGEP, Part); 855 State.addMetadata(NewGEP, GEP); 856 } 857 } 858 } 859 860 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 861 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, 862 VPSlotTracker &SlotTracker) const { 863 O << Indent << "WIDEN-GEP "; 864 O << (isPointerLoopInvariant() ? "Inv" : "Var"); 865 for (size_t I = 0; I < getNumOperands() - 1; ++I) 866 O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]"; 867 868 O << " "; 869 printAsOperand(O, SlotTracker); 870 O << " = getelementptr"; 871 printFlags(O); 872 printOperands(O, SlotTracker); 873 } 874 #endif 875 876 void VPBlendRecipe::execute(VPTransformState &State) { 877 State.setDebugLocFromInst(Phi); 878 // We know that all PHIs in non-header blocks are converted into 879 // selects, so we don't have to worry about the insertion order and we 880 // can just use the builder. 881 // At this point we generate the predication tree. There may be 882 // duplications since this is a simple recursive scan, but future 883 // optimizations will clean it up. 
884 885 unsigned NumIncoming = getNumIncomingValues(); 886 887 // Generate a sequence of selects of the form: 888 // SELECT(Mask3, In3, 889 // SELECT(Mask2, In2, 890 // SELECT(Mask1, In1, 891 // In0))) 892 // Note that Mask0 is never used: lanes for which no path reaches this phi and 893 // are essentially undef are taken from In0. 894 VectorParts Entry(State.UF); 895 for (unsigned In = 0; In < NumIncoming; ++In) { 896 for (unsigned Part = 0; Part < State.UF; ++Part) { 897 // We might have single edge PHIs (blocks) - use an identity 898 // 'select' for the first PHI operand. 899 Value *In0 = State.get(getIncomingValue(In), Part); 900 if (In == 0) 901 Entry[Part] = In0; // Initialize with the first incoming value. 902 else { 903 // Select between the current value and the previous incoming edge 904 // based on the incoming mask. 905 Value *Cond = State.get(getMask(In), Part); 906 Entry[Part] = 907 State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi"); 908 } 909 } 910 } 911 for (unsigned Part = 0; Part < State.UF; ++Part) 912 State.set(this, Entry[Part], Part); 913 } 914 915 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 916 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, 917 VPSlotTracker &SlotTracker) const { 918 O << Indent << "BLEND "; 919 Phi->printAsOperand(O, false); 920 O << " ="; 921 if (getNumIncomingValues() == 1) { 922 // Not a User of any mask: not really blending, this is a 923 // single-predecessor phi. 
    // Single incoming value: print it without a mask.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    // Print each incoming value paired with the mask guarding it.
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}

// Print the recipe as "REDUCE <res> = <chain> + reduce.<op> (<vec>[, <cond>])".
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  // Fast-math flags are only meaningful for floating-point reductions.
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  if (getCondOp()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
  if (RdxDesc->IntermediateStore)
    O << " (with final reduction value stored in invariant address sank "
         "outside of loop)";
}
#endif

bool VPReplicateRecipe::shouldPack() const {
  // Find if the recipe is used by a widened recipe via an intervening
  // VPPredInstPHIRecipe. In this case, also pack the scalar values in a
  // vector.
  return any_of(users(), [](const VPUser *U) {
    if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
      return any_of(PredR->users(), [PredR](const VPUser *U) {
        return !U->usesScalars(PredR);
      });
    return false;
  });
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print the recipe as "CLONE"/"REPLICATE" followed by the replicated
// instruction, and "(S->V)" when the scalar results get packed into a vector.
void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");

  if (!getUnderlyingInstr()->getType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }
  if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
    O << "call";
    printFlags(O);
    O << "@" << CB->getCalledFunction()->getName() << "(";
    // The last operand is excluded from the argument list here; presumably it
    // models the callee — TODO(review): confirm against the recipe's builder.
    interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
                    O, [&O, &SlotTracker](VPValue *Op) {
                      Op->printAsOperand(O, SlotTracker);
                    });
    O << ")";
  } else {
    O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
    printFlags(O);
    printOperands(O, SlotTracker);
  }

  if (shouldPack())
    O << " (S->V)";
}
#endif

// Materialize a conditional branch on the (per-lane) mask bit for the current
// single instance; executes only in per-instance (replicated) code-gen.
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Branch on Mask works only on single instance.");

  unsigned Part = State.Instance->Part;
  unsigned Lane = State.Instance->Lane.getKnownLane();

  Value *ConditionBit = nullptr;
  VPValue *BlockInMask = getMask();
  if (BlockInMask) {
    ConditionBit = State.get(BlockInMask, Part);
    // If the mask was generated as a vector, extract this lane's bit.
    if (ConditionBit->getType()->isVectorTy())
      ConditionBit = State.Builder.CreateExtractElement(
          ConditionBit, State.Builder.getInt32(Lane));
  } else // Block in mask is all-one.
    ConditionBit = State.Builder.getTrue();

  // Replace the temporary unreachable terminator with a new conditional
  // branch, whose two destinations will be set later when they are created.
  auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
  assert(isa<UnreachableInst>(CurrentTerminator) &&
         "Expected to replace unreachable terminator with conditional branch.");
  // Create the branch with a placeholder successor, then null it out; both
  // destinations are filled in once the successor blocks exist.
  auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
  CondBr->setSuccessor(0, nullptr);
  ReplaceInstWithInst(CurrentTerminator, CondBr);
}

// Create the phi that merges the value of a predicated (conditionally
// executed) instruction with its bypass path, for the current instance.
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst =
      cast<Instruction>(State.get(getOperand(0), *State.Instance));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  assert(isa<VPReplicateRecipe>(getOperand(0)) &&
         "operand must be VPReplicateRecipe");

  // By current pack/unpack logic we need to generate only a single phi node:
  // if a vector value for the predicated instruction exists at this point it
  // means the instruction has vector users only, and a phi for the vector
  // value is needed. In this case the recipe of the predicated instruction is
  // marked to also do that packing, thereby "hoisting" the insert-element
  // sequence. Otherwise, a phi node for the scalar value is needed.
  unsigned Part = State.Instance->Part;
  if (State.hasVectorValue(getOperand(0), Part)) {
    Value *VectorValue = State.get(getOperand(0), Part);
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    if (State.hasVectorValue(this, Part))
      State.reset(this, VPhi, Part);
    else
      State.set(this, VPhi, Part);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), VPhi, Part);
  } else {
    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    // On the bypass edge (predicate false) the value is poison.
    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
                     PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Instance))
      State.reset(this, Phi, *State.Instance);
    else
      State.set(this, Phi, *State.Instance);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), Phi, *State.Instance);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}

// Print the recipe as "WIDEN [<res> = ]<opcode> <operands>".
void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";

  // Stores define no result value, so there is nothing to name for them.
  if (!isStore()) {
    getVPSingleValue()->printAsOperand(O, SlotTracker);
    O << " = ";
  }
  O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";

  printOperands(O, SlotTracker);
}
#endif

// Create the scalar canonical-induction phi ("index") in the header block.
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *Start = getStartValue()->getLiveInIRValue();
  PHINode *EntryPart = PHINode::Create(
      Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());

  BasicBlock
      *VectorPH = State.CFG.getPreheaderBBFor(this);
  EntryPart->addIncoming(Start, VectorPH);
  EntryPart->setDebugLoc(DL);
  // The single scalar phi serves all unroll parts.
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION";
}
#endif

// Return true if an induction described by (Kind, Start, Step, Ty) is exactly
// this canonical induction: integer kind, matching type and start value, and
// a live-in constant step of one.
bool VPCanonicalIVPHIRecipe::isCanonical(
    InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step,
    Type *Ty) const {
  // The types must match and it must be an integer induction.
  if (Ty != getScalarType() || Kind != InductionDescriptor::IK_IntInduction)
    return false;
  // Start must match the start value of this canonical induction.
  if (Start != getStartValue())
    return false;

  // If the step is defined by a recipe, it is not a ConstantInt.
  if (Step->getDefiningRecipe())
    return false;

  ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
  return StepC && StepC->isOne();
}

bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
  // For scalable VFs a scalar-only expansion additionally requires that only
  // the first lane of this recipe is demanded.
  return IsScalarAfterVectorization &&
         (!VF.isScalable() || vputils::onlyFirstLaneUsed(this));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
#endif

// Expand a SCEV expression to IR at the current insertion point and record
// the result in State.ExpandedSCEVs; each expression is expanded only once.
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");

  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 &*State.Builder.GetInsertPoint());
  assert(!State.ExpandedSCEVs.contains(Expr) &&
         "Same SCEV expanded multiple times");
  State.ExpandedSCEVs[Expr] = Res;
  // The expansion is a single value; reuse it for lane 0 of every part.
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, {Part, 0});
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
#endif

// Widen the scalar canonical IV into a per-part vector IV:
// VStart + (Part * VF + <0, 1, ..., VF-1>).
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  // Splat the scalar IV across the lanes, unless VF == 1.
  Value *VStart = VF.isScalar() ?
                      CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    // Per-part offset: Part * VF, splatted and then combined with a
    // <0, 1, ..., VF-1> step vector so every lane gets its own index.
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

// Create the header phi for a first-order recurrence, seeding the initial
// value into the last lane of the incoming vector from the preheader.
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  if (State.VF.isVector()) {
    // Insert the start value into the last lane (index VF - 1, computed at
    // runtime to support scalable vectors) of a poison vector; the code is
    // emitted in the preheader.
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(VectorPH->getTerminator());
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(
      VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, VectorPH);
  State.set(this, EntryPart, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

// Create the header phi(s) for a reduction. Stage #1 of two-stage phi
// vectorization: incoming edges are completed here only for the preheader
// side; the latch side is filled in later.
void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll
  // use this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  // Ordered reductions use a single phi regardless of the unroll factor.
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart =
        PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
    // MinMax reductions have the start value as their identity.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      // Vector start value: identity splat with the start value in lane 0.
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part; the other parts start at the identity.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

// Widen a non-induction phi in the VPlan-native path. Only the phi itself is
// created here; its operands are filled in at the end of code generation.
void VPWidenPHIRecipe::execute(VPTransformState &State) {
  assert(EnableVPlanNativePath &&
         "Non-native vplans are not expected to have VPWidenPHIRecipes.");

  // Currently we enter here in the VPlan-native path for non-induction
  // PHIs where all control flow is uniform. We simply widen these PHIs.
  // Create a vector phi with no operands - the vector phi operands will be
  // set at the end of vector code generation.
  VPBasicBlock *Parent = getParent();
  VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
  unsigned StartIdx = 0;
  // For phis in header blocks of loop regions, use the index of the value
  // coming from the preheader.
  if (LoopRegion->getEntryBasicBlock() == Parent) {
    for (unsigned I = 0; I < getNumOperands(); ++I) {
      if (getIncomingBlock(I) ==
          LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
        StartIdx = I;
    }
  }
  // The phi's type is taken from the chosen start operand's widened value.
  Value *Op0 = State.get(getOperand(StartIdx), 0);
  Type *VecTy = Op0->getType();
  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
  State.set(this, VecPhi, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant
  // incoming values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
// remove VPActiveLaneMaskPHIRecipe.
void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  // One phi per unroll part; only the preheader edge is added here.
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *StartMask = State.get(getOperand(0), Part);
    PHINode *EntryPart =
        State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
    EntryPart->addIncoming(StartMask, VectorPH);
    EntryPart->setDebugLoc(DL);
    State.set(this, EntryPart, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif