1 //===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file "describes" induction and recurrence variables. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Analysis/IVDescriptors.h" 14 #include "llvm/Analysis/DemandedBits.h" 15 #include "llvm/Analysis/LoopInfo.h" 16 #include "llvm/Analysis/ScalarEvolution.h" 17 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 18 #include "llvm/Analysis/ValueTracking.h" 19 #include "llvm/IR/Dominators.h" 20 #include "llvm/IR/Instructions.h" 21 #include "llvm/IR/PatternMatch.h" 22 #include "llvm/IR/ValueHandle.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/KnownBits.h" 25 26 using namespace llvm; 27 using namespace llvm::PatternMatch; 28 29 #define DEBUG_TYPE "iv-descriptors" 30 31 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, 32 SmallPtrSetImpl<Instruction *> &Set) { 33 for (const Use &Use : I->operands()) 34 if (!Set.count(dyn_cast<Instruction>(Use))) 35 return false; 36 return true; 37 } 38 39 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) { 40 switch (Kind) { 41 default: 42 break; 43 case RecurKind::Add: 44 case RecurKind::Mul: 45 case RecurKind::Or: 46 case RecurKind::And: 47 case RecurKind::Xor: 48 case RecurKind::SMax: 49 case RecurKind::SMin: 50 case RecurKind::UMax: 51 case RecurKind::UMin: 52 case RecurKind::AnyOf: 53 case RecurKind::FindFirstIVSMin: 54 case RecurKind::FindFirstIVUMin: 55 case RecurKind::FindLastIVSMax: 56 case RecurKind::FindLastIVUMax: 57 return true; 58 } 59 return false; 60 } 61 62 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) { 63 return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind); 64 } 65 66 /// Determines if Phi may have been type-promoted. If Phi has a single user 67 /// that ANDs the Phi with a type mask, return the user. RT is updated to 68 /// account for the narrower bit width represented by the mask, and the AND 69 /// instruction is added to CI. 70 static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, 71 SmallPtrSetImpl<Instruction *> &Visited, 72 SmallPtrSetImpl<Instruction *> &CI) { 73 if (!Phi->hasOneUse()) 74 return Phi; 75 76 const APInt *M = nullptr; 77 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); 78 79 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT 80 // with a new integer type of the corresponding bit width. 81 if (match(J, m_And(m_Instruction(I), m_APInt(M)))) { 82 int32_t Bits = (*M + 1).exactLogBase2(); 83 if (Bits > 0) { 84 RT = IntegerType::get(Phi->getContext(), Bits); 85 Visited.insert(Phi); 86 CI.insert(J); 87 return J; 88 } 89 } 90 return Phi; 91 } 92 93 /// Compute the minimal bit width needed to represent a reduction whose exit 94 /// instruction is given by Exit. 95 static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit, 96 DemandedBits *DB, 97 AssumptionCache *AC, 98 DominatorTree *DT) { 99 bool IsSigned = false; 100 const DataLayout &DL = Exit->getDataLayout(); 101 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType()); 102 103 if (DB) { 104 // Use the demanded bits analysis to determine the bits that are live out 105 // of the exit instruction, rounding up to the nearest power of two. If the 106 // use of demanded bits results in a smaller bit width, we know the value 107 // must be positive (i.e., IsSigned = false), because if this were not the 108 // case, the sign bit would have been demanded. 109 auto Mask = DB->getDemandedBits(Exit); 110 MaxBitWidth = Mask.getBitWidth() - Mask.countl_zero(); 111 } 112 113 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) { 114 // If demanded bits wasn't able to limit the bit width, we can try to use 115 // value tracking instead. This can be the case, for example, if the value 116 // may be negative. 117 auto NumSignBits = ComputeNumSignBits(Exit, DL, AC, nullptr, DT); 118 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType()); 119 MaxBitWidth = NumTypeBits - NumSignBits; 120 KnownBits Bits = computeKnownBits(Exit, DL); 121 if (!Bits.isNonNegative()) { 122 // If the value is not known to be non-negative, we set IsSigned to true, 123 // meaning that we will use sext instructions instead of zext 124 // instructions to restore the original type. 125 IsSigned = true; 126 // Make sure at least one sign bit is included in the result, so it 127 // will get properly sign-extended. 128 ++MaxBitWidth; 129 } 130 } 131 MaxBitWidth = llvm::bit_ceil(MaxBitWidth); 132 133 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth), 134 IsSigned); 135 } 136 137 /// Collect cast instructions that can be ignored in the vectorizer's cost 138 /// model, given a reduction exit value and the minimal type in which the 139 // reduction can be represented. Also search casts to the recurrence type 140 // to find the minimum width used by the recurrence. 141 static void collectCastInstrs(Loop *TheLoop, Instruction *Exit, 142 Type *RecurrenceType, 143 SmallPtrSetImpl<Instruction *> &Casts, 144 unsigned &MinWidthCastToRecurTy) { 145 146 SmallVector<Instruction *, 8> Worklist; 147 SmallPtrSet<Instruction *, 8> Visited; 148 Worklist.push_back(Exit); 149 MinWidthCastToRecurTy = -1U; 150 151 while (!Worklist.empty()) { 152 Instruction *Val = Worklist.pop_back_val(); 153 Visited.insert(Val); 154 if (auto *Cast = dyn_cast<CastInst>(Val)) { 155 if (Cast->getSrcTy() == RecurrenceType) { 156 // If the source type of a cast instruction is equal to the recurrence 157 // type, it will be eliminated, and should be ignored in the vectorizer 158 // cost model. 159 Casts.insert(Cast); 160 continue; 161 } 162 if (Cast->getDestTy() == RecurrenceType) { 163 // The minimum width used by the recurrence is found by checking for 164 // casts on its operands. The minimum width is used by the vectorizer 165 // when finding the widest type for in-loop reductions without any 166 // loads/stores. 167 MinWidthCastToRecurTy = std::min<unsigned>( 168 MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits()); 169 continue; 170 } 171 } 172 // Add all operands to the work list if they are loop-varying values that 173 // we haven't yet visited. 174 for (Value *O : cast<User>(Val)->operands()) 175 if (auto *I = dyn_cast<Instruction>(O)) 176 if (TheLoop->contains(I) && !Visited.count(I)) 177 Worklist.push_back(I); 178 } 179 } 180 181 // Check if a given Phi node can be recognized as an ordered reduction for 182 // vectorizing floating point operations without unsafe math. 183 static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, 184 Instruction *Exit, PHINode *Phi) { 185 // Currently only FAdd and FMulAdd are supported. 186 if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd) 187 return false; 188 189 if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd) 190 return false; 191 192 if (Kind == RecurKind::FMulAdd && 193 !RecurrenceDescriptor::isFMulAddIntrinsic(Exit)) 194 return false; 195 196 // Ensure the exit instruction has only one user other than the reduction PHI 197 if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3)) 198 return false; 199 200 // The only pattern accepted is the one in which the reduction PHI 201 // is used as one of the operands of the exit instruction 202 auto *Op0 = Exit->getOperand(0); 203 auto *Op1 = Exit->getOperand(1); 204 if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi) 205 return false; 206 if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi) 207 return false; 208 209 LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi 210 << ", ExitInst: " << *Exit << "\n"); 211 212 return true; 213 } 214 215 bool RecurrenceDescriptor::AddReductionVar( 216 PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF, 217 RecurrenceDescriptor &RedDes, DemandedBits *DB, AssumptionCache *AC, 218 DominatorTree *DT, ScalarEvolution *SE) { 219 if (Phi->getNumIncomingValues() != 2) 220 return false; 221 222 // Reduction variables are only found in the loop header block. 223 if (Phi->getParent() != TheLoop->getHeader()) 224 return false; 225 226 // Obtain the reduction start value from the value that comes from the loop 227 // preheader. 228 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); 229 230 // ExitInstruction is the single value which is used outside the loop. 231 // We only allow for a single reduction value to be used outside the loop. 232 // This includes users of the reduction, variables (which form a cycle 233 // which ends in the phi node). 234 Instruction *ExitInstruction = nullptr; 235 236 // Variable to keep last visited store instruction. By the end of the 237 // algorithm this variable will be either empty or having intermediate 238 // reduction value stored in invariant address. 239 StoreInst *IntermediateStore = nullptr; 240 241 // Indicates that we found a reduction operation in our scan. 242 bool FoundReduxOp = false; 243 244 // We start with the PHI node and scan for all of the users of this 245 // instruction. All users must be instructions that can be used as reduction 246 // variables (such as ADD). We must have a single out-of-block user. The cycle 247 // must include the original PHI. 248 bool FoundStartPHI = false; 249 250 // To recognize min/max patterns formed by a icmp select sequence, we store 251 // the number of instruction we saw from the recognized min/max pattern, 252 // to make sure we only see exactly the two instructions. 253 unsigned NumCmpSelectPatternInst = 0; 254 InstDesc ReduxDesc(false, nullptr); 255 256 // Data used for determining if the recurrence has been type-promoted. 257 Type *RecurrenceType = Phi->getType(); 258 SmallPtrSet<Instruction *, 4> CastInsts; 259 unsigned MinWidthCastToRecurrenceType; 260 Instruction *Start = Phi; 261 bool IsSigned = false; 262 263 SmallPtrSet<Instruction *, 8> VisitedInsts; 264 SmallVector<Instruction *, 8> Worklist; 265 266 // Return early if the recurrence kind does not match the type of Phi. If the 267 // recurrence kind is arithmetic, we attempt to look through AND operations 268 // resulting from the type promotion performed by InstCombine. Vector 269 // operations are not limited to the legal integer widths, so we may be able 270 // to evaluate the reduction in the narrower width. 271 if (RecurrenceType->isFloatingPointTy()) { 272 if (!isFloatingPointRecurrenceKind(Kind)) 273 return false; 274 } else if (RecurrenceType->isIntegerTy()) { 275 if (!isIntegerRecurrenceKind(Kind)) 276 return false; 277 if (!isMinMaxRecurrenceKind(Kind)) 278 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); 279 } else { 280 // Pointer min/max may exist, but it is not supported as a reduction op. 281 return false; 282 } 283 284 Worklist.push_back(Start); 285 VisitedInsts.insert(Start); 286 287 // Start with all flags set because we will intersect this with the reduction 288 // flags from all the reduction operations. 289 FastMathFlags FMF = FastMathFlags::getFast(); 290 291 // The first instruction in the use-def chain of the Phi node that requires 292 // exact floating point operations. 293 Instruction *ExactFPMathInst = nullptr; 294 295 // A value in the reduction can be used: 296 // - By the reduction: 297 // - Reduction operation: 298 // - One use of reduction value (safe). 299 // - Multiple use of reduction value (not safe). 300 // - PHI: 301 // - All uses of the PHI must be the reduction (safe). 302 // - Otherwise, not safe. 303 // - By instructions outside of the loop (safe). 304 // * One value may have several outside users, but all outside 305 // uses must be of the same value. 306 // - By store instructions with a loop invariant address (safe with 307 // the following restrictions): 308 // * If there are several stores, all must have the same address. 309 // * Final value should be stored in that loop invariant address. 310 // - By an instruction that is not part of the reduction (not safe). 311 // This is either: 312 // * An instruction type other than PHI or the reduction operation. 313 // * A PHI in the header other than the initial PHI. 314 while (!Worklist.empty()) { 315 Instruction *Cur = Worklist.pop_back_val(); 316 317 // Store instructions are allowed iff it is the store of the reduction 318 // value to the same loop invariant memory location. 319 if (auto *SI = dyn_cast<StoreInst>(Cur)) { 320 if (!SE) { 321 LLVM_DEBUG(dbgs() << "Store instructions are not processed without " 322 << "Scalar Evolution Analysis\n"); 323 return false; 324 } 325 326 const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand()); 327 // Check it is the same address as previous stores 328 if (IntermediateStore) { 329 const SCEV *OtherScev = 330 SE->getSCEV(IntermediateStore->getPointerOperand()); 331 332 if (OtherScev != PtrScev) { 333 LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses " 334 << "inside the loop: " << *SI->getPointerOperand() 335 << " and " 336 << *IntermediateStore->getPointerOperand() << '\n'); 337 return false; 338 } 339 } 340 341 // Check the pointer is loop invariant 342 if (!SE->isLoopInvariant(PtrScev, TheLoop)) { 343 LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address " 344 << "inside the loop: " << *SI->getPointerOperand() 345 << '\n'); 346 return false; 347 } 348 349 // IntermediateStore is always the last store in the loop. 350 IntermediateStore = SI; 351 continue; 352 } 353 354 // No Users. 355 // If the instruction has no users then this is a broken chain and can't be 356 // a reduction variable. 357 if (Cur->use_empty()) 358 return false; 359 360 bool IsAPhi = isa<PHINode>(Cur); 361 362 // A header PHI use other than the original PHI. 363 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent()) 364 return false; 365 366 // Reductions of instructions such as Div, and Sub is only possible if the 367 // LHS is the reduction variable. 368 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) && 369 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) && 370 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) 371 return false; 372 373 // Any reduction instruction must be of one of the allowed kinds. We ignore 374 // the starting value (the Phi or an AND instruction if the Phi has been 375 // type-promoted). 376 if (Cur != Start) { 377 ReduxDesc = 378 isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE); 379 ExactFPMathInst = ExactFPMathInst == nullptr 380 ? ReduxDesc.getExactFPMathInst() 381 : ExactFPMathInst; 382 if (!ReduxDesc.isRecurrence()) 383 return false; 384 // FIXME: FMF is allowed on phi, but propagation is not handled correctly. 385 if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) { 386 FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags(); 387 if (auto *Sel = dyn_cast<SelectInst>(ReduxDesc.getPatternInst())) { 388 // Accept FMF on either fcmp or select of a min/max idiom. 389 // TODO: This is a hack to work-around the fact that FMF may not be 390 // assigned/propagated correctly. If that problem is fixed or we 391 // standardize on fmin/fmax via intrinsics, this can be removed. 392 if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition())) 393 CurFMF |= FCmp->getFastMathFlags(); 394 } 395 FMF &= CurFMF; 396 } 397 // Update this reduction kind if we matched a new instruction. 398 // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind' 399 // state accurate while processing the worklist? 400 if (ReduxDesc.getRecKind() != RecurKind::None) 401 Kind = ReduxDesc.getRecKind(); 402 } 403 404 bool IsASelect = isa<SelectInst>(Cur); 405 406 // A conditional reduction operation must only have 2 or less uses in 407 // VisitedInsts. 408 if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) && 409 hasMultipleUsesOf(Cur, VisitedInsts, 2)) 410 return false; 411 412 // A reduction operation must only have one use of the reduction value. 413 if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) && 414 !isAnyOfRecurrenceKind(Kind) && hasMultipleUsesOf(Cur, VisitedInsts, 1)) 415 return false; 416 417 // All inputs to a PHI node must be a reduction value. 418 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) 419 return false; 420 421 if (isIntMinMaxRecurrenceKind(Kind) && (isa<ICmpInst>(Cur) || IsASelect)) 422 ++NumCmpSelectPatternInst; 423 if (isFPMinMaxRecurrenceKind(Kind) && (isa<FCmpInst>(Cur) || IsASelect)) 424 ++NumCmpSelectPatternInst; 425 if (isAnyOfRecurrenceKind(Kind) && IsASelect) 426 ++NumCmpSelectPatternInst; 427 428 // Check whether we found a reduction operator. 429 FoundReduxOp |= !IsAPhi && Cur != Start; 430 431 // Process users of current instruction. Push non-PHI nodes after PHI nodes 432 // onto the stack. This way we are going to have seen all inputs to PHI 433 // nodes once we get to them. 434 SmallVector<Instruction *, 8> NonPHIs; 435 SmallVector<Instruction *, 8> PHIs; 436 for (User *U : Cur->users()) { 437 Instruction *UI = cast<Instruction>(U); 438 439 // If the user is a call to llvm.fmuladd then the instruction can only be 440 // the final operand. 441 if (isFMulAddIntrinsic(UI)) 442 if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1)) 443 return false; 444 445 // Check if we found the exit user. 446 BasicBlock *Parent = UI->getParent(); 447 if (!TheLoop->contains(Parent)) { 448 // If we already know this instruction is used externally, move on to 449 // the next user. 450 if (ExitInstruction == Cur) 451 continue; 452 453 // Exit if you find multiple values used outside or if the header phi 454 // node is being used. In this case the user uses the value of the 455 // previous iteration, in which case we would loose "VF-1" iterations of 456 // the reduction operation if we vectorize. 457 if (ExitInstruction != nullptr || Cur == Phi) 458 return false; 459 460 // The instruction used by an outside user must be the last instruction 461 // before we feed back to the reduction phi. Otherwise, we loose VF-1 462 // operations on the value. 463 if (!is_contained(Phi->operands(), Cur)) 464 return false; 465 466 ExitInstruction = Cur; 467 continue; 468 } 469 470 // Process instructions only once (termination). Each reduction cycle 471 // value must only be used once, except by phi nodes and min/max 472 // reductions which are represented as a cmp followed by a select. 473 InstDesc IgnoredVal(false, nullptr); 474 if (VisitedInsts.insert(UI).second) { 475 if (isa<PHINode>(UI)) { 476 PHIs.push_back(UI); 477 } else { 478 StoreInst *SI = dyn_cast<StoreInst>(UI); 479 if (SI && SI->getPointerOperand() == Cur) { 480 // Reduction variable chain can only be stored somewhere but it 481 // can't be used as an address. 482 return false; 483 } 484 NonPHIs.push_back(UI); 485 } 486 } else if (!isa<PHINode>(UI) && 487 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) && 488 !isa<SelectInst>(UI)) || 489 (!isConditionalRdxPattern(UI).isRecurrence() && 490 !isAnyOfPattern(TheLoop, Phi, UI, IgnoredVal) 491 .isRecurrence() && 492 !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence()))) 493 return false; 494 495 // Remember that we completed the cycle. 496 if (UI == Phi) 497 FoundStartPHI = true; 498 } 499 Worklist.append(PHIs.begin(), PHIs.end()); 500 Worklist.append(NonPHIs.begin(), NonPHIs.end()); 501 } 502 503 // This means we have seen one but not the other instruction of the 504 // pattern or more than just a select and cmp. Zero implies that we saw a 505 // llvm.min/max intrinsic, which is always OK. 506 if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 && 507 NumCmpSelectPatternInst != 0) 508 return false; 509 510 if (isAnyOfRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1) 511 return false; 512 513 if (IntermediateStore) { 514 // Check that stored value goes to the phi node again. This way we make sure 515 // that the value stored in IntermediateStore is indeed the final reduction 516 // value. 517 if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) { 518 LLVM_DEBUG(dbgs() << "Not a final reduction value stored: " 519 << *IntermediateStore << '\n'); 520 return false; 521 } 522 523 // If there is an exit instruction it's value should be stored in 524 // IntermediateStore 525 if (ExitInstruction && 526 IntermediateStore->getValueOperand() != ExitInstruction) { 527 LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not " 528 "store last calculated value of the reduction: " 529 << *IntermediateStore << '\n'); 530 return false; 531 } 532 533 // If all uses are inside the loop (intermediate stores), then the 534 // reduction value after the loop will be the one used in the last store. 535 if (!ExitInstruction) 536 ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand()); 537 } 538 539 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) 540 return false; 541 542 const bool IsOrdered = 543 checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi); 544 545 if (Start != Phi) { 546 // If the starting value is not the same as the phi node, we speculatively 547 // looked through an 'and' instruction when evaluating a potential 548 // arithmetic reduction to determine if it may have been type-promoted. 549 // 550 // We now compute the minimal bit width that is required to represent the 551 // reduction. If this is the same width that was indicated by the 'and', we 552 // can represent the reduction in the smaller type. The 'and' instruction 553 // will be eliminated since it will essentially be a cast instruction that 554 // can be ignore in the cost model. If we compute a different type than we 555 // did when evaluating the 'and', the 'and' will not be eliminated, and we 556 // will end up with different kinds of operations in the recurrence 557 // expression (e.g., IntegerAND, IntegerADD). We give up if this is 558 // the case. 559 // 560 // The vectorizer relies on InstCombine to perform the actual 561 // type-shrinking. It does this by inserting instructions to truncate the 562 // exit value of the reduction to the width indicated by RecurrenceType and 563 // then extend this value back to the original width. If IsSigned is false, 564 // a 'zext' instruction will be generated; otherwise, a 'sext' will be 565 // used. 566 // 567 // TODO: We should not rely on InstCombine to rewrite the reduction in the 568 // smaller type. We should just generate a correctly typed expression 569 // to begin with. 570 Type *ComputedType; 571 std::tie(ComputedType, IsSigned) = 572 computeRecurrenceType(ExitInstruction, DB, AC, DT); 573 if (ComputedType != RecurrenceType) 574 return false; 575 } 576 577 // Collect cast instructions and the minimum width used by the recurrence. 578 // If the starting value is not the same as the phi node and the computed 579 // recurrence type is equal to the recurrence type, the recurrence expression 580 // will be represented in a narrower or wider type. If there are any cast 581 // instructions that will be unnecessary, collect them in CastsFromRecurTy. 582 // Note that the 'and' instruction was already included in this list. 583 // 584 // TODO: A better way to represent this may be to tag in some way all the 585 // instructions that are a part of the reduction. The vectorizer cost 586 // model could then apply the recurrence type to these instructions, 587 // without needing a white list of instructions to ignore. 588 // This may also be useful for the inloop reductions, if it can be 589 // kept simple enough. 590 collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts, 591 MinWidthCastToRecurrenceType); 592 593 // We found a reduction var if we have reached the original phi node and we 594 // only have a single instruction with out-of-loop users. 595 596 // The ExitInstruction(Instruction which is allowed to have out-of-loop users) 597 // is saved as part of the RecurrenceDescriptor. 598 599 // Save the description of this reduction variable. 600 RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind, 601 FMF, ExactFPMathInst, RecurrenceType, IsSigned, 602 IsOrdered, CastInsts, MinWidthCastToRecurrenceType); 603 RedDes = RD; 604 605 return true; 606 } 607 608 // We are looking for loops that do something like this: 609 // int r = 0; 610 // for (int i = 0; i < n; i++) { 611 // if (src[i] > 3) 612 // r = 3; 613 // } 614 // where the reduction value (r) only has two states, in this example 0 or 3. 615 // The generated LLVM IR for this type of loop will be like this: 616 // for.body: 617 // %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ] 618 // ... 619 // %cmp = icmp sgt i32 %5, 3 620 // %spec.select = select i1 %cmp, i32 3, i32 %r 621 // ... 622 // In general we can support vectorization of loops where 'r' flips between 623 // any two non-constants, provided they are loop invariant. The only thing 624 // we actually care about at the end of the loop is whether or not any lane 625 // in the selected vector is different from the start value. The final 626 // across-vector reduction after the loop simply involves choosing the start 627 // value if nothing changed (0 in the example above) or the other selected 628 // value (3 in the example above). 629 RecurrenceDescriptor::InstDesc 630 RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi, 631 Instruction *I, InstDesc &Prev) { 632 // We must handle the select(cmp(),x,y) as a single instruction. Advance to 633 // the select. 634 if (match(I, m_OneUse(m_Cmp()))) { 635 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin())) 636 return InstDesc(Select, Prev.getRecKind()); 637 } 638 639 if (!match(I, m_Select(m_Cmp(), m_Value(), m_Value()))) 640 return InstDesc(false, I); 641 642 SelectInst *SI = cast<SelectInst>(I); 643 Value *NonPhi = nullptr; 644 645 if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue())) 646 NonPhi = SI->getFalseValue(); 647 else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue())) 648 NonPhi = SI->getTrueValue(); 649 else 650 return InstDesc(false, I); 651 652 // We are looking for selects of the form: 653 // select(cmp(), phi, loop_invariant) or 654 // select(cmp(), loop_invariant, phi) 655 if (!Loop->isLoopInvariant(NonPhi)) 656 return InstDesc(false, I); 657 658 return InstDesc(I, RecurKind::AnyOf); 659 } 660 661 // We are looking for loops that do something like this: 662 // int r = 0; 663 // for (int i = 0; i < n; i++) { 664 // if (src[i] > 3) 665 // r = i; 666 // } 667 // The reduction value (r) is derived from either the values of an induction 668 // variable (i) sequence, or from the start value (0). The LLVM IR generated for 669 // such loops would be as follows: 670 // for.body: 671 // %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ] 672 // %i = phi i32 [ %inc, %for.body ], [ 0, %entry ] 673 // ... 674 // %cmp = icmp sgt i32 %5, 3 675 // %spec.select = select i1 %cmp, i32 %i, i32 %r 676 // %inc = add nsw i32 %i, 1 677 // ... 678 // Since 'i' is an induction variable, the reduction value after the loop will 679 // be the maximum (increasing induction) or minimum (decreasing induction) value 680 // of 'i' that the condition (src[i] > 3) is satisfied, or the start value (0 in 681 // the example above). When the start value of the induction variable 'i' is 682 // greater than the minimum (increasing induction) or maximum (decreasing 683 // induction) value of the data type, we can use the minimum (increasing 684 // induction) or maximum (decreasing induction) value of the data type as a 685 // sentinel value to replace the start value. This allows us to perform a single 686 // reduction max (increasing induction) or min (decreasing induction) operation 687 // to obtain the final reduction result. 688 // TODO: It is possible to solve the case where the start value is the minimum 689 // value of the data type or a non-constant value by using mask and multiple 690 // reduction operations. 691 RecurrenceDescriptor::InstDesc 692 RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop, 693 PHINode *OrigPhi, Instruction *I, 694 ScalarEvolution &SE) { 695 // TODO: Support the vectorization of FindLastIV when the reduction phi is 696 // used by more than one select instruction. This vectorization is only 697 // performed when the SCEV of each increasing induction variable used by the 698 // select instructions is identical. 699 if (!OrigPhi->hasOneUse()) 700 return InstDesc(false, I); 701 702 // We are looking for selects of the form: 703 // select(cmp(), phi, loop_induction) or 704 // select(cmp(), loop_induction, phi) 705 // TODO: Match selects with multi-use cmp conditions. 706 Value *NonRdxPhi = nullptr; 707 if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi), 708 m_Specific(OrigPhi)), 709 m_Select(m_OneUse(m_Cmp()), m_Specific(OrigPhi), 710 m_Value(NonRdxPhi))))) 711 return InstDesc(false, I); 712 713 // Returns either FindFirstIV/FindLastIV, if such a pattern is found, or 714 // std::nullopt. 715 auto GetRecurKind = [&](Value *V) -> std::optional<RecurKind> { 716 Type *Ty = V->getType(); 717 if (!SE.isSCEVable(Ty)) 718 return std::nullopt; 719 720 auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(V)); 721 if (!AR || AR->getLoop() != TheLoop) 722 return std::nullopt; 723 724 const SCEV *Step = AR->getStepRecurrence(SE); 725 if ((isFindFirstIVRecurrenceKind(Kind) && !SE.isKnownNegative(Step)) || 726 (isFindLastIVRecurrenceKind(Kind) && !SE.isKnownPositive(Step))) 727 return std::nullopt; 728 729 // Check if the minimum (FindLast) or maximum (FindFirst) value of the 730 // recurrence type can be used as a sentinel value. The maximum acceptable 731 // range for the induction variable, called the valid range will exclude 732 // <sentinel value>, where <sentinel value> is 733 // [Signed|Unsigned]Min(<recurrence type>) for FindLastIV or 734 // [Signed|Unsigned]Max(<recurrence type>) for FindFirstIV. 735 // TODO: This range restriction can be lifted by adding an additional 736 // virtual OR reduction. 737 auto CheckRange = [&](bool IsSigned) { 738 const ConstantRange IVRange = 739 IsSigned ? SE.getSignedRange(AR) : SE.getUnsignedRange(AR); 740 unsigned NumBits = Ty->getIntegerBitWidth(); 741 ConstantRange ValidRange = ConstantRange::getEmpty(NumBits); 742 if (isFindLastIVRecurrenceKind(Kind)) { 743 APInt Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits) 744 : APInt::getMinValue(NumBits); 745 ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel); 746 } else { 747 if (IsSigned) 748 ValidRange = 749 ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits), 750 APInt::getSignedMaxValue(NumBits) - 1); 751 else 752 ValidRange = ConstantRange::getNonEmpty( 753 APInt::getMinValue(NumBits), APInt::getMaxValue(NumBits) - 1); 754 } 755 756 LLVM_DEBUG(dbgs() << "LV: " 757 << (isFindLastIVRecurrenceKind(Kind) ? "FindLastIV" 758 : "FindFirstIV") 759 << " valid range is " << ValidRange 760 << ", and the range of " << *AR << " is " << IVRange 761 << "\n"); 762 763 // Ensure the induction variable does not wrap around by verifying that 764 // its range is fully contained within the valid range. 765 return ValidRange.contains(IVRange); 766 }; 767 if (isFindLastIVRecurrenceKind(Kind)) { 768 if (CheckRange(true)) 769 return RecurKind::FindLastIVSMax; 770 if (CheckRange(false)) 771 return RecurKind::FindLastIVUMax; 772 return std::nullopt; 773 } 774 assert(isFindFirstIVRecurrenceKind(Kind) && 775 "Kind must either be a FindLastIV or FindFirstIV"); 776 777 if (CheckRange(true)) 778 return RecurKind::FindFirstIVSMin; 779 if (CheckRange(false)) 780 return RecurKind::FindFirstIVUMin; 781 return std::nullopt; 782 }; 783 784 if (auto RK = GetRecurKind(NonRdxPhi)) 785 return InstDesc(I, *RK); 786 787 return InstDesc(false, I); 788 } 789 790 RecurrenceDescriptor::InstDesc 791 RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind, 792 const InstDesc &Prev) { 793 assert((isa<CmpInst>(I) || isa<SelectInst>(I) || isa<CallInst>(I)) && 794 "Expected a cmp or select or call instruction"); 795 if (!isMinMaxRecurrenceKind(Kind)) 796 return InstDesc(false, I); 797 798 // We must handle the select(cmp()) as a single instruction. Advance to the 799 // select. 800 if (match(I, m_OneUse(m_Cmp()))) { 801 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin())) 802 return InstDesc(Select, Prev.getRecKind()); 803 } 804 805 // Only match select with single use cmp condition, or a min/max intrinsic. 806 if (!isa<IntrinsicInst>(I) && 807 !match(I, m_Select(m_OneUse(m_Cmp()), m_Value(), m_Value()))) 808 return InstDesc(false, I); 809 810 // Look for a min/max pattern. 811 if (match(I, m_UMin(m_Value(), m_Value()))) 812 return InstDesc(Kind == RecurKind::UMin, I); 813 if (match(I, m_UMax(m_Value(), m_Value()))) 814 return InstDesc(Kind == RecurKind::UMax, I); 815 if (match(I, m_SMax(m_Value(), m_Value()))) 816 return InstDesc(Kind == RecurKind::SMax, I); 817 if (match(I, m_SMin(m_Value(), m_Value()))) 818 return InstDesc(Kind == RecurKind::SMin, I); 819 if (match(I, m_OrdOrUnordFMin(m_Value(), m_Value()))) 820 return InstDesc(Kind == RecurKind::FMin, I); 821 if (match(I, m_OrdOrUnordFMax(m_Value(), m_Value()))) 822 return InstDesc(Kind == RecurKind::FMax, I); 823 if (match(I, m_FMinNum(m_Value(), m_Value()))) 824 return InstDesc(Kind == RecurKind::FMin, I); 825 if (match(I, m_FMaxNum(m_Value(), m_Value()))) 826 return InstDesc(Kind == RecurKind::FMax, I); 827 if (match(I, m_FMinimumNum(m_Value(), m_Value()))) 828 return InstDesc(Kind == RecurKind::FMinimumNum, I); 829 if (match(I, m_FMaximumNum(m_Value(), m_Value()))) 830 return InstDesc(Kind == RecurKind::FMaximumNum, I); 831 if (match(I, m_FMinimum(m_Value(), m_Value()))) 832 return InstDesc(Kind == RecurKind::FMinimum, I); 833 if (match(I, m_FMaximum(m_Value(), m_Value()))) 834 return InstDesc(Kind == RecurKind::FMaximum, I); 835 836 return InstDesc(false, I); 837 } 838 839 /// Returns true if the select instruction has users in the compare-and-add 840 /// reduction pattern below. The select instruction argument is the last one 841 /// in the sequence. 842 /// 843 /// %sum.1 = phi ... 844 /// ... 845 /// %cmp = fcmp pred %0, %CFP 846 /// %add = fadd %0, %sum.1 847 /// %sum.2 = select %cmp, %add, %sum.1 848 RecurrenceDescriptor::InstDesc 849 RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) { 850 SelectInst *SI = dyn_cast<SelectInst>(I); 851 if (!SI) 852 return InstDesc(false, I); 853 854 CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition()); 855 // Only handle single use cases for now. 856 if (!CI || !CI->hasOneUse()) 857 return InstDesc(false, I); 858 859 Value *TrueVal = SI->getTrueValue(); 860 Value *FalseVal = SI->getFalseValue(); 861 // Handle only when either of operands of select instruction is a PHI 862 // node for now. 863 if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) || 864 (!isa<PHINode>(TrueVal) && !isa<PHINode>(FalseVal))) 865 return InstDesc(false, I); 866 867 Instruction *I1 = isa<PHINode>(TrueVal) ? dyn_cast<Instruction>(FalseVal) 868 : dyn_cast<Instruction>(TrueVal); 869 if (!I1 || !I1->isBinaryOp()) 870 return InstDesc(false, I); 871 872 Value *Op1, *Op2; 873 if (!(((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) || 874 m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) && 875 I1->isFast()) || 876 (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) || 877 ((m_Add(m_Value(Op1), m_Value(Op2)).match(I1) || 878 m_Sub(m_Value(Op1), m_Value(Op2)).match(I1))) || 879 (m_Mul(m_Value(Op1), m_Value(Op2)).match(I1)))) 880 return InstDesc(false, I); 881 882 Instruction *IPhi = isa<PHINode>(Op1) ? dyn_cast<Instruction>(Op1) 883 : dyn_cast<Instruction>(Op2); 884 if (!IPhi || IPhi != FalseVal) 885 return InstDesc(false, I); 886 887 return InstDesc(true, SI); 888 } 889 890 RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr( 891 Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev, 892 FastMathFlags FuncFMF, ScalarEvolution *SE) { 893 assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind); 894 switch (I->getOpcode()) { 895 default: 896 return InstDesc(false, I); 897 case Instruction::PHI: 898 return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst()); 899 case Instruction::Sub: 900 case Instruction::Add: 901 return InstDesc(Kind == RecurKind::Add, I); 902 case Instruction::Mul: 903 return InstDesc(Kind == RecurKind::Mul, I); 904 case Instruction::And: 905 return InstDesc(Kind == RecurKind::And, I); 906 case Instruction::Or: 907 return InstDesc(Kind == RecurKind::Or, I); 908 case Instruction::Xor: 909 return InstDesc(Kind == RecurKind::Xor, I); 910 case Instruction::FDiv: 911 case Instruction::FMul: 912 return InstDesc(Kind == RecurKind::FMul, I, 913 I->hasAllowReassoc() ? nullptr : I); 914 case Instruction::FSub: 915 case Instruction::FAdd: 916 return InstDesc(Kind == RecurKind::FAdd, I, 917 I->hasAllowReassoc() ? nullptr : I); 918 case Instruction::Select: 919 if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul || 920 Kind == RecurKind::Add || Kind == RecurKind::Mul) 921 return isConditionalRdxPattern(I); 922 if (isFindIVRecurrenceKind(Kind) && SE) 923 return isFindIVPattern(Kind, L, OrigPhi, I, *SE); 924 [[fallthrough]]; 925 case Instruction::FCmp: 926 case Instruction::ICmp: 927 case Instruction::Call: 928 if (isAnyOfRecurrenceKind(Kind)) 929 return isAnyOfPattern(L, OrigPhi, I, Prev); 930 auto HasRequiredFMF = [&]() { 931 if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) 932 return true; 933 if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros()) 934 return true; 935 // minimum/minnum and maximum/maxnum intrinsics do not require nsz and nnan 936 // flags since NaN and signed zeroes are propagated in the intrinsic 937 // implementation. 938 return match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())) || 939 match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())) || 940 match(I, 941 m_Intrinsic<Intrinsic::minimumnum>(m_Value(), m_Value())) || 942 match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(), m_Value())); 943 }; 944 if (isIntMinMaxRecurrenceKind(Kind)) 945 return isMinMaxPattern(I, Kind, Prev); 946 if (isFPMinMaxRecurrenceKind(Kind)) { 947 InstDesc Res = isMinMaxPattern(I, Kind, Prev); 948 if (!Res.isRecurrence()) 949 return InstDesc(false, I); 950 if (HasRequiredFMF()) 951 return Res; 952 // We may be able to vectorize FMax/FMin reductions using maxnum/minnum 953 // intrinsics with extra checks ensuring the vector loop handles only 954 // non-NaN inputs. 955 if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()))) { 956 assert(Kind == RecurKind::FMax && 957 "unexpected recurrence kind for maxnum"); 958 return InstDesc(I, RecurKind::FMaxNum); 959 } 960 if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value()))) { 961 assert(Kind == RecurKind::FMin && 962 "unexpected recurrence kind for minnum"); 963 return InstDesc(I, RecurKind::FMinNum); 964 } 965 return InstDesc(false, I); 966 } 967 if (isFMulAddIntrinsic(I)) 968 return InstDesc(Kind == RecurKind::FMulAdd, I, 969 I->hasAllowReassoc() ? nullptr : I); 970 return InstDesc(false, I); 971 } 972 } 973 974 bool RecurrenceDescriptor::hasMultipleUsesOf( 975 Instruction *I, SmallPtrSetImpl<Instruction *> &Insts, 976 unsigned MaxNumUses) { 977 unsigned NumUses = 0; 978 for (const Use &U : I->operands()) { 979 if (Insts.count(dyn_cast<Instruction>(U))) 980 ++NumUses; 981 if (NumUses > MaxNumUses) 982 return true; 983 } 984 985 return false; 986 } 987 988 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, 989 RecurrenceDescriptor &RedDes, 990 DemandedBits *DB, AssumptionCache *AC, 991 DominatorTree *DT, 992 ScalarEvolution *SE) { 993 BasicBlock *Header = TheLoop->getHeader(); 994 Function &F = *Header->getParent(); 995 FastMathFlags FMF; 996 FMF.setNoNaNs( 997 F.getFnAttribute("no-nans-fp-math").getValueAsBool()); 998 FMF.setNoSignedZeros( 999 F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool()); 1000 1001 if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT, 1002 SE)) { 1003 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); 1004 return true; 1005 } 1006 if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT, 1007 SE)) { 1008 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); 1009 return true; 1010 } 1011 if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT, 1012 SE)) { 1013 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); 1014 return true; 1015 } 1016 if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT, 1017 SE)) { 1018 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); 1019 return true; 1020 } 1021 if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT, 1022 SE)) { 1023 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); 1024 return true; 1025 } 1026 if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT, 1027 SE)) { 1028 LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n"); 1029 return true; 1030 } 1031 if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT, 1032 SE)) { 1033 LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n"); 1034 return true; 1035 } 1036 if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT, 1037 SE)) { 1038 LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n"); 1039 return true; 1040 } 1041 if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT, 1042 SE)) { 1043 LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n"); 1044 return true; 1045 } 1046 if (AddReductionVar(Phi, RecurKind::AnyOf, TheLoop, FMF, RedDes, DB, AC, DT, 1047 SE)) { 1048 LLVM_DEBUG(dbgs() << "Found a conditional select reduction PHI." << *Phi 1049 << "\n"); 1050 return true; 1051 } 1052 if (AddReductionVar(Phi, RecurKind::FindLastIVSMax, TheLoop, FMF, RedDes, DB, 1053 AC, DT, SE)) { 1054 LLVM_DEBUG(dbgs() << "Found a FindLastIV reduction PHI." << *Phi << "\n"); 1055 return true; 1056 } 1057 if (AddReductionVar(Phi, RecurKind::FindFirstIVSMin, TheLoop, FMF, RedDes, DB, 1058 AC, DT, SE)) { 1059 LLVM_DEBUG(dbgs() << "Found a FindFirstIV reduction PHI." << *Phi << "\n"); 1060 return true; 1061 } 1062 if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT, 1063 SE)) { 1064 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); 1065 return true; 1066 } 1067 if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT, 1068 SE)) { 1069 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); 1070 return true; 1071 } 1072 if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT, 1073 SE)) { 1074 LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n"); 1075 return true; 1076 } 1077 if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT, 1078 SE)) { 1079 LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n"); 1080 return true; 1081 } 1082 if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT, 1083 SE)) { 1084 LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n"); 1085 return true; 1086 } 1087 if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT, 1088 SE)) { 1089 LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n"); 1090 return true; 1091 } 1092 if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT, 1093 SE)) { 1094 LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n"); 1095 return true; 1096 } 1097 if (AddReductionVar(Phi, RecurKind::FMaximumNum, TheLoop, FMF, RedDes, DB, AC, 1098 DT, SE)) { 1099 LLVM_DEBUG(dbgs() << "Found a float MAXIMUMNUM reduction PHI." << *Phi 1100 << "\n"); 1101 return true; 1102 } 1103 if (AddReductionVar(Phi, RecurKind::FMinimumNum, TheLoop, FMF, RedDes, DB, AC, 1104 DT, SE)) { 1105 LLVM_DEBUG(dbgs() << "Found a float MINIMUMNUM reduction PHI." << *Phi 1106 << "\n"); 1107 return true; 1108 } 1109 1110 // Not a reduction of known type. 1111 return false; 1112 } 1113 1114 bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, 1115 DominatorTree *DT) { 1116 1117 // Ensure the phi node is in the loop header and has two incoming values. 1118 if (Phi->getParent() != TheLoop->getHeader() || 1119 Phi->getNumIncomingValues() != 2) 1120 return false; 1121 1122 // Ensure the loop has a preheader and a single latch block. The loop 1123 // vectorizer will need the latch to set up the next iteration of the loop. 1124 auto *Preheader = TheLoop->getLoopPreheader(); 1125 auto *Latch = TheLoop->getLoopLatch(); 1126 if (!Preheader || !Latch) 1127 return false; 1128 1129 // Ensure the phi node's incoming blocks are the loop preheader and latch. 1130 if (Phi->getBasicBlockIndex(Preheader) < 0 || 1131 Phi->getBasicBlockIndex(Latch) < 0) 1132 return false; 1133 1134 // Get the previous value. The previous value comes from the latch edge while 1135 // the initial value comes from the preheader edge. 1136 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch)); 1137 1138 // If Previous is a phi in the header, go through incoming values from the 1139 // latch until we find a non-phi value. Use this as the new Previous, all uses 1140 // in the header will be dominated by the original phi, but need to be moved 1141 // after the non-phi previous value. 1142 SmallPtrSet<PHINode *, 4> SeenPhis; 1143 while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) { 1144 if (PrevPhi->getParent() != Phi->getParent()) 1145 return false; 1146 if (!SeenPhis.insert(PrevPhi).second) 1147 return false; 1148 Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch)); 1149 } 1150 1151 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous)) 1152 return false; 1153 1154 // Ensure every user of the phi node (recursively) is dominated by the 1155 // previous value. The dominance requirement ensures the loop vectorizer will 1156 // not need to vectorize the initial value prior to the first iteration of the 1157 // loop. 1158 // TODO: Consider extending this sinking to handle memory instructions. 1159 1160 SmallPtrSet<Value *, 8> Seen; 1161 BasicBlock *PhiBB = Phi->getParent(); 1162 SmallVector<Instruction *, 8> WorkList; 1163 auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) { 1164 // Cyclic dependence. 1165 if (Previous == SinkCandidate) 1166 return false; 1167 1168 if (!Seen.insert(SinkCandidate).second) 1169 return true; 1170 if (DT->dominates(Previous, 1171 SinkCandidate)) // We already are good w/o sinking. 1172 return true; 1173 1174 if (SinkCandidate->getParent() != PhiBB || 1175 SinkCandidate->mayHaveSideEffects() || 1176 SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator()) 1177 return false; 1178 1179 // If we reach a PHI node that is not dominated by Previous, we reached a 1180 // header PHI. No need for sinking. 1181 if (isa<PHINode>(SinkCandidate)) 1182 return true; 1183 1184 // Sink User tentatively and check its users 1185 WorkList.push_back(SinkCandidate); 1186 return true; 1187 }; 1188 1189 WorkList.push_back(Phi); 1190 // Try to recursively sink instructions and their users after Previous. 1191 while (!WorkList.empty()) { 1192 Instruction *Current = WorkList.pop_back_val(); 1193 for (User *User : Current->users()) { 1194 if (!TryToPushSinkCandidate(cast<Instruction>(User))) 1195 return false; 1196 } 1197 } 1198 1199 return true; 1200 } 1201 1202 unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { 1203 switch (Kind) { 1204 case RecurKind::Add: 1205 return Instruction::Add; 1206 case RecurKind::Mul: 1207 return Instruction::Mul; 1208 case RecurKind::AnyOf: 1209 case RecurKind::FindFirstIVSMin: 1210 case RecurKind::FindFirstIVUMin: 1211 case RecurKind::FindLastIVSMax: 1212 case RecurKind::FindLastIVUMax: 1213 case RecurKind::Or: 1214 return Instruction::Or; 1215 case RecurKind::And: 1216 return Instruction::And; 1217 case RecurKind::Xor: 1218 return Instruction::Xor; 1219 case RecurKind::FMul: 1220 return Instruction::FMul; 1221 case RecurKind::FMulAdd: 1222 case RecurKind::FAdd: 1223 return Instruction::FAdd; 1224 case RecurKind::SMax: 1225 case RecurKind::SMin: 1226 case RecurKind::UMax: 1227 case RecurKind::UMin: 1228 return Instruction::ICmp; 1229 case RecurKind::FMax: 1230 case RecurKind::FMin: 1231 case RecurKind::FMaximum: 1232 case RecurKind::FMinimum: 1233 case RecurKind::FMaximumNum: 1234 case RecurKind::FMinimumNum: 1235 return Instruction::FCmp; 1236 default: 1237 llvm_unreachable("Unknown recurrence operation"); 1238 } 1239 } 1240 1241 SmallVector<Instruction *, 4> 1242 RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const { 1243 SmallVector<Instruction *, 4> ReductionOperations; 1244 const bool IsMinMax = isMinMaxRecurrenceKind(Kind); 1245 1246 // Search down from the Phi to the LoopExitInstr, looking for instructions 1247 // with a single user of the correct type for the reduction. 1248 1249 // Note that we check that the type of the operand is correct for each item in 1250 // the chain, including the last (the loop exit value). This can come up from 1251 // sub, which would otherwise be treated as an add reduction. MinMax also need 1252 // to check for a pair of icmp/select, for which we use getNextInstruction and 1253 // isCorrectOpcode functions to step the right number of instruction, and 1254 // check the icmp/select pair. 1255 // FIXME: We also do not attempt to look through Select's yet, which might 1256 // be part of the reduction chain, or attempt to looks through And's to find a 1257 // smaller bitwidth. Subs are also currently not allowed (which are usually 1258 // treated as part of a add reduction) as they are expected to generally be 1259 // more expensive than out-of-loop reductions, and need to be costed more 1260 // carefully. 1261 unsigned ExpectedUses = 1; 1262 if (IsMinMax) 1263 ExpectedUses = 2; 1264 1265 auto getNextInstruction = [&](Instruction *Cur) -> Instruction * { 1266 for (auto *User : Cur->users()) { 1267 Instruction *UI = cast<Instruction>(User); 1268 if (isa<PHINode>(UI)) 1269 continue; 1270 if (IsMinMax) { 1271 // We are expecting a icmp/select pair, which we go to the next select 1272 // instruction if we can. We already know that Cur has 2 uses. 1273 if (isa<SelectInst>(UI)) 1274 return UI; 1275 continue; 1276 } 1277 return UI; 1278 } 1279 return nullptr; 1280 }; 1281 auto isCorrectOpcode = [&](Instruction *Cur) { 1282 if (IsMinMax) { 1283 Value *LHS, *RHS; 1284 return SelectPatternResult::isMinOrMax( 1285 matchSelectPattern(Cur, LHS, RHS).Flavor); 1286 } 1287 // Recognize a call to the llvm.fmuladd intrinsic. 1288 if (isFMulAddIntrinsic(Cur)) 1289 return true; 1290 1291 return Cur->getOpcode() == getOpcode(); 1292 }; 1293 1294 // Attempt to look through Phis which are part of the reduction chain 1295 unsigned ExtraPhiUses = 0; 1296 Instruction *RdxInstr = LoopExitInstr; 1297 if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) { 1298 if (ExitPhi->getNumIncomingValues() != 2) 1299 return {}; 1300 1301 Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0)); 1302 Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1)); 1303 1304 Instruction *Chain = nullptr; 1305 if (Inc0 == Phi) 1306 Chain = Inc1; 1307 else if (Inc1 == Phi) 1308 Chain = Inc0; 1309 else 1310 return {}; 1311 1312 RdxInstr = Chain; 1313 ExtraPhiUses = 1; 1314 } 1315 1316 // The loop exit instruction we check first (as a quick test) but add last. We 1317 // check the opcode is correct (and dont allow them to be Subs) and that they 1318 // have expected to have the expected number of uses. They will have one use 1319 // from the phi and one from a LCSSA value, no matter the type. 1320 if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2)) 1321 return {}; 1322 1323 // Check that the Phi has one (or two for min/max) uses, plus an extra use 1324 // for conditional reductions. 1325 if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses)) 1326 return {}; 1327 1328 Instruction *Cur = getNextInstruction(Phi); 1329 1330 // Each other instruction in the chain should have the expected number of uses 1331 // and be the correct opcode. 1332 while (Cur != RdxInstr) { 1333 if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses)) 1334 return {}; 1335 1336 ReductionOperations.push_back(Cur); 1337 Cur = getNextInstruction(Cur); 1338 } 1339 1340 ReductionOperations.push_back(Cur); 1341 return ReductionOperations; 1342 } 1343 1344 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, 1345 const SCEV *Step, BinaryOperator *BOp, 1346 SmallVectorImpl<Instruction *> *Casts) 1347 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) { 1348 assert(IK != IK_NoInduction && "Not an induction"); 1349 1350 // Start value type should match the induction kind and the value 1351 // itself should not be null. 1352 assert(StartValue && "StartValue is null"); 1353 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && 1354 "StartValue is not a pointer for pointer induction"); 1355 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && 1356 "StartValue is not an integer for integer induction"); 1357 1358 // Check the Step Value. It should be non-zero integer value. 1359 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) && 1360 "Step value is zero"); 1361 1362 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) && 1363 "StepValue is not an integer"); 1364 1365 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) && 1366 "StepValue is not FP for FpInduction"); 1367 assert((IK != IK_FpInduction || 1368 (InductionBinOp && 1369 (InductionBinOp->getOpcode() == Instruction::FAdd || 1370 InductionBinOp->getOpcode() == Instruction::FSub))) && 1371 "Binary opcode should be specified for FP induction"); 1372 1373 if (Casts) 1374 llvm::append_range(RedundantCasts, *Casts); 1375 } 1376 1377 ConstantInt *InductionDescriptor::getConstIntStepValue() const { 1378 if (isa<SCEVConstant>(Step)) 1379 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue()); 1380 return nullptr; 1381 } 1382 1383 bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop, 1384 ScalarEvolution *SE, 1385 InductionDescriptor &D) { 1386 1387 // Here we only handle FP induction variables. 1388 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type"); 1389 1390 if (TheLoop->getHeader() != Phi->getParent()) 1391 return false; 1392 1393 // The loop may have multiple entrances or multiple exits; we can analyze 1394 // this phi if it has a unique entry value and a unique backedge value. 1395 if (Phi->getNumIncomingValues() != 2) 1396 return false; 1397 Value *BEValue = nullptr, *StartValue = nullptr; 1398 if (TheLoop->contains(Phi->getIncomingBlock(0))) { 1399 BEValue = Phi->getIncomingValue(0); 1400 StartValue = Phi->getIncomingValue(1); 1401 } else { 1402 assert(TheLoop->contains(Phi->getIncomingBlock(1)) && 1403 "Unexpected Phi node in the loop"); 1404 BEValue = Phi->getIncomingValue(1); 1405 StartValue = Phi->getIncomingValue(0); 1406 } 1407 1408 BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue); 1409 if (!BOp) 1410 return false; 1411 1412 Value *Addend = nullptr; 1413 if (BOp->getOpcode() == Instruction::FAdd) { 1414 if (BOp->getOperand(0) == Phi) 1415 Addend = BOp->getOperand(1); 1416 else if (BOp->getOperand(1) == Phi) 1417 Addend = BOp->getOperand(0); 1418 } else if (BOp->getOpcode() == Instruction::FSub) 1419 if (BOp->getOperand(0) == Phi) 1420 Addend = BOp->getOperand(1); 1421 1422 if (!Addend) 1423 return false; 1424 1425 // The addend should be loop invariant 1426 if (auto *I = dyn_cast<Instruction>(Addend)) 1427 if (TheLoop->contains(I)) 1428 return false; 1429 1430 // FP Step has unknown SCEV 1431 const SCEV *Step = SE->getUnknown(Addend); 1432 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp); 1433 return true; 1434 } 1435 1436 /// This function is called when we suspect that the update-chain of a phi node 1437 /// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts, 1438 /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime 1439 /// predicate P under which the SCEV expression for the phi can be the 1440 /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the 1441 /// cast instructions that are involved in the update-chain of this induction. 1442 /// A caller that adds the required runtime predicate can be free to drop these 1443 /// cast instructions, and compute the phi using \p AR (instead of some scev 1444 /// expression with casts). 1445 /// 1446 /// For example, without a predicate the scev expression can take the following 1447 /// form: 1448 /// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) 1449 /// 1450 /// It corresponds to the following IR sequence: 1451 /// %for.body: 1452 /// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] 1453 /// %casted_phi = "ExtTrunc i64 %x" 1454 /// %add = add i64 %casted_phi, %step 1455 /// 1456 /// where %x is given in \p PN, 1457 /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, 1458 /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of 1459 /// several forms, for example, such as: 1460 /// ExtTrunc1: %casted_phi = and %x, 2^n-1 1461 /// or: 1462 /// ExtTrunc2: %t = shl %x, m 1463 /// %casted_phi = ashr %t, m 1464 /// 1465 /// If we are able to find such sequence, we return the instructions 1466 /// we found, namely %casted_phi and the instructions on its use-def chain up 1467 /// to the phi (not including the phi). 1468 static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, 1469 const SCEVUnknown *PhiScev, 1470 const SCEVAddRecExpr *AR, 1471 SmallVectorImpl<Instruction *> &CastInsts) { 1472 1473 assert(CastInsts.empty() && "CastInsts is expected to be empty."); 1474 auto *PN = cast<PHINode>(PhiScev->getValue()); 1475 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); 1476 const Loop *L = AR->getLoop(); 1477 1478 // Find any cast instructions that participate in the def-use chain of 1479 // PhiScev in the loop. 1480 // FORNOW/TODO: We currently expect the def-use chain to include only 1481 // two-operand instructions, where one of the operands is an invariant. 1482 // createAddRecFromPHIWithCasts() currently does not support anything more 1483 // involved than that, so we keep the search simple. This can be 1484 // extended/generalized as needed. 1485 1486 auto getDef = [&](const Value *Val) -> Value * { 1487 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val); 1488 if (!BinOp) 1489 return nullptr; 1490 Value *Op0 = BinOp->getOperand(0); 1491 Value *Op1 = BinOp->getOperand(1); 1492 Value *Def = nullptr; 1493 if (L->isLoopInvariant(Op0)) 1494 Def = Op1; 1495 else if (L->isLoopInvariant(Op1)) 1496 Def = Op0; 1497 return Def; 1498 }; 1499 1500 // Look for the instruction that defines the induction via the 1501 // loop backedge. 1502 BasicBlock *Latch = L->getLoopLatch(); 1503 if (!Latch) 1504 return false; 1505 Value *Val = PN->getIncomingValueForBlock(Latch); 1506 if (!Val) 1507 return false; 1508 1509 // Follow the def-use chain until the induction phi is reached. 1510 // If on the way we encounter a Value that has the same SCEV Expr as the 1511 // phi node, we can consider the instructions we visit from that point 1512 // as part of the cast-sequence that can be ignored. 1513 bool InCastSequence = false; 1514 auto *Inst = dyn_cast<Instruction>(Val); 1515 while (Val != PN) { 1516 // If we encountered a phi node other than PN, or if we left the loop, 1517 // we bail out. 1518 if (!Inst || !L->contains(Inst)) { 1519 return false; 1520 } 1521 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val)); 1522 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR)) 1523 InCastSequence = true; 1524 if (InCastSequence) { 1525 // Only the last instruction in the cast sequence is expected to have 1526 // uses outside the induction def-use chain. 1527 if (!CastInsts.empty()) 1528 if (!Inst->hasOneUse()) 1529 return false; 1530 CastInsts.push_back(Inst); 1531 } 1532 Val = getDef(Val); 1533 if (!Val) 1534 return false; 1535 Inst = dyn_cast<Instruction>(Val); 1536 } 1537 1538 return InCastSequence; 1539 } 1540 1541 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop, 1542 PredicatedScalarEvolution &PSE, 1543 InductionDescriptor &D, bool Assume) { 1544 Type *PhiTy = Phi->getType(); 1545 1546 // Handle integer and pointer inductions variables. 1547 // Now we handle also FP induction but not trying to make a 1548 // recurrent expression from the PHI node in-place. 1549 1550 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() && 1551 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy()) 1552 return false; 1553 1554 if (PhiTy->isFloatingPointTy()) 1555 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D); 1556 1557 const SCEV *PhiScev = PSE.getSCEV(Phi); 1558 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 1559 1560 // We need this expression to be an AddRecExpr. 1561 if (Assume && !AR) 1562 AR = PSE.getAsAddRec(Phi); 1563 1564 if (!AR) { 1565 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 1566 return false; 1567 } 1568 1569 // Record any Cast instructions that participate in the induction update 1570 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev); 1571 // If we started from an UnknownSCEV, and managed to build an addRecurrence 1572 // only after enabling Assume with PSCEV, this means we may have encountered 1573 // cast instructions that required adding a runtime check in order to 1574 // guarantee the correctness of the AddRecurrence respresentation of the 1575 // induction. 1576 if (PhiScev != AR && SymbolicPhi) { 1577 SmallVector<Instruction *, 2> Casts; 1578 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts)) 1579 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts); 1580 } 1581 1582 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR); 1583 } 1584 1585 bool InductionDescriptor::isInductionPHI( 1586 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE, 1587 InductionDescriptor &D, const SCEV *Expr, 1588 SmallVectorImpl<Instruction *> *CastsToIgnore) { 1589 Type *PhiTy = Phi->getType(); 1590 // isSCEVable returns true for integer and pointer types. 1591 if (!SE->isSCEVable(PhiTy)) 1592 return false; 1593 1594 // Check that the PHI is consecutive. 1595 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi); 1596 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); 1597 1598 if (!AR) { 1599 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); 1600 return false; 1601 } 1602 1603 if (AR->getLoop() != TheLoop) { 1604 // FIXME: We should treat this as a uniform. Unfortunately, we 1605 // don't currently know how to handled uniform PHIs. 1606 LLVM_DEBUG( 1607 dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); 1608 return false; 1609 } 1610 1611 // This function assumes that InductionPhi is called only on Phi nodes 1612 // present inside loop headers. Check for the same, and throw an assert if 1613 // the current Phi is not present inside the loop header. 1614 assert(Phi->getParent() == AR->getLoop()->getHeader() 1615 && "Invalid Phi node, not present in loop header"); 1616 1617 Value *StartValue = 1618 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); 1619 1620 BasicBlock *Latch = AR->getLoop()->getLoopLatch(); 1621 if (!Latch) 1622 return false; 1623 1624 const SCEV *Step = AR->getStepRecurrence(*SE); 1625 // Calculate the pointer stride and check if it is consecutive. 1626 // The stride may be a constant or a loop invariant integer value. 1627 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); 1628 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) 1629 return false; 1630 1631 if (PhiTy->isIntegerTy()) { 1632 BinaryOperator *BOp = 1633 dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch)); 1634 D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp, 1635 CastsToIgnore); 1636 return true; 1637 } 1638 1639 assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); 1640 1641 // This allows induction variables w/non-constant steps. 1642 D = InductionDescriptor(StartValue, IK_PtrInduction, Step); 1643 return true; 1644 } 1645