1 //===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file "describes" induction and recurrence variables. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Analysis/IVDescriptors.h" 14 #include "llvm/ADT/ScopeExit.h" 15 #include "llvm/Analysis/BasicAliasAnalysis.h" 16 #include "llvm/Analysis/DemandedBits.h" 17 #include "llvm/Analysis/DomTreeUpdater.h" 18 #include "llvm/Analysis/GlobalsModRef.h" 19 #include "llvm/Analysis/InstructionSimplify.h" 20 #include "llvm/Analysis/LoopInfo.h" 21 #include "llvm/Analysis/LoopPass.h" 22 #include "llvm/Analysis/MustExecute.h" 23 #include "llvm/Analysis/ScalarEvolution.h" 24 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 25 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 26 #include "llvm/Analysis/TargetTransformInfo.h" 27 #include "llvm/Analysis/ValueTracking.h" 28 #include "llvm/IR/Dominators.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Module.h" 31 #include "llvm/IR/PatternMatch.h" 32 #include "llvm/IR/ValueHandle.h" 33 #include "llvm/Pass.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/KnownBits.h" 36 37 #include <set> 38 39 using namespace llvm; 40 using namespace llvm::PatternMatch; 41 42 #define DEBUG_TYPE "iv-descriptors" 43 44 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, 45 SmallPtrSetImpl<Instruction *> &Set) { 46 for (const Use &Use : I->operands()) 47 if (!Set.count(dyn_cast<Instruction>(Use))) 48 return false; 49 return true; 50 } 51 52 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) { 53 switch (Kind) { 54 default: 55 break; 56 case RecurKind::Add: 57 case RecurKind::Mul: 58 case RecurKind::Or: 59 case RecurKind::And: 60 case RecurKind::Xor: 61 case RecurKind::SMax: 62 case RecurKind::SMin: 63 case RecurKind::UMax: 64 case RecurKind::UMin: 65 case RecurKind::SelectICmp: 66 case RecurKind::SelectFCmp: 67 return true; 68 } 69 return false; 70 } 71 72 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) { 73 return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind); 74 } 75 76 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) { 77 switch (Kind) { 78 default: 79 break; 80 case RecurKind::Add: 81 case RecurKind::Mul: 82 case RecurKind::FAdd: 83 case RecurKind::FMul: 84 case RecurKind::FMulAdd: 85 return true; 86 } 87 return false; 88 } 89 90 /// Determines if Phi may have been type-promoted. If Phi has a single user 91 /// that ANDs the Phi with a type mask, return the user. RT is updated to 92 /// account for the narrower bit width represented by the mask, and the AND 93 /// instruction is added to CI. 94 static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, 95 SmallPtrSetImpl<Instruction *> &Visited, 96 SmallPtrSetImpl<Instruction *> &CI) { 97 if (!Phi->hasOneUse()) 98 return Phi; 99 100 const APInt *M = nullptr; 101 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser()); 102 103 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT 104 // with a new integer type of the corresponding bit width. 
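  // E.g., an 'and' of the phi with the mask 255 (2^8 - 1) narrows RT to i8.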
  if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
    int32_t Bits = (*M + 1).exactLogBase2();
    if (Bits > 0) {
      RT = IntegerType::get(Phi->getContext(), Bits);
      Visited.insert(Phi);
      CI.insert(J);
      return J;
    }
  }
  return Phi;
}

/// Compute the minimal bit width needed to represent a reduction whose exit
/// instruction is given by Exit.
static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
                                                     DemandedBits *DB,
                                                     AssumptionCache *AC,
                                                     DominatorTree *DT) {
  bool IsSigned = false;
  const DataLayout &DL = Exit->getModule()->getDataLayout();
  uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());

  if (DB) {
    // Use the demanded bits analysis to determine the bits that are live out
    // of the exit instruction, rounding up to the nearest power of two. If the
    // use of demanded bits results in a smaller bit width, we know the value
    // must be positive (i.e., IsSigned = false), because if this were not the
    // case, the sign bit would have been demanded.
    auto Mask = DB->getDemandedBits(Exit);
    MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
  }

  if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
    // If demanded bits wasn't able to limit the bit width, we can try to use
    // value tracking instead. This can be the case, for example, if the value
    // may be negative.
    auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
    auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
    MaxBitWidth = NumTypeBits - NumSignBits;
    KnownBits Bits = computeKnownBits(Exit, DL);
    if (!Bits.isNonNegative()) {
      // If the value is not known to be non-negative, we set IsSigned to true,
      // meaning that we will use sext instructions instead of zext
      // instructions to restore the original type.
      IsSigned = true;
      // Make sure at least one sign bit is included in the result, so it
      // will get properly sign-extended.
      ++MaxBitWidth;
    }
  }
  if (!isPowerOf2_64(MaxBitWidth))
    MaxBitWidth = NextPowerOf2(MaxBitWidth);

  return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
                        IsSigned);
}

/// Collect cast instructions that can be ignored in the vectorizer's cost
/// model, given a reduction exit value and the minimal type in which the
/// reduction can be represented. Also search casts to the recurrence type
/// to find the minimum width used by the recurrence.
static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,
                              Type *RecurrenceType,
                              SmallPtrSetImpl<Instruction *> &Casts,
                              unsigned &MinWidthCastToRecurTy) {

  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;
  Worklist.push_back(Exit);
  MinWidthCastToRecurTy = -1U;

  while (!Worklist.empty()) {
    Instruction *Val = Worklist.pop_back_val();
    Visited.insert(Val);
    if (auto *Cast = dyn_cast<CastInst>(Val)) {
      if (Cast->getSrcTy() == RecurrenceType) {
        // If the source type of a cast instruction is equal to the recurrence
        // type, it will be eliminated, and should be ignored in the vectorizer
        // cost model.
        Casts.insert(Cast);
        continue;
      }
      if (Cast->getDestTy() == RecurrenceType) {
        // The minimum width used by the recurrence is found by checking for
        // casts on its operands. The minimum width is used by the vectorizer
        // when finding the widest type for in-loop reductions without any
        // loads/stores.
        MinWidthCastToRecurTy = std::min<unsigned>(
            MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits());
        continue;
      }
    }
    // Add all operands to the work list if they are loop-varying values that
    // we haven't yet visited.
    for (Value *O : cast<User>(Val)->operands())
      if (auto *I = dyn_cast<Instruction>(O))
        if (TheLoop->contains(I) && !Visited.count(I))
          Worklist.push_back(I);
  }
}

// Check if a given Phi node can be recognized as an ordered reduction for
// vectorizing floating point operations without unsafe math.
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
                                  Instruction *Exit, PHINode *Phi) {
  // Currently only FAdd and FMulAdd are supported.
  if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
    return false;

  if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
    return false;

  if (Kind == RecurKind::FMulAdd &&
      !RecurrenceDescriptor::isFMulAddIntrinsic(Exit))
    return false;

  // Ensure the exit instruction has only one user other than the reduction
  // PHI.
  if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
    return false;

  // The only pattern accepted is the one in which the reduction PHI
  // is used as one of the operands of the exit instruction.
  auto *Op0 = Exit->getOperand(0);
  auto *Op1 = Exit->getOperand(1);
  if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
    return false;
  if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
    return false;

  LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
                    << ", ExitInst: " << *Exit << "\n");

  return true;
}

bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
                                           Loop *TheLoop,
                                           FastMathFlags FuncFMF,
                                           RecurrenceDescriptor &RedDes,
                                           DemandedBits *DB,
                                           AssumptionCache *AC,
                                           DominatorTree *DT) {
  if (Phi->getNumIncomingValues() != 2)
    return false;

  // Reduction variables are only found in the loop header block.
  if (Phi->getParent() != TheLoop->getHeader())
    return false;

  // Obtain the reduction start value from the value that comes from the loop
  // preheader.
  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());

  // ExitInstruction is the single value which is used outside the loop.
  // We only allow for a single reduction value to be used outside the loop.
  // This includes users of the reduction variables (which form a cycle that
  // ends in the phi node).
  Instruction *ExitInstruction = nullptr;
  // Indicates that we found a reduction operation in our scan.
  bool FoundReduxOp = false;

  // We start with the PHI node and scan for all of the users of this
  // instruction. All users must be instructions that can be used as reduction
  // variables (such as ADD). We must have a single out-of-block user. The
  // cycle must include the original PHI.
  bool FoundStartPHI = false;

  // To recognize min/max patterns formed by an icmp/select sequence, we store
  // the number of instructions we saw from the recognized min/max pattern,
  // to make sure we only see exactly the two instructions.
  unsigned NumCmpSelectPatternInst = 0;
  InstDesc ReduxDesc(false, nullptr);

  // Data used for determining if the recurrence has been type-promoted.
  Type *RecurrenceType = Phi->getType();
  SmallPtrSet<Instruction *, 4> CastInsts;
  unsigned MinWidthCastToRecurrenceType;
  Instruction *Start = Phi;
  bool IsSigned = false;

  SmallPtrSet<Instruction *, 8> VisitedInsts;
  SmallVector<Instruction *, 8> Worklist;

  // Return early if the recurrence kind does not match the type of Phi. If the
  // recurrence kind is arithmetic, we attempt to look through AND operations
  // resulting from the type promotion performed by InstCombine. Vector
  // operations are not limited to the legal integer widths, so we may be able
  // to evaluate the reduction in the narrower width.
  if (RecurrenceType->isFloatingPointTy()) {
    if (!isFloatingPointRecurrenceKind(Kind))
      return false;
  } else if (RecurrenceType->isIntegerTy()) {
    if (!isIntegerRecurrenceKind(Kind))
      return false;
    if (!isMinMaxRecurrenceKind(Kind))
      Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
  } else {
    // Pointer min/max may exist, but it is not supported as a reduction op.
    return false;
  }

  Worklist.push_back(Start);
  VisitedInsts.insert(Start);

  // Start with all flags set because we will intersect this with the reduction
  // flags from all the reduction operations.
  FastMathFlags FMF = FastMathFlags::getFast();

  // The first instruction in the use-def chain of the Phi node that requires
  // exact floating point operations.
  Instruction *ExactFPMathInst = nullptr;

  // A value in the reduction can be used:
  //  - By the reduction:
  //      - Reduction operation:
  //        - One use of the reduction value (safe).
  //        - Multiple uses of the reduction value (not safe).
  //      - PHI:
  //        - All uses of the PHI must be the reduction (safe).
  //        - Otherwise, not safe.
  //  - By instructions outside of the loop (safe).
  //      * One value may have several outside users, but all outside
  //        uses must be of the same value.
  //  - By an instruction that is not part of the reduction (not safe).
  //    This is either:
  //      * An instruction type other than PHI or the reduction operation.
  //      * A PHI in the header other than the initial PHI.
  while (!Worklist.empty()) {
    Instruction *Cur = Worklist.pop_back_val();

    // No users.
    // If the instruction has no users then this is a broken chain and can't be
    // a reduction variable.
    if (Cur->use_empty())
      return false;

    bool IsAPhi = isa<PHINode>(Cur);

    // A header PHI use other than the original PHI.
    if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
      return false;

    // Reductions of instructions such as Div and Sub are only possible if the
    // LHS is the reduction variable.
    if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
        !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
        !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
      return false;

    // Any reduction instruction must be of one of the allowed kinds. We ignore
    // the starting value (the Phi or an AND instruction if the Phi has been
    // type-promoted).
    if (Cur != Start) {
      ReduxDesc =
          isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
      ExactFPMathInst = ExactFPMathInst == nullptr
                            ? ReduxDesc.getExactFPMathInst()
                            : ExactFPMathInst;
      if (!ReduxDesc.isRecurrence())
        return false;
      // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
      if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) {
        FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags();
        if (auto *Sel = dyn_cast<SelectInst>(ReduxDesc.getPatternInst())) {
          // Accept FMF on either fcmp or select of a min/max idiom.
          // TODO: This is a hack to work-around the fact that FMF may not be
          //       assigned/propagated correctly. If that problem is fixed or
          //       we standardize on fmin/fmax via intrinsics, this can be
          //       removed.
          if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
            CurFMF |= FCmp->getFastMathFlags();
        }
        FMF &= CurFMF;
      }
      // Update this reduction kind if we matched a new instruction.
      // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
      //       state accurate while processing the worklist?
      if (ReduxDesc.getRecKind() != RecurKind::None)
        Kind = ReduxDesc.getRecKind();
    }

    bool IsASelect = isa<SelectInst>(Cur);

    // A conditional reduction operation must only have 2 or fewer uses in
    // VisitedInsts.
    if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
        hasMultipleUsesOf(Cur, VisitedInsts, 2))
      return false;

    // A reduction operation must only have one use of the reduction value.
    if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
        !isSelectCmpRecurrenceKind(Kind) &&
        hasMultipleUsesOf(Cur, VisitedInsts, 1))
      return false;

    // All inputs to a PHI node must be a reduction value.
    if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
      return false;

    if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp) &&
        (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;
    if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp) &&
        (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;

    // Check whether we found a reduction operator.
    FoundReduxOp |= !IsAPhi && Cur != Start;

    // Process users of current instruction. Push non-PHI nodes after PHI nodes
    // onto the stack. This way we are going to have seen all inputs to PHI
    // nodes once we get to them.
    SmallVector<Instruction *, 8> NonPHIs;
    SmallVector<Instruction *, 8> PHIs;
    for (User *U : Cur->users()) {
      Instruction *UI = cast<Instruction>(U);

      // If the user is a call to llvm.fmuladd then the instruction can only be
      // the final operand.
      if (isFMulAddIntrinsic(UI))
        if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
          return false;

      // Check if we found the exit user.
      BasicBlock *Parent = UI->getParent();
      if (!TheLoop->contains(Parent)) {
        // If we already know this instruction is used externally, move on to
        // the next user.
        if (ExitInstruction == Cur)
          continue;

        // Exit if you find multiple values used outside or if the header phi
        // node is being used. In this case the user uses the value of the
        // previous iteration, in which case we would lose "VF-1" iterations of
        // the reduction operation if we vectorize.
        if (ExitInstruction != nullptr || Cur == Phi)
          return false;

        // The instruction used by an outside user must be the last instruction
        // before we feed back to the reduction phi. Otherwise, we lose VF-1
        // operations on the value.
        if (!is_contained(Phi->operands(), Cur))
          return false;

        ExitInstruction = Cur;
        continue;
      }

      // Process instructions only once (termination). Each reduction cycle
      // value must only be used once, except by phi nodes and min/max
      // reductions which are represented as a cmp followed by a select.
      InstDesc IgnoredVal(false, nullptr);
      if (VisitedInsts.insert(UI).second) {
        if (isa<PHINode>(UI))
          PHIs.push_back(UI);
        else
          NonPHIs.push_back(UI);
      } else if (!isa<PHINode>(UI) &&
                 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                   !isa<SelectInst>(UI)) ||
                  (!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
                   !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal)
                        .isRecurrence() &&
                   !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
        return false;

      // Remember that we completed the cycle.
      if (UI == Phi)
        FoundStartPHI = true;
    }
    Worklist.append(PHIs.begin(), PHIs.end());
    Worklist.append(NonPHIs.begin(), NonPHIs.end());
  }

  // This means we have seen one but not the other instruction of the
  // pattern or more than just a select and cmp. Zero implies that we saw a
  // llvm.min/max intrinsic, which is always OK.
  if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
      NumCmpSelectPatternInst != 0)
    return false;

  if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
    return false;

  if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
    return false;

  const bool IsOrdered =
      checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);

  if (Start != Phi) {
    // If the starting value is not the same as the phi node, we speculatively
    // looked through an 'and' instruction when evaluating a potential
    // arithmetic reduction to determine if it may have been type-promoted.
    //
    // We now compute the minimal bit width that is required to represent the
    // reduction. If this is the same width that was indicated by the 'and', we
    // can represent the reduction in the smaller type. The 'and' instruction
    // will be eliminated since it will essentially be a cast instruction that
    // can be ignored in the cost model. If we compute a different type than we
    // did when evaluating the 'and', the 'and' will not be eliminated, and we
    // will end up with different kinds of operations in the recurrence
    // expression (e.g., IntegerAND, IntegerADD). We give up if this is
    // the case.
    //
    // The vectorizer relies on InstCombine to perform the actual
    // type-shrinking. It does this by inserting instructions to truncate the
    // exit value of the reduction to the width indicated by RecurrenceType and
    // then extend this value back to the original width. If IsSigned is false,
    // a 'zext' instruction will be generated; otherwise, a 'sext' will be
    // used.
    //
    // TODO: We should not rely on InstCombine to rewrite the reduction in the
    //       smaller type. We should just generate a correctly typed expression
    //       to begin with.
    Type *ComputedType;
    std::tie(ComputedType, IsSigned) =
        computeRecurrenceType(ExitInstruction, DB, AC, DT);
    if (ComputedType != RecurrenceType)
      return false;
  }

  // Collect cast instructions and the minimum width used by the recurrence.
526 // If the starting value is not the same as the phi node and the computed 527 // recurrence type is equal to the recurrence type, the recurrence expression 528 // will be represented in a narrower or wider type. If there are any cast 529 // instructions that will be unnecessary, collect them in CastsFromRecurTy. 530 // Note that the 'and' instruction was already included in this list. 531 // 532 // TODO: A better way to represent this may be to tag in some way all the 533 // instructions that are a part of the reduction. The vectorizer cost 534 // model could then apply the recurrence type to these instructions, 535 // without needing a white list of instructions to ignore. 536 // This may also be useful for the inloop reductions, if it can be 537 // kept simple enough. 538 collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts, 539 MinWidthCastToRecurrenceType); 540 541 // We found a reduction var if we have reached the original phi node and we 542 // only have a single instruction with out-of-loop users. 543 544 // The ExitInstruction(Instruction which is allowed to have out-of-loop users) 545 // is saved as part of the RecurrenceDescriptor. 546 547 // Save the description of this reduction variable. 548 RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ExactFPMathInst, 549 RecurrenceType, IsSigned, IsOrdered, CastInsts, 550 MinWidthCastToRecurrenceType); 551 RedDes = RD; 552 553 return true; 554 } 555 556 // We are looking for loops that do something like this: 557 // int r = 0; 558 // for (int i = 0; i < n; i++) { 559 // if (src[i] > 3) 560 // r = 3; 561 // } 562 // where the reduction value (r) only has two states, in this example 0 or 3. 563 // The generated LLVM IR for this type of loop will be like this: 564 // for.body: 565 // %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ] 566 // ... 567 // %cmp = icmp sgt i32 %5, 3 568 // %spec.select = select i1 %cmp, i32 3, i32 %r 569 // ... 570 // In general we can support vectorization of loops where 'r' flips between 571 // any two non-constants, provided they are loop invariant. The only thing 572 // we actually care about at the end of the loop is whether or not any lane 573 // in the selected vector is different from the start value. The final 574 // across-vector reduction after the loop simply involves choosing the start 575 // value if nothing changed (0 in the example above) or the other selected 576 // value (3 in the example above). 577 RecurrenceDescriptor::InstDesc 578 RecurrenceDescriptor::isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi, 579 Instruction *I, InstDesc &Prev) { 580 // We must handle the select(cmp(),x,y) as a single instruction. Advance to 581 // the select. 582 CmpInst::Predicate Pred; 583 if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) { 584 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin())) 585 return InstDesc(Select, Prev.getRecKind()); 586 } 587 588 // Only match select with single use cmp condition. 
589 if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(), 590 m_Value()))) 591 return InstDesc(false, I); 592 593 SelectInst *SI = cast<SelectInst>(I); 594 Value *NonPhi = nullptr; 595 596 if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue())) 597 NonPhi = SI->getFalseValue(); 598 else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue())) 599 NonPhi = SI->getTrueValue(); 600 else 601 return InstDesc(false, I); 602 603 // We are looking for selects of the form: 604 // select(cmp(), phi, loop_invariant) or 605 // select(cmp(), loop_invariant, phi) 606 if (!Loop->isLoopInvariant(NonPhi)) 607 return InstDesc(false, I); 608 609 return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::SelectICmp 610 : RecurKind::SelectFCmp); 611 } 612 613 RecurrenceDescriptor::InstDesc 614 RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind, 615 const InstDesc &Prev) { 616 assert((isa<CmpInst>(I) || isa<SelectInst>(I) || isa<CallInst>(I)) && 617 "Expected a cmp or select or call instruction"); 618 if (!isMinMaxRecurrenceKind(Kind)) 619 return InstDesc(false, I); 620 621 // We must handle the select(cmp()) as a single instruction. Advance to the 622 // select. 623 CmpInst::Predicate Pred; 624 if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) { 625 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin())) 626 return InstDesc(Select, Prev.getRecKind()); 627 } 628 629 // Only match select with single use cmp condition, or a min/max intrinsic. 630 if (!isa<IntrinsicInst>(I) && 631 !match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(), 632 m_Value()))) 633 return InstDesc(false, I); 634 635 // Look for a min/max pattern. 636 if (match(I, m_UMin(m_Value(), m_Value()))) 637 return InstDesc(Kind == RecurKind::UMin, I); 638 if (match(I, m_UMax(m_Value(), m_Value()))) 639 return InstDesc(Kind == RecurKind::UMax, I); 640 if (match(I, m_SMax(m_Value(), m_Value()))) 641 return InstDesc(Kind == RecurKind::SMax, I); 642 if (match(I, m_SMin(m_Value(), m_Value()))) 643 return InstDesc(Kind == RecurKind::SMin, I); 644 if (match(I, m_OrdFMin(m_Value(), m_Value()))) 645 return InstDesc(Kind == RecurKind::FMin, I); 646 if (match(I, m_OrdFMax(m_Value(), m_Value()))) 647 return InstDesc(Kind == RecurKind::FMax, I); 648 if (match(I, m_UnordFMin(m_Value(), m_Value()))) 649 return InstDesc(Kind == RecurKind::FMin, I); 650 if (match(I, m_UnordFMax(m_Value(), m_Value()))) 651 return InstDesc(Kind == RecurKind::FMax, I); 652 if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value()))) 653 return InstDesc(Kind == RecurKind::FMin, I); 654 if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()))) 655 return InstDesc(Kind == RecurKind::FMax, I); 656 657 return InstDesc(false, I); 658 } 659 660 /// Returns true if the select instruction has users in the compare-and-add 661 /// reduction pattern below. The select instruction argument is the last one 662 /// in the sequence. 663 /// 664 /// %sum.1 = phi ... 665 /// ... 666 /// %cmp = fcmp pred %0, %CFP 667 /// %add = fadd %0, %sum.1 668 /// %sum.2 = select %cmp, %add, %sum.1 669 RecurrenceDescriptor::InstDesc 670 RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) { 671 SelectInst *SI = dyn_cast<SelectInst>(I); 672 if (!SI) 673 return InstDesc(false, I); 674 675 CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition()); 676 // Only handle single use cases for now. 
  if (!CI || !CI->hasOneUse())
    return InstDesc(false, I);

  Value *TrueVal = SI->getTrueValue();
  Value *FalseVal = SI->getFalseValue();
  // For now, handle only the case where exactly one of the select's operands
  // is a PHI node.
  if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) ||
      (!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal)))
    return InstDesc(false, I);

  Instruction *I1 =
      isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal)
                             : dyn_cast<Instruction>(TrueVal);
  if (!I1 || !I1->isBinaryOp())
    return InstDesc(false, I);

  Value *Op1, *Op2;
  if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
       m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
      I1->isFast())
    return InstDesc(Kind == RecurKind::FAdd, SI);

  if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))
    return InstDesc(Kind == RecurKind::FMul, SI);

  return InstDesc(false, I);
}

RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
                                        Instruction *I, RecurKind Kind,
                                        InstDesc &Prev, FastMathFlags FuncFMF) {
  assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
  switch (I->getOpcode()) {
  default:
    return InstDesc(false, I);
  case Instruction::PHI:
    return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
  case Instruction::Sub:
  case Instruction::Add:
    return InstDesc(Kind == RecurKind::Add, I);
  case Instruction::Mul:
    return InstDesc(Kind == RecurKind::Mul, I);
  case Instruction::And:
    return InstDesc(Kind == RecurKind::And, I);
  case Instruction::Or:
    return InstDesc(Kind == RecurKind::Or, I);
  case Instruction::Xor:
    return InstDesc(Kind == RecurKind::Xor, I);
  case Instruction::FDiv:
  case Instruction::FMul:
    return InstDesc(Kind == RecurKind::FMul, I,
                    I->hasAllowReassoc() ? nullptr : I);
  case Instruction::FSub:
  case Instruction::FAdd:
    return InstDesc(Kind == RecurKind::FAdd, I,
                    I->hasAllowReassoc() ? nullptr : I);
  case Instruction::Select:
    if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul)
      return isConditionalRdxPattern(Kind, I);
    LLVM_FALLTHROUGH;
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Call:
    if (isSelectCmpRecurrenceKind(Kind))
      return isSelectCmpPattern(L, OrigPhi, I, Prev);
    if (isIntMinMaxRecurrenceKind(Kind) ||
        (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
          (isa<FPMathOperator>(I) && I->hasNoNaNs() &&
           I->hasNoSignedZeros())) &&
         isFPMinMaxRecurrenceKind(Kind)))
      return isMinMaxPattern(I, Kind, Prev);
    else if (isFMulAddIntrinsic(I))
      return InstDesc(Kind == RecurKind::FMulAdd, I,
                      I->hasAllowReassoc() ?
nullptr : I); 753 return InstDesc(false, I); 754 } 755 } 756 757 bool RecurrenceDescriptor::hasMultipleUsesOf( 758 Instruction *I, SmallPtrSetImpl<Instruction *> &Insts, 759 unsigned MaxNumUses) { 760 unsigned NumUses = 0; 761 for (const Use &U : I->operands()) { 762 if (Insts.count(dyn_cast<Instruction>(U))) 763 ++NumUses; 764 if (NumUses > MaxNumUses) 765 return true; 766 } 767 768 return false; 769 } 770 771 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, 772 RecurrenceDescriptor &RedDes, 773 DemandedBits *DB, AssumptionCache *AC, 774 DominatorTree *DT) { 775 BasicBlock *Header = TheLoop->getHeader(); 776 Function &F = *Header->getParent(); 777 FastMathFlags FMF; 778 FMF.setNoNaNs( 779 F.getFnAttribute("no-nans-fp-math").getValueAsBool()); 780 FMF.setNoSignedZeros( 781 F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool()); 782 783 if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT)) { 784 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); 785 return true; 786 } 787 if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT)) { 788 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); 789 return true; 790 } 791 if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT)) { 792 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); 793 return true; 794 } 795 if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT)) { 796 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n"); 797 return true; 798 } 799 if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT)) { 800 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); 801 return true; 802 } 803 if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT)) { 804 LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n"); 805 return true; 806 } 807 if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT)) { 808 LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n"); 809 return true; 810 } 811 if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT)) { 812 LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n"); 813 return true; 814 } 815 if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT)) { 816 LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n"); 817 return true; 818 } 819 if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC, 820 DT)) { 821 LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI." 822 << *Phi << "\n"); 823 return true; 824 } 825 if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT)) { 826 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); 827 return true; 828 } 829 if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT)) { 830 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); 831 return true; 832 } 833 if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT)) { 834 LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n"); 835 return true; 836 } 837 if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT)) { 838 LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." 
                      << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC,
                      DT)) {
    LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
                      << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC,
                      DT)) {
    LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
    return true;
  }
  // Not a reduction of known type.
  return false;
}

bool RecurrenceDescriptor::isFirstOrderRecurrence(
    PHINode *Phi, Loop *TheLoop,
    MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {

  // Ensure the phi node is in the loop header and has two incoming values.
  if (Phi->getParent() != TheLoop->getHeader() ||
      Phi->getNumIncomingValues() != 2)
    return false;

  // Ensure the loop has a preheader and a single latch block. The loop
  // vectorizer will need the latch to set up the next iteration of the loop.
  auto *Preheader = TheLoop->getLoopPreheader();
  auto *Latch = TheLoop->getLoopLatch();
  if (!Preheader || !Latch)
    return false;

  // Ensure the phi node's incoming blocks are the loop preheader and latch.
  if (Phi->getBasicBlockIndex(Preheader) < 0 ||
      Phi->getBasicBlockIndex(Latch) < 0)
    return false;

  // Get the previous value. The previous value comes from the latch edge while
  // the initial value comes from the preheader edge.
  auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
  if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
      SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
    return false;

  // Ensure every user of the phi node (recursively) is dominated by the
  // previous value. The dominance requirement ensures the loop vectorizer will
  // not need to vectorize the initial value prior to the first iteration of
  // the loop.
  // TODO: Consider extending this sinking to handle memory instructions.

  // We optimistically assume we can sink all users after Previous. Keep a set
  // of instructions to sink after Previous ordered by dominance in the common
  // basic block. It will be applied to SinkAfter if all users can be sunk.
  auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
    return A->comesBefore(B);
  };
  std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
      CompareByComesBefore);

  BasicBlock *PhiBB = Phi->getParent();
  SmallVector<Instruction *, 8> WorkList;
  auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
    // Already sunk SinkCandidate.
    if (SinkCandidate->getParent() == PhiBB &&
        InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
      return true;

    // Cyclic dependence.
    if (Previous == SinkCandidate)
      return false;

    if (DT->dominates(Previous,
                      SinkCandidate)) // We already are good w/o sinking.
      return true;

    if (SinkCandidate->getParent() != PhiBB ||
        SinkCandidate->mayHaveSideEffects() ||
        SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
      return false;

    // Do not try to sink an instruction multiple times (if multiple operands
    // are first order recurrences).
    // TODO: We can support this case, by sinking the instruction after the
    // 'deepest' previous instruction.
924 if (SinkAfter.find(SinkCandidate) != SinkAfter.end()) 925 return false; 926 927 // If we reach a PHI node that is not dominated by Previous, we reached a 928 // header PHI. No need for sinking. 929 if (isa<PHINode>(SinkCandidate)) 930 return true; 931 932 // Sink User tentatively and check its users 933 InstrsToSink.insert(SinkCandidate); 934 WorkList.push_back(SinkCandidate); 935 return true; 936 }; 937 938 WorkList.push_back(Phi); 939 // Try to recursively sink instructions and their users after Previous. 940 while (!WorkList.empty()) { 941 Instruction *Current = WorkList.pop_back_val(); 942 for (User *User : Current->users()) { 943 if (!TryToPushSinkCandidate(cast<Instruction>(User))) 944 return false; 945 } 946 } 947 948 // We can sink all users of Phi. Update the mapping. 949 for (Instruction *I : InstrsToSink) { 950 SinkAfter[I] = Previous; 951 Previous = I; 952 } 953 return true; 954 } 955 956 /// This function returns the identity element (or neutral element) for 957 /// the operation K. 958 Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp, 959 FastMathFlags FMF) const { 960 switch (K) { 961 case RecurKind::Xor: 962 case RecurKind::Add: 963 case RecurKind::Or: 964 // Adding, Xoring, Oring zero to a number does not change it. 965 return ConstantInt::get(Tp, 0); 966 case RecurKind::Mul: 967 // Multiplying a number by 1 does not change it. 968 return ConstantInt::get(Tp, 1); 969 case RecurKind::And: 970 // AND-ing a number with an all-1 value does not change it. 971 return ConstantInt::get(Tp, -1, true); 972 case RecurKind::FMul: 973 // Multiplying a number by 1 does not change it. 974 return ConstantFP::get(Tp, 1.0L); 975 case RecurKind::FMulAdd: 976 case RecurKind::FAdd: 977 // Adding zero to a number does not change it. 978 // FIXME: Ideally we should not need to check FMF for FAdd and should always 979 // use -0.0. However, this will currently result in mixed vectors of 0.0/-0.0. 980 // Instead, we should ensure that 1) the FMF from FAdd are propagated to the PHI 981 // nodes where possible, and 2) PHIs with the nsz flag + -0.0 use 0.0. This would 982 // mean we can then remove the check for noSignedZeros() below (see D98963). 
    if (FMF.noSignedZeros())
      return ConstantFP::get(Tp, 0.0L);
    return ConstantFP::get(Tp, -0.0L);
  case RecurKind::UMin:
    return ConstantInt::get(Tp, -1);
  case RecurKind::UMax:
    return ConstantInt::get(Tp, 0);
  case RecurKind::SMin:
    return ConstantInt::get(Tp,
                            APInt::getSignedMaxValue(Tp->getIntegerBitWidth()));
  case RecurKind::SMax:
    return ConstantInt::get(Tp,
                            APInt::getSignedMinValue(Tp->getIntegerBitWidth()));
  case RecurKind::FMin:
    return ConstantFP::getInfinity(Tp, true);
  case RecurKind::FMax:
    return ConstantFP::getInfinity(Tp, false);
  case RecurKind::SelectICmp:
  case RecurKind::SelectFCmp:
    return getRecurrenceStartValue();
  default:
    llvm_unreachable("Unknown recurrence kind");
  }
}

unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
  switch (Kind) {
  case RecurKind::Add:
    return Instruction::Add;
  case RecurKind::Mul:
    return Instruction::Mul;
  case RecurKind::Or:
    return Instruction::Or;
  case RecurKind::And:
    return Instruction::And;
  case RecurKind::Xor:
    return Instruction::Xor;
  case RecurKind::FMul:
    return Instruction::FMul;
  case RecurKind::FMulAdd:
  case RecurKind::FAdd:
    return Instruction::FAdd;
  case RecurKind::SMax:
  case RecurKind::SMin:
  case RecurKind::UMax:
  case RecurKind::UMin:
  case RecurKind::SelectICmp:
    return Instruction::ICmp;
  case RecurKind::FMax:
  case RecurKind::FMin:
  case RecurKind::SelectFCmp:
    return Instruction::FCmp;
  default:
    llvm_unreachable("Unknown recurrence operation");
  }
}

SmallVector<Instruction *, 4>
RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
  SmallVector<Instruction *, 4> ReductionOperations;
  unsigned RedOp = getOpcode(Kind);

  // Search down from the Phi to the LoopExitInstr, looking for instructions
  // with a single user of the correct type for the reduction.

  // Note that we check that the type of the operand is correct for each item
  // in the chain, including the last (the loop exit value). This can come up
  // from sub, which would otherwise be treated as an add reduction. MinMax
  // also needs to check for a pair of icmp/select, for which we use the
  // getNextInstruction and isCorrectOpcode functions to step the right number
  // of instructions, and check the icmp/select pair.
  // FIXME: We also do not attempt to look through Phi/Select's yet, which
  // might be part of the reduction chain, or attempt to look through And's to
  // find a smaller bitwidth. Subs are also currently not allowed (which are
  // usually treated as part of an add reduction) as they are expected to
  // generally be more expensive than out-of-loop reductions, and need to be
  // costed more carefully.
  unsigned ExpectedUses = 1;
  if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)
    ExpectedUses = 2;

  auto getNextInstruction = [&](Instruction *Cur) {
    if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
      // We are expecting an icmp/select pair, so we advance to the select
      // instruction if we can. We already know that Cur has 2 uses.
      if (isa<SelectInst>(*Cur->user_begin()))
        return cast<Instruction>(*Cur->user_begin());
      else
        return cast<Instruction>(*std::next(Cur->user_begin()));
    }
    return cast<Instruction>(*Cur->user_begin());
  };
  auto isCorrectOpcode = [&](Instruction *Cur) {
    if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
      Value *LHS, *RHS;
      return SelectPatternResult::isMinOrMax(
          matchSelectPattern(Cur, LHS, RHS).Flavor);
    }
    // Recognize a call to the llvm.fmuladd intrinsic.
    if (isFMulAddIntrinsic(Cur))
      return true;

    return Cur->getOpcode() == RedOp;
  };

  // The loop exit instruction is checked first (as a quick test) but added
  // last. We check that the opcode is correct (and don't allow them to be
  // Subs) and that they have the expected number of uses. They will have one
  // use from the phi and one from a LCSSA value, no matter the type.
  if (!isCorrectOpcode(LoopExitInstr) || !LoopExitInstr->hasNUses(2))
    return {};

  // Check that the Phi has one use (or two for min/max).
  if (!Phi->hasNUses(ExpectedUses))
    return {};
  Instruction *Cur = getNextInstruction(Phi);

  // Each other instruction in the chain should have the expected number of
  // uses and be the correct opcode.
  while (Cur != LoopExitInstr) {
    if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
      return {};

    ReductionOperations.push_back(Cur);
    Cur = getNextInstruction(Cur);
  }

  ReductionOperations.push_back(Cur);
  return ReductionOperations;
}

InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
                                         const SCEV *Step, BinaryOperator *BOp,
                                         Type *ElementType,
                                         SmallVectorImpl<Instruction *> *Casts)
    : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp),
      ElementType(ElementType) {
  assert(IK != IK_NoInduction && "Not an induction");

  // Start value type should match the induction kind and the value
  // itself should not be null.
  assert(StartValue && "StartValue is null");
  assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
         "StartValue is not a pointer for pointer induction");
  assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
         "StartValue is not an integer for integer induction");

  // Check the Step Value. It should be a non-zero integer value.
  assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
         "Step value is zero");

  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
         "Step value should be constant for pointer induction");
  assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
         "StepValue is not an integer");

  assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
         "StepValue is not FP for FpInduction");
  assert((IK != IK_FpInduction ||
          (InductionBinOp &&
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub))) &&
         "Binary opcode should be specified for FP induction");

  if (IK == IK_PtrInduction)
    assert(ElementType && "Pointer induction must have element type");
  else
    assert(!ElementType && "Non-pointer induction cannot have element type");

  if (Casts) {
    for (auto &Inst : *Casts) {
      RedundantCasts.push_back(Inst);
    }
  }
}

ConstantInt *InductionDescriptor::getConstIntStepValue() const {
  if (isa<SCEVConstant>(Step))
    return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
  return nullptr;
}

bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                           ScalarEvolution *SE,
                                           InductionDescriptor &D) {

  // Here we only handle FP induction variables.
  assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");

  if (TheLoop->getHeader() != Phi->getParent())
    return false;

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi if it has a unique entry value and a unique backedge value.
  if (Phi->getNumIncomingValues() != 2)
    return false;
  Value *BEValue = nullptr, *StartValue = nullptr;
  if (TheLoop->contains(Phi->getIncomingBlock(0))) {
    BEValue = Phi->getIncomingValue(0);
    StartValue = Phi->getIncomingValue(1);
  } else {
    assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
           "Unexpected Phi node in the loop");
    BEValue = Phi->getIncomingValue(1);
    StartValue = Phi->getIncomingValue(0);
  }

  BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
  if (!BOp)
    return false;

  Value *Addend = nullptr;
  if (BOp->getOpcode() == Instruction::FAdd) {
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);
    else if (BOp->getOperand(1) == Phi)
      Addend = BOp->getOperand(0);
  } else if (BOp->getOpcode() == Instruction::FSub)
    if (BOp->getOperand(0) == Phi)
      Addend = BOp->getOperand(1);

  if (!Addend)
    return false;

  // The addend should be loop invariant.
  if (auto *I = dyn_cast<Instruction>(Addend))
    if (TheLoop->contains(I))
      return false;

  // FP Step has unknown SCEV.
  const SCEV *Step = SE->getUnknown(Addend);
  D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
  return true;
}

/// This function is called when we suspect that the update-chain of a phi node
/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts
/// that can be ignored. (This can happen when the PSCEV rewriter adds a
/// runtime predicate P under which the SCEV expression for the phi can be the
/// AddRecurrence \p AR; see createAddRecFromPHIWithCasts). We want to find the
/// cast instructions that are involved in the update-chain of this induction.
1225 /// A caller that adds the required runtime predicate can be free to drop these 1226 /// cast instructions, and compute the phi using \p AR (instead of some scev 1227 /// expression with casts). 1228 /// 1229 /// For example, without a predicate the scev expression can take the following 1230 /// form: 1231 /// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy) 1232 /// 1233 /// It corresponds to the following IR sequence: 1234 /// %for.body: 1235 /// %x = phi i64 [ 0, %ph ], [ %add, %for.body ] 1236 /// %casted_phi = "ExtTrunc i64 %x" 1237 /// %add = add i64 %casted_phi, %step 1238 /// 1239 /// where %x is given in \p PN, 1240 /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate, 1241 /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of 1242 /// several forms, for example, such as: 1243 /// ExtTrunc1: %casted_phi = and %x, 2^n-1 1244 /// or: 1245 /// ExtTrunc2: %t = shl %x, m 1246 /// %casted_phi = ashr %t, m 1247 /// 1248 /// If we are able to find such sequence, we return the instructions 1249 /// we found, namely %casted_phi and the instructions on its use-def chain up 1250 /// to the phi (not including the phi). 1251 static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, 1252 const SCEVUnknown *PhiScev, 1253 const SCEVAddRecExpr *AR, 1254 SmallVectorImpl<Instruction *> &CastInsts) { 1255 1256 assert(CastInsts.empty() && "CastInsts is expected to be empty."); 1257 auto *PN = cast<PHINode>(PhiScev->getValue()); 1258 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression"); 1259 const Loop *L = AR->getLoop(); 1260 1261 // Find any cast instructions that participate in the def-use chain of 1262 // PhiScev in the loop. 1263 // FORNOW/TODO: We currently expect the def-use chain to include only 1264 // two-operand instructions, where one of the operands is an invariant. 1265 // createAddRecFromPHIWithCasts() currently does not support anything more 1266 // involved than that, so we keep the search simple. This can be 1267 // extended/generalized as needed. 1268 1269 auto getDef = [&](const Value *Val) -> Value * { 1270 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val); 1271 if (!BinOp) 1272 return nullptr; 1273 Value *Op0 = BinOp->getOperand(0); 1274 Value *Op1 = BinOp->getOperand(1); 1275 Value *Def = nullptr; 1276 if (L->isLoopInvariant(Op0)) 1277 Def = Op1; 1278 else if (L->isLoopInvariant(Op1)) 1279 Def = Op0; 1280 return Def; 1281 }; 1282 1283 // Look for the instruction that defines the induction via the 1284 // loop backedge. 1285 BasicBlock *Latch = L->getLoopLatch(); 1286 if (!Latch) 1287 return false; 1288 Value *Val = PN->getIncomingValueForBlock(Latch); 1289 if (!Val) 1290 return false; 1291 1292 // Follow the def-use chain until the induction phi is reached. 1293 // If on the way we encounter a Value that has the same SCEV Expr as the 1294 // phi node, we can consider the instructions we visit from that point 1295 // as part of the cast-sequence that can be ignored. 1296 bool InCastSequence = false; 1297 auto *Inst = dyn_cast<Instruction>(Val); 1298 while (Val != PN) { 1299 // If we encountered a phi node other than PN, or if we left the loop, 1300 // we bail out. 
    if (!Inst || !L->contains(Inst)) {
      return false;
    }
    auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
    if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
      InCastSequence = true;
    if (InCastSequence) {
      // Only the last instruction in the cast sequence is expected to have
      // uses outside the induction def-use chain.
      if (!CastInsts.empty())
        if (!Inst->hasOneUse())
          return false;
      CastInsts.push_back(Inst);
    }
    Val = getDef(Val);
    if (!Val)
      return false;
    Inst = dyn_cast<Instruction>(Val);
  }

  return InCastSequence;
}

bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                         PredicatedScalarEvolution &PSE,
                                         InductionDescriptor &D, bool Assume) {
  Type *PhiTy = Phi->getType();

  // Handle integer and pointer induction variables. FP inductions are also
  // handled here, but we do not try to form a recurrent expression from the
  // PHI node in place.

  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
      !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
    return false;

  if (PhiTy->isFloatingPointTy())
    return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);

  const SCEV *PhiScev = PSE.getSCEV(Phi);
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  // We need this expression to be an AddRecExpr.
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Phi);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  // Record any Cast instructions that participate in the induction update.
  const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
  // If we started from an UnknownSCEV, and managed to build an addRecurrence
  // only after enabling Assume with PSCEV, this means we may have encountered
  // cast instructions that required adding a runtime check in order to
  // guarantee the correctness of the AddRecurrence representation of the
  // induction.
  if (PhiScev != AR && SymbolicPhi) {
    SmallVector<Instruction *, 2> Casts;
    if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
      return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
  }

  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}

bool InductionDescriptor::isInductionPHI(
    PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
    InductionDescriptor &D, const SCEV *Expr,
    SmallVectorImpl<Instruction *> *CastsToIgnore) {
  Type *PhiTy = Phi->getType();
  // We only handle integer and pointer induction variables.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
    return false;

  // Check that the PHI is consecutive.
  const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  if (AR->getLoop() != TheLoop) {
    // FIXME: We should treat this as a uniform. Unfortunately, we
    // don't currently know how to handle uniform PHIs.
1389 LLVM_DEBUG( 1390 dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); 1391 return false; 1392 } 1393 1394 Value *StartValue = 1395 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); 1396 1397 BasicBlock *Latch = AR->getLoop()->getLoopLatch(); 1398 if (!Latch) 1399 return false; 1400 1401 const SCEV *Step = AR->getStepRecurrence(*SE); 1402 // Calculate the pointer stride and check if it is consecutive. 1403 // The stride may be a constant or a loop invariant integer value. 1404 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); 1405 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop)) 1406 return false; 1407 1408 if (PhiTy->isIntegerTy()) { 1409 BinaryOperator *BOp = 1410 dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch)); 1411 D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp, 1412 /* ElementType */ nullptr, CastsToIgnore); 1413 return true; 1414 } 1415 1416 assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); 1417 // Pointer induction should be a constant. 1418 if (!ConstStep) 1419 return false; 1420 1421 // Always use i8 element type for opaque pointer inductions. 1422 PointerType *PtrTy = cast<PointerType>(PhiTy); 1423 Type *ElementType = PtrTy->isOpaque() 1424 ? Type::getInt8Ty(PtrTy->getContext()) 1425 : PtrTy->getNonOpaquePointerElementType(); 1426 if (!ElementType->isSized()) 1427 return false; 1428 1429 ConstantInt *CV = ConstStep->getValue(); 1430 const DataLayout &DL = Phi->getModule()->getDataLayout(); 1431 TypeSize TySize = DL.getTypeAllocSize(ElementType); 1432 // TODO: We could potentially support this for scalable vectors if we can 1433 // prove at compile time that the constant step is always a multiple of 1434 // the scalable type. 1435 if (TySize.isZero() || TySize.isScalable()) 1436 return false; 1437 1438 int64_t Size = static_cast<int64_t>(TySize.getFixedSize()); 1439 int64_t CVSize = CV->getSExtValue(); 1440 if (CVSize % Size) 1441 return false; 1442 auto *StepValue = 1443 SE->getConstant(CV->getType(), CVSize / Size, true /* signed */); 1444 D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue, 1445 /* BinOp */ nullptr, ElementType); 1446 return true; 1447 } 1448