1 //===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "VPlanAnalysis.h" 10 #include "VPlan.h" 11 #include "VPlanCFG.h" 12 #include "VPlanDominatorTree.h" 13 #include "llvm/ADT/PostOrderIterator.h" 14 #include "llvm/ADT/TypeSwitch.h" 15 #include "llvm/Analysis/ScalarEvolution.h" 16 #include "llvm/Analysis/TargetTransformInfo.h" 17 #include "llvm/IR/Instruction.h" 18 #include "llvm/IR/PatternMatch.h" 19 20 using namespace llvm; 21 22 #define DEBUG_TYPE "vplan" 23 24 VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan) 25 : Ctx(Plan.getScalarHeader()->getIRBasicBlock()->getContext()) { 26 if (auto LoopRegion = Plan.getVectorLoopRegion()) { 27 if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>( 28 &LoopRegion->getEntryBasicBlock()->front())) { 29 CanonicalIVTy = CanIV->getScalarType(); 30 return; 31 } 32 } 33 34 // If there's no canonical IV, retrieve the type from the trip count 35 // expression. 36 auto *TC = Plan.getTripCount(); 37 if (TC->isLiveIn()) { 38 CanonicalIVTy = TC->getLiveInIRValue()->getType(); 39 return; 40 } 41 CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType(); 42 } 43 44 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) { 45 Type *ResTy = inferScalarType(R->getIncomingValue(0)); 46 for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) { 47 VPValue *Inc = R->getIncomingValue(I); 48 assert(inferScalarType(Inc) == ResTy && 49 "different types inferred for different incoming values"); 50 CachedTypes[Inc] = ResTy; 51 } 52 return ResTy; 53 } 54 55 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { 56 // Set the result type from the first operand, check if the types for all 57 // other operands match and cache them. 58 auto SetResultTyFromOp = [this, R]() { 59 Type *ResTy = inferScalarType(R->getOperand(0)); 60 for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) { 61 VPValue *OtherV = R->getOperand(Op); 62 assert(inferScalarType(OtherV) == ResTy && 63 "different types inferred for different operands"); 64 CachedTypes[OtherV] = ResTy; 65 } 66 return ResTy; 67 }; 68 69 unsigned Opcode = R->getOpcode(); 70 if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode)) 71 return SetResultTyFromOp(); 72 73 switch (Opcode) { 74 case Instruction::ExtractElement: 75 case Instruction::Freeze: 76 case VPInstruction::ReductionStartVector: 77 return inferScalarType(R->getOperand(0)); 78 case Instruction::Select: { 79 Type *ResTy = inferScalarType(R->getOperand(1)); 80 VPValue *OtherV = R->getOperand(2); 81 assert(inferScalarType(OtherV) == ResTy && 82 "different types inferred for different operands"); 83 CachedTypes[OtherV] = ResTy; 84 return ResTy; 85 } 86 case Instruction::ICmp: 87 case VPInstruction::ActiveLaneMask: 88 assert(inferScalarType(R->getOperand(0)) == 89 inferScalarType(R->getOperand(1)) && 90 "different types inferred for different operands"); 91 return IntegerType::get(Ctx, 1); 92 case VPInstruction::ComputeAnyOfResult: 93 return inferScalarType(R->getOperand(1)); 94 case VPInstruction::ComputeFindIVResult: 95 case VPInstruction::ComputeReductionResult: { 96 return inferScalarType(R->getOperand(0)); 97 } 98 case VPInstruction::ExplicitVectorLength: 99 return Type::getIntNTy(Ctx, 32); 100 case Instruction::PHI: 101 // Infer the type of first operand only, as other operands of header phi's 102 // may lead to infinite recursion. 103 return inferScalarType(R->getOperand(0)); 104 case VPInstruction::FirstOrderRecurrenceSplice: 105 case VPInstruction::Not: 106 case VPInstruction::CalculateTripCountMinusVF: 107 case VPInstruction::CanonicalIVIncrementForPart: 108 case VPInstruction::AnyOf: 109 case VPInstruction::BuildStructVector: 110 case VPInstruction::BuildVector: 111 return SetResultTyFromOp(); 112 case VPInstruction::FirstActiveLane: 113 return Type::getIntNTy(Ctx, 64); 114 case VPInstruction::ExtractLastElement: 115 case VPInstruction::ExtractPenultimateElement: { 116 Type *BaseTy = inferScalarType(R->getOperand(0)); 117 if (auto *VecTy = dyn_cast<VectorType>(BaseTy)) 118 return VecTy->getElementType(); 119 return BaseTy; 120 } 121 case VPInstruction::LogicalAnd: 122 assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) && 123 inferScalarType(R->getOperand(1))->isIntegerTy(1) && 124 "LogicalAnd operands should be bool"); 125 return IntegerType::get(Ctx, 1); 126 case VPInstruction::Broadcast: 127 case VPInstruction::PtrAdd: 128 // Return the type based on first operand. 129 return inferScalarType(R->getOperand(0)); 130 case VPInstruction::BranchOnCond: 131 case VPInstruction::BranchOnCount: 132 return Type::getVoidTy(Ctx); 133 default: 134 break; 135 } 136 // Type inference not implemented for opcode. 137 LLVM_DEBUG({ 138 dbgs() << "LV: Found unhandled opcode for: "; 139 R->getVPSingleValue()->dump(); 140 }); 141 llvm_unreachable("Unhandled opcode!"); 142 } 143 144 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) { 145 unsigned Opcode = R->getOpcode(); 146 if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) || 147 Instruction::isBitwiseLogicOp(Opcode)) { 148 Type *ResTy = inferScalarType(R->getOperand(0)); 149 assert(ResTy == inferScalarType(R->getOperand(1)) && 150 "types for both operands must match for binary op"); 151 CachedTypes[R->getOperand(1)] = ResTy; 152 return ResTy; 153 } 154 155 switch (Opcode) { 156 case Instruction::ICmp: 157 case Instruction::FCmp: 158 return IntegerType::get(Ctx, 1); 159 case Instruction::FNeg: 160 case Instruction::Freeze: 161 return inferScalarType(R->getOperand(0)); 162 case Instruction::ExtractValue: { 163 assert(R->getNumOperands() == 2 && "expected single level extractvalue"); 164 auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0))); 165 auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue()); 166 return StructTy->getTypeAtIndex(CI->getZExtValue()); 167 } 168 default: 169 break; 170 } 171 172 // Type inference not implemented for opcode. 173 LLVM_DEBUG({ 174 dbgs() << "LV: Found unhandled opcode for: "; 175 R->getVPSingleValue()->dump(); 176 }); 177 llvm_unreachable("Unhandled opcode!"); 178 } 179 180 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) { 181 auto &CI = *cast<CallInst>(R->getUnderlyingInstr()); 182 return CI.getType(); 183 } 184 185 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) { 186 assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) && 187 "Store recipes should not define any values"); 188 return cast<LoadInst>(&R->getIngredient())->getType(); 189 } 190 191 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) { 192 Type *ResTy = inferScalarType(R->getOperand(1)); 193 VPValue *OtherV = R->getOperand(2); 194 assert(inferScalarType(OtherV) == ResTy && 195 "different types inferred for different operands"); 196 CachedTypes[OtherV] = ResTy; 197 return ResTy; 198 } 199 200 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) { 201 unsigned Opcode = R->getUnderlyingInstr()->getOpcode(); 202 203 if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) || 204 Instruction::isBitwiseLogicOp(Opcode)) { 205 Type *ResTy = inferScalarType(R->getOperand(0)); 206 assert(ResTy == inferScalarType(R->getOperand(1)) && 207 "inferred types for operands of binary op don't match"); 208 CachedTypes[R->getOperand(1)] = ResTy; 209 return ResTy; 210 } 211 212 if (Instruction::isCast(Opcode)) 213 return R->getUnderlyingInstr()->getType(); 214 215 switch (Opcode) { 216 case Instruction::Call: { 217 unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1); 218 return cast<Function>(R->getOperand(CallIdx)->getLiveInIRValue()) 219 ->getReturnType(); 220 } 221 case Instruction::Select: { 222 Type *ResTy = inferScalarType(R->getOperand(1)); 223 assert(ResTy == inferScalarType(R->getOperand(2)) && 224 "inferred types for operands of select op don't match"); 225 CachedTypes[R->getOperand(2)] = ResTy; 226 return ResTy; 227 } 228 case Instruction::ICmp: 229 case Instruction::FCmp: 230 return IntegerType::get(Ctx, 1); 231 case Instruction::Alloca: 232 case Instruction::ExtractValue: 233 return R->getUnderlyingInstr()->getType(); 234 case Instruction::Freeze: 235 case Instruction::FNeg: 236 case Instruction::GetElementPtr: 237 return inferScalarType(R->getOperand(0)); 238 case Instruction::Load: 239 return cast<LoadInst>(R->getUnderlyingInstr())->getType(); 240 case Instruction::Store: 241 // FIXME: VPReplicateRecipes with store opcodes still define a result 242 // VPValue, so we need to handle them here. Remove the code here once this 243 // is modeled accurately in VPlan. 244 return Type::getVoidTy(Ctx); 245 default: 246 break; 247 } 248 // Type inference not implemented for opcode. 249 LLVM_DEBUG({ 250 dbgs() << "LV: Found unhandled opcode for: "; 251 R->getVPSingleValue()->dump(); 252 }); 253 llvm_unreachable("Unhandled opcode"); 254 } 255 256 Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { 257 if (Type *CachedTy = CachedTypes.lookup(V)) 258 return CachedTy; 259 260 if (V->isLiveIn()) { 261 if (auto *IRValue = V->getLiveInIRValue()) 262 return IRValue->getType(); 263 // All VPValues without any underlying IR value (like the vector trip count 264 // or the backedge-taken count) have the same type as the canonical IV. 265 return CanonicalIVTy; 266 } 267 268 Type *ResultTy = 269 TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe()) 270 .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe, 271 VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe, 272 VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>( 273 [this](const auto *R) { 274 // Handle header phi recipes, except VPWidenIntOrFpInduction 275 // which needs special handling due it being possibly truncated. 276 // TODO: consider inferring/caching type of siblings, e.g., 277 // backedge value, here and in cases below. 278 return inferScalarType(R->getStartValue()); 279 }) 280 .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>( 281 [](const auto *R) { return R->getScalarType(); }) 282 .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe, 283 VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe, 284 VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe, 285 VPPartialReductionRecipe>([this](const VPRecipeBase *R) { 286 return inferScalarType(R->getOperand(0)); 287 }) 288 // VPInstructionWithType must be handled before VPInstruction. 289 .Case<VPInstructionWithType, VPWidenIntrinsicRecipe, 290 VPWidenCastRecipe>( 291 [](const auto *R) { return R->getResultType(); }) 292 .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe, 293 VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>( 294 [this](const auto *R) { return inferScalarTypeForRecipe(R); }) 295 .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) { 296 // TODO: Use info from interleave group. 297 return V->getUnderlyingValue()->getType(); 298 }) 299 .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) { 300 return R->getSCEV()->getType(); 301 }) 302 .Case<VPReductionRecipe>([this](const auto *R) { 303 return inferScalarType(R->getChainOp()); 304 }) 305 .Case<VPExpressionRecipe>([this](const auto *R) { 306 return inferScalarType(R->getOperandOfResultType()); 307 }); 308 309 assert(ResultTy && "could not infer type for the given VPValue"); 310 CachedTypes[V] = ResultTy; 311 return ResultTy; 312 } 313 314 void llvm::collectEphemeralRecipesForVPlan( 315 VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) { 316 // First, collect seed recipes which are operands of assumes. 317 SmallVector<VPRecipeBase *> Worklist; 318 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( 319 vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) { 320 for (VPRecipeBase &R : *VPBB) { 321 auto *RepR = dyn_cast<VPReplicateRecipe>(&R); 322 if (!RepR || !match(RepR->getUnderlyingInstr(), 323 PatternMatch::m_Intrinsic<Intrinsic::assume>())) 324 continue; 325 Worklist.push_back(RepR); 326 EphRecipes.insert(RepR); 327 } 328 } 329 330 // Process operands of candidates in worklist and add them to the set of 331 // ephemeral recipes, if they don't have side-effects and are only used by 332 // other ephemeral recipes. 333 while (!Worklist.empty()) { 334 VPRecipeBase *Cur = Worklist.pop_back_val(); 335 for (VPValue *Op : Cur->operands()) { 336 auto *OpR = Op->getDefiningRecipe(); 337 if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR)) 338 continue; 339 if (any_of(Op->users(), [EphRecipes](VPUser *U) { 340 auto *UR = dyn_cast<VPRecipeBase>(U); 341 return !UR || !EphRecipes.contains(UR); 342 })) 343 continue; 344 EphRecipes.insert(OpR); 345 Worklist.push_back(OpR); 346 } 347 } 348 } 349 350 template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>( 351 DominatorTreeBase<VPBlockBase, false> &DT); 352 353 bool VPDominatorTree::properlyDominates(const VPRecipeBase *A, 354 const VPRecipeBase *B) { 355 if (A == B) 356 return false; 357 358 auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) { 359 for (auto &R : *A->getParent()) { 360 if (&R == A) 361 return true; 362 if (&R == B) 363 return false; 364 } 365 llvm_unreachable("recipe not found"); 366 }; 367 const VPBlockBase *ParentA = A->getParent(); 368 const VPBlockBase *ParentB = B->getParent(); 369 if (ParentA == ParentB) 370 return LocalComesBefore(A, B); 371 372 #ifndef NDEBUG 373 auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * { 374 auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent()); 375 if (Region && Region->isReplicator()) { 376 assert(Region->getNumSuccessors() == 1 && 377 Region->getNumPredecessors() == 1 && "Expected SESE region!"); 378 assert(R->getParent()->size() == 1 && 379 "A recipe in an original replicator region must be the only " 380 "recipe in its block"); 381 return Region; 382 } 383 return nullptr; 384 }; 385 assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) && 386 "No replicate regions expected at this point"); 387 assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) && 388 "No replicate regions expected at this point"); 389 #endif 390 return Base::properlyDominates(ParentA, ParentB); 391 } 392 393 /// Get the VF scaling factor applied to the recipe's output, if the recipe has 394 /// one. 395 static unsigned getVFScaleFactor(VPRecipeBase *R) { 396 if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R)) 397 return RR->getVFScaleFactor(); 398 if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R)) 399 return RR->getVFScaleFactor(); 400 assert( 401 (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() != 402 VPInstruction::ReductionStartVector) && 403 "getting scaling factor of reduction-start-vector not implemented yet"); 404 return 1; 405 } 406 407 bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI) const { 408 return any_of(MaxLocalUsers, [&TTI](auto &LU) { 409 return LU.second > TTI.getNumberOfRegisters(LU.first); 410 }); 411 } 412 413 SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan( 414 VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI, 415 const SmallPtrSetImpl<const Value *> &ValuesToIgnore) { 416 // Each 'key' in the map opens a new interval. The values 417 // of the map are the index of the 'last seen' usage of the 418 // recipe that is the key. 419 using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>; 420 421 // Maps indices to recipes. 422 SmallVector<VPRecipeBase *, 64> Idx2Recipe; 423 // Marks the end of each interval. 424 IntervalMap EndPoint; 425 // Saves the list of recipe indices that are used in the loop. 426 SmallPtrSet<VPRecipeBase *, 8> Ends; 427 // Saves the list of values that are used in the loop but are defined outside 428 // the loop (not including non-recipe values such as arguments and 429 // constants). 430 SmallSetVector<VPValue *, 8> LoopInvariants; 431 LoopInvariants.insert(&Plan.getVectorTripCount()); 432 433 // We scan the loop in a topological order in order and assign a number to 434 // each recipe. We use RPO to ensure that defs are met before their users. We 435 // assume that each recipe that has in-loop users starts an interval. We 436 // record every time that an in-loop value is used, so we have a list of the 437 // first and last occurrences of each recipe. 438 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( 439 Plan.getVectorLoopRegion()); 440 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) { 441 if (!VPBB->getParent()) 442 break; 443 for (VPRecipeBase &R : *VPBB) { 444 Idx2Recipe.push_back(&R); 445 446 // Save the end location of each USE. 447 for (VPValue *U : R.operands()) { 448 auto *DefR = U->getDefiningRecipe(); 449 450 // Ignore non-recipe values such as arguments, constants, etc. 451 // FIXME: Might need some motivation why these values are ignored. If 452 // for example an argument is used inside the loop it will increase the 453 // register pressure (so shouldn't we add it to LoopInvariants). 454 if (!DefR && (!U->getLiveInIRValue() || 455 !isa<Instruction>(U->getLiveInIRValue()))) 456 continue; 457 458 // If this recipe is outside the loop then record it and continue. 459 if (!DefR) { 460 LoopInvariants.insert(U); 461 continue; 462 } 463 464 // Overwrite previous end points. 465 EndPoint[DefR] = Idx2Recipe.size(); 466 Ends.insert(DefR); 467 } 468 } 469 if (VPBB == Plan.getVectorLoopRegion()->getExiting()) { 470 // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the 471 // exiting block, where their increment will get materialized eventually. 472 for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { 473 if (isa<VPWidenIntOrFpInductionRecipe>(&R)) { 474 EndPoint[&R] = Idx2Recipe.size(); 475 Ends.insert(&R); 476 } 477 } 478 } 479 } 480 481 // Saves the list of intervals that end with the index in 'key'. 482 using RecipeList = SmallVector<VPRecipeBase *, 2>; 483 SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds; 484 485 // Next, we transpose the EndPoints into a multi map that holds the list of 486 // intervals that *end* at a specific location. 487 for (auto &Interval : EndPoint) 488 TransposeEnds[Interval.second].push_back(Interval.first); 489 490 SmallPtrSet<VPRecipeBase *, 8> OpenIntervals; 491 SmallVector<VPRegisterUsage, 8> RUs(VFs.size()); 492 SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size()); 493 494 LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); 495 496 VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); 497 498 const auto &TTICapture = TTI; 499 auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned { 500 if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) || 501 (VF.isScalable() && 502 !TTICapture.isElementTypeLegalForScalableVector(Ty))) 503 return 0; 504 return TTICapture.getRegUsageForType(VectorType::get(Ty, VF)); 505 }; 506 507 // We scan the instructions linearly and record each time that a new interval 508 // starts, by placing it in a set. If we find this value in TransposEnds then 509 // we remove it from the set. The max register usage is the maximum register 510 // usage of the recipes of the set. 511 for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) { 512 VPRecipeBase *R = Idx2Recipe[Idx]; 513 514 // Remove all of the recipes that end at this location. 515 RecipeList &List = TransposeEnds[Idx]; 516 for (VPRecipeBase *ToRemove : List) 517 OpenIntervals.erase(ToRemove); 518 519 // Ignore recipes that are never used within the loop and do not have side 520 // effects. 521 if (!Ends.count(R) && !R->mayHaveSideEffects()) 522 continue; 523 524 // Skip recipes for ignored values. 525 // TODO: Should mark recipes for ephemeral values that cannot be removed 526 // explictly in VPlan. 527 if (isa<VPSingleDefRecipe>(R) && 528 ValuesToIgnore.contains( 529 cast<VPSingleDefRecipe>(R)->getUnderlyingValue())) 530 continue; 531 532 // For each VF find the maximum usage of registers. 533 for (unsigned J = 0, E = VFs.size(); J < E; ++J) { 534 // Count the number of registers used, per register class, given all open 535 // intervals. 536 // Note that elements in this SmallMapVector will be default constructed 537 // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if 538 // there is no previous entry for ClassID. 539 SmallMapVector<unsigned, unsigned, 4> RegUsage; 540 541 for (auto *R : OpenIntervals) { 542 // Skip recipes that weren't present in the original loop. 543 // TODO: Remove after removing the legacy 544 // LoopVectorizationCostModel::calculateRegisterUsage 545 if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe, 546 VPBranchOnMaskRecipe>(R)) 547 continue; 548 549 if (VFs[J].isScalar() || 550 isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe, 551 VPScalarIVStepsRecipe>(R) || 552 (isa<VPInstruction>(R) && 553 all_of(cast<VPSingleDefRecipe>(R)->users(), 554 [&](VPUser *U) { 555 return cast<VPRecipeBase>(U)->usesScalars( 556 R->getVPSingleValue()); 557 })) || 558 (isa<VPReductionPHIRecipe>(R) && 559 (cast<VPReductionPHIRecipe>(R))->isInLoop())) { 560 unsigned ClassID = TTI.getRegisterClassForType( 561 false, TypeInfo.inferScalarType(R->getVPSingleValue())); 562 // FIXME: The target might use more than one register for the type 563 // even in the scalar case. 564 RegUsage[ClassID] += 1; 565 } else { 566 // The output from scaled phis and scaled reductions actually has 567 // fewer lanes than the VF. 568 unsigned ScaleFactor = getVFScaleFactor(R); 569 ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor); 570 LLVM_DEBUG(if (VF != VFs[J]) { 571 dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF 572 << " for " << *R << "\n"; 573 }); 574 575 for (VPValue *DefV : R->definedValues()) { 576 Type *ScalarTy = TypeInfo.inferScalarType(DefV); 577 unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy); 578 RegUsage[ClassID] += GetRegUsage(ScalarTy, VF); 579 } 580 } 581 } 582 583 for (const auto &Pair : RegUsage) { 584 auto &Entry = MaxUsages[J][Pair.first]; 585 Entry = std::max(Entry, Pair.second); 586 } 587 } 588 589 LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # " 590 << OpenIntervals.size() << '\n'); 591 592 // Add the current recipe to the list of open intervals. 593 OpenIntervals.insert(R); 594 } 595 596 // We also search for instructions that are defined outside the loop, but are 597 // used inside the loop. We need this number separately from the max-interval 598 // usage number because when we unroll, loop-invariant values do not take 599 // more register. 600 VPRegisterUsage RU; 601 for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) { 602 // Note that elements in this SmallMapVector will be default constructed 603 // as 0. So we can use "Invariant[ClassID] += n" in the code below even if 604 // there is no previous entry for ClassID. 605 SmallMapVector<unsigned, unsigned, 4> Invariant; 606 607 for (auto *In : LoopInvariants) { 608 // FIXME: The target might use more than one register for the type 609 // even in the scalar case. 610 bool IsScalar = all_of(In->users(), [&](VPUser *U) { 611 return cast<VPRecipeBase>(U)->usesScalars(In); 612 }); 613 614 ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx]; 615 unsigned ClassID = TTI.getRegisterClassForType( 616 VF.isVector(), TypeInfo.inferScalarType(In)); 617 Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF); 618 } 619 620 LLVM_DEBUG({ 621 dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n'; 622 dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size() 623 << " item\n"; 624 for (const auto &pair : MaxUsages[Idx]) { 625 dbgs() << "LV(REG): RegisterClass: " 626 << TTI.getRegisterClassName(pair.first) << ", " << pair.second 627 << " registers\n"; 628 } 629 dbgs() << "LV(REG): Found invariant usage: " << Invariant.size() 630 << " item\n"; 631 for (const auto &pair : Invariant) { 632 dbgs() << "LV(REG): RegisterClass: " 633 << TTI.getRegisterClassName(pair.first) << ", " << pair.second 634 << " registers\n"; 635 } 636 }); 637 638 RU.LoopInvariantRegs = Invariant; 639 RU.MaxLocalUsers = MaxUsages[Idx]; 640 RUs[Idx] = RU; 641 } 642 643 return RUs; 644 } 645