1 //===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "VPlanAnalysis.h" 10 #include "VPlan.h" 11 #include "VPlanCFG.h" 12 #include "VPlanDominatorTree.h" 13 #include "llvm/ADT/PostOrderIterator.h" 14 #include "llvm/ADT/TypeSwitch.h" 15 #include "llvm/Analysis/ScalarEvolution.h" 16 #include "llvm/Analysis/TargetTransformInfo.h" 17 #include "llvm/IR/Instruction.h" 18 #include "llvm/IR/PatternMatch.h" 19 20 using namespace llvm; 21 22 #define DEBUG_TYPE "vplan" 23 24 VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan) 25 : Ctx(Plan.getScalarHeader()->getIRBasicBlock()->getContext()) { 26 if (auto LoopRegion = Plan.getVectorLoopRegion()) { 27 if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>( 28 &LoopRegion->getEntryBasicBlock()->front())) { 29 CanonicalIVTy = CanIV->getScalarType(); 30 return; 31 } 32 } 33 34 // If there's no canonical IV, retrieve the type from the trip count 35 // expression. 36 auto *TC = Plan.getTripCount(); 37 if (TC->isLiveIn()) { 38 CanonicalIVTy = TC->getLiveInIRValue()->getType(); 39 return; 40 } 41 CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType(); 42 } 43 44 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) { 45 Type *ResTy = inferScalarType(R->getIncomingValue(0)); 46 for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) { 47 VPValue *Inc = R->getIncomingValue(I); 48 assert(inferScalarType(Inc) == ResTy && 49 "different types inferred for different incoming values"); 50 CachedTypes[Inc] = ResTy; 51 } 52 return ResTy; 53 } 54 55 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { 56 // Set the result type from the first operand, check if the types for all 57 // other operands match and cache them. 58 auto SetResultTyFromOp = [this, R]() { 59 Type *ResTy = inferScalarType(R->getOperand(0)); 60 for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) { 61 VPValue *OtherV = R->getOperand(Op); 62 assert(inferScalarType(OtherV) == ResTy && 63 "different types inferred for different operands"); 64 CachedTypes[OtherV] = ResTy; 65 } 66 return ResTy; 67 }; 68 69 unsigned Opcode = R->getOpcode(); 70 if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode)) 71 return SetResultTyFromOp(); 72 73 switch (Opcode) { 74 case Instruction::ExtractElement: 75 case Instruction::Freeze: 76 case VPInstruction::ReductionStartVector: 77 return inferScalarType(R->getOperand(0)); 78 case Instruction::Select: { 79 Type *ResTy = inferScalarType(R->getOperand(1)); 80 VPValue *OtherV = R->getOperand(2); 81 assert(inferScalarType(OtherV) == ResTy && 82 "different types inferred for different operands"); 83 CachedTypes[OtherV] = ResTy; 84 return ResTy; 85 } 86 case Instruction::ICmp: 87 case Instruction::FCmp: 88 case VPInstruction::ActiveLaneMask: 89 assert(inferScalarType(R->getOperand(0)) == 90 inferScalarType(R->getOperand(1)) && 91 "different types inferred for different operands"); 92 return IntegerType::get(Ctx, 1); 93 case VPInstruction::ComputeAnyOfResult: 94 return inferScalarType(R->getOperand(1)); 95 case VPInstruction::ComputeFindIVResult: 96 case VPInstruction::ComputeReductionResult: { 97 return inferScalarType(R->getOperand(0)); 98 } 99 case VPInstruction::ExplicitVectorLength: 100 return Type::getIntNTy(Ctx, 32); 101 case Instruction::PHI: 102 // Infer the type of first operand only, as other operands of header phi's 103 // may lead to infinite recursion. 104 return inferScalarType(R->getOperand(0)); 105 case VPInstruction::FirstOrderRecurrenceSplice: 106 case VPInstruction::Not: 107 case VPInstruction::CalculateTripCountMinusVF: 108 case VPInstruction::CanonicalIVIncrementForPart: 109 case VPInstruction::AnyOf: 110 case VPInstruction::BuildStructVector: 111 case VPInstruction::BuildVector: 112 return SetResultTyFromOp(); 113 case VPInstruction::FirstActiveLane: 114 return Type::getIntNTy(Ctx, 64); 115 case VPInstruction::ExtractLastElement: 116 case VPInstruction::ExtractPenultimateElement: { 117 Type *BaseTy = inferScalarType(R->getOperand(0)); 118 if (auto *VecTy = dyn_cast<VectorType>(BaseTy)) 119 return VecTy->getElementType(); 120 return BaseTy; 121 } 122 case VPInstruction::LogicalAnd: 123 assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) && 124 inferScalarType(R->getOperand(1))->isIntegerTy(1) && 125 "LogicalAnd operands should be bool"); 126 return IntegerType::get(Ctx, 1); 127 case VPInstruction::Broadcast: 128 case VPInstruction::PtrAdd: 129 // Return the type based on first operand. 130 return inferScalarType(R->getOperand(0)); 131 case VPInstruction::BranchOnCond: 132 case VPInstruction::BranchOnCount: 133 return Type::getVoidTy(Ctx); 134 default: 135 break; 136 } 137 // Type inference not implemented for opcode. 138 LLVM_DEBUG({ 139 dbgs() << "LV: Found unhandled opcode for: "; 140 R->getVPSingleValue()->dump(); 141 }); 142 llvm_unreachable("Unhandled opcode!"); 143 } 144 145 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) { 146 unsigned Opcode = R->getOpcode(); 147 if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) || 148 Instruction::isBitwiseLogicOp(Opcode)) { 149 Type *ResTy = inferScalarType(R->getOperand(0)); 150 assert(ResTy == inferScalarType(R->getOperand(1)) && 151 "types for both operands must match for binary op"); 152 CachedTypes[R->getOperand(1)] = ResTy; 153 return ResTy; 154 } 155 156 switch (Opcode) { 157 case Instruction::ICmp: 158 case Instruction::FCmp: 159 return IntegerType::get(Ctx, 1); 160 case Instruction::FNeg: 161 case Instruction::Freeze: 162 return inferScalarType(R->getOperand(0)); 163 case Instruction::ExtractValue: { 164 assert(R->getNumOperands() == 2 && "expected single level extractvalue"); 165 auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0))); 166 auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue()); 167 return StructTy->getTypeAtIndex(CI->getZExtValue()); 168 } 169 default: 170 break; 171 } 172 173 // Type inference not implemented for opcode. 174 LLVM_DEBUG({ 175 dbgs() << "LV: Found unhandled opcode for: "; 176 R->getVPSingleValue()->dump(); 177 }); 178 llvm_unreachable("Unhandled opcode!"); 179 } 180 181 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) { 182 auto &CI = *cast<CallInst>(R->getUnderlyingInstr()); 183 return CI.getType(); 184 } 185 186 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) { 187 assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) && 188 "Store recipes should not define any values"); 189 return cast<LoadInst>(&R->getIngredient())->getType(); 190 } 191 192 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) { 193 Type *ResTy = inferScalarType(R->getOperand(1)); 194 VPValue *OtherV = R->getOperand(2); 195 assert(inferScalarType(OtherV) == ResTy && 196 "different types inferred for different operands"); 197 CachedTypes[OtherV] = ResTy; 198 return ResTy; 199 } 200 201 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) { 202 unsigned Opcode = R->getUnderlyingInstr()->getOpcode(); 203 204 if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) || 205 Instruction::isBitwiseLogicOp(Opcode)) { 206 Type *ResTy = inferScalarType(R->getOperand(0)); 207 assert(ResTy == inferScalarType(R->getOperand(1)) && 208 "inferred types for operands of binary op don't match"); 209 CachedTypes[R->getOperand(1)] = ResTy; 210 return ResTy; 211 } 212 213 if (Instruction::isCast(Opcode)) 214 return R->getUnderlyingInstr()->getType(); 215 216 switch (Opcode) { 217 case Instruction::Call: { 218 unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1); 219 return cast<Function>(R->getOperand(CallIdx)->getLiveInIRValue()) 220 ->getReturnType(); 221 } 222 case Instruction::Select: { 223 Type *ResTy = inferScalarType(R->getOperand(1)); 224 assert(ResTy == inferScalarType(R->getOperand(2)) && 225 "inferred types for operands of select op don't match"); 226 CachedTypes[R->getOperand(2)] = ResTy; 227 return ResTy; 228 } 229 case Instruction::ICmp: 230 case Instruction::FCmp: 231 return IntegerType::get(Ctx, 1); 232 case Instruction::Alloca: 233 case Instruction::ExtractValue: 234 return R->getUnderlyingInstr()->getType(); 235 case Instruction::Freeze: 236 case Instruction::FNeg: 237 case Instruction::GetElementPtr: 238 return inferScalarType(R->getOperand(0)); 239 case Instruction::Load: 240 return cast<LoadInst>(R->getUnderlyingInstr())->getType(); 241 case Instruction::Store: 242 // FIXME: VPReplicateRecipes with store opcodes still define a result 243 // VPValue, so we need to handle them here. Remove the code here once this 244 // is modeled accurately in VPlan. 245 return Type::getVoidTy(Ctx); 246 default: 247 break; 248 } 249 // Type inference not implemented for opcode. 250 LLVM_DEBUG({ 251 dbgs() << "LV: Found unhandled opcode for: "; 252 R->getVPSingleValue()->dump(); 253 }); 254 llvm_unreachable("Unhandled opcode"); 255 } 256 257 Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { 258 if (Type *CachedTy = CachedTypes.lookup(V)) 259 return CachedTy; 260 261 if (V->isLiveIn()) { 262 if (auto *IRValue = V->getLiveInIRValue()) 263 return IRValue->getType(); 264 // All VPValues without any underlying IR value (like the vector trip count 265 // or the backedge-taken count) have the same type as the canonical IV. 266 return CanonicalIVTy; 267 } 268 269 Type *ResultTy = 270 TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe()) 271 .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe, 272 VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe, 273 VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>( 274 [this](const auto *R) { 275 // Handle header phi recipes, except VPWidenIntOrFpInduction 276 // which needs special handling due it being possibly truncated. 277 // TODO: consider inferring/caching type of siblings, e.g., 278 // backedge value, here and in cases below. 279 return inferScalarType(R->getStartValue()); 280 }) 281 .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>( 282 [](const auto *R) { return R->getScalarType(); }) 283 .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe, 284 VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe, 285 VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe, 286 VPPartialReductionRecipe>([this](const VPRecipeBase *R) { 287 return inferScalarType(R->getOperand(0)); 288 }) 289 // VPInstructionWithType must be handled before VPInstruction. 290 .Case<VPInstructionWithType, VPWidenIntrinsicRecipe, 291 VPWidenCastRecipe>( 292 [](const auto *R) { return R->getResultType(); }) 293 .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe, 294 VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>( 295 [this](const auto *R) { return inferScalarTypeForRecipe(R); }) 296 .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) { 297 // TODO: Use info from interleave group. 298 return V->getUnderlyingValue()->getType(); 299 }) 300 .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) { 301 return R->getSCEV()->getType(); 302 }) 303 .Case<VPReductionRecipe>([this](const auto *R) { 304 return inferScalarType(R->getChainOp()); 305 }) 306 .Case<VPExpressionRecipe>([this](const auto *R) { 307 return inferScalarType(R->getOperandOfResultType()); 308 }); 309 310 assert(ResultTy && "could not infer type for the given VPValue"); 311 CachedTypes[V] = ResultTy; 312 return ResultTy; 313 } 314 315 void llvm::collectEphemeralRecipesForVPlan( 316 VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) { 317 // First, collect seed recipes which are operands of assumes. 318 SmallVector<VPRecipeBase *> Worklist; 319 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( 320 vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) { 321 for (VPRecipeBase &R : *VPBB) { 322 auto *RepR = dyn_cast<VPReplicateRecipe>(&R); 323 if (!RepR || !match(RepR->getUnderlyingInstr(), 324 PatternMatch::m_Intrinsic<Intrinsic::assume>())) 325 continue; 326 Worklist.push_back(RepR); 327 EphRecipes.insert(RepR); 328 } 329 } 330 331 // Process operands of candidates in worklist and add them to the set of 332 // ephemeral recipes, if they don't have side-effects and are only used by 333 // other ephemeral recipes. 334 while (!Worklist.empty()) { 335 VPRecipeBase *Cur = Worklist.pop_back_val(); 336 for (VPValue *Op : Cur->operands()) { 337 auto *OpR = Op->getDefiningRecipe(); 338 if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR)) 339 continue; 340 if (any_of(Op->users(), [EphRecipes](VPUser *U) { 341 auto *UR = dyn_cast<VPRecipeBase>(U); 342 return !UR || !EphRecipes.contains(UR); 343 })) 344 continue; 345 EphRecipes.insert(OpR); 346 Worklist.push_back(OpR); 347 } 348 } 349 } 350 351 template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>( 352 DominatorTreeBase<VPBlockBase, false> &DT); 353 354 bool VPDominatorTree::properlyDominates(const VPRecipeBase *A, 355 const VPRecipeBase *B) { 356 if (A == B) 357 return false; 358 359 auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) { 360 for (auto &R : *A->getParent()) { 361 if (&R == A) 362 return true; 363 if (&R == B) 364 return false; 365 } 366 llvm_unreachable("recipe not found"); 367 }; 368 const VPBlockBase *ParentA = A->getParent(); 369 const VPBlockBase *ParentB = B->getParent(); 370 if (ParentA == ParentB) 371 return LocalComesBefore(A, B); 372 373 #ifndef NDEBUG 374 auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * { 375 auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent()); 376 if (Region && Region->isReplicator()) { 377 assert(Region->getNumSuccessors() == 1 && 378 Region->getNumPredecessors() == 1 && "Expected SESE region!"); 379 assert(R->getParent()->size() == 1 && 380 "A recipe in an original replicator region must be the only " 381 "recipe in its block"); 382 return Region; 383 } 384 return nullptr; 385 }; 386 assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) && 387 "No replicate regions expected at this point"); 388 assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) && 389 "No replicate regions expected at this point"); 390 #endif 391 return Base::properlyDominates(ParentA, ParentB); 392 } 393 394 /// Get the VF scaling factor applied to the recipe's output, if the recipe has 395 /// one. 396 static unsigned getVFScaleFactor(VPRecipeBase *R) { 397 if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R)) 398 return RR->getVFScaleFactor(); 399 if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R)) 400 return RR->getVFScaleFactor(); 401 assert( 402 (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() != 403 VPInstruction::ReductionStartVector) && 404 "getting scaling factor of reduction-start-vector not implemented yet"); 405 return 1; 406 } 407 408 bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI) const { 409 return any_of(MaxLocalUsers, [&TTI](auto &LU) { 410 return LU.second > TTI.getNumberOfRegisters(LU.first); 411 }); 412 } 413 414 SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan( 415 VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI, 416 const SmallPtrSetImpl<const Value *> &ValuesToIgnore) { 417 // Each 'key' in the map opens a new interval. The values 418 // of the map are the index of the 'last seen' usage of the 419 // recipe that is the key. 420 using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>; 421 422 // Maps indices to recipes. 423 SmallVector<VPRecipeBase *, 64> Idx2Recipe; 424 // Marks the end of each interval. 425 IntervalMap EndPoint; 426 // Saves the list of recipe indices that are used in the loop. 427 SmallPtrSet<VPRecipeBase *, 8> Ends; 428 // Saves the list of values that are used in the loop but are defined outside 429 // the loop (not including non-recipe values such as arguments and 430 // constants). 431 SmallSetVector<VPValue *, 8> LoopInvariants; 432 LoopInvariants.insert(&Plan.getVectorTripCount()); 433 434 // We scan the loop in a topological order in order and assign a number to 435 // each recipe. We use RPO to ensure that defs are met before their users. We 436 // assume that each recipe that has in-loop users starts an interval. We 437 // record every time that an in-loop value is used, so we have a list of the 438 // first and last occurrences of each recipe. 439 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( 440 Plan.getVectorLoopRegion()); 441 for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) { 442 if (!VPBB->getParent()) 443 break; 444 for (VPRecipeBase &R : *VPBB) { 445 Idx2Recipe.push_back(&R); 446 447 // Save the end location of each USE. 448 for (VPValue *U : R.operands()) { 449 auto *DefR = U->getDefiningRecipe(); 450 451 // Ignore non-recipe values such as arguments, constants, etc. 452 // FIXME: Might need some motivation why these values are ignored. If 453 // for example an argument is used inside the loop it will increase the 454 // register pressure (so shouldn't we add it to LoopInvariants). 455 if (!DefR && (!U->getLiveInIRValue() || 456 !isa<Instruction>(U->getLiveInIRValue()))) 457 continue; 458 459 // If this recipe is outside the loop then record it and continue. 460 if (!DefR) { 461 LoopInvariants.insert(U); 462 continue; 463 } 464 465 // Overwrite previous end points. 466 EndPoint[DefR] = Idx2Recipe.size(); 467 Ends.insert(DefR); 468 } 469 } 470 if (VPBB == Plan.getVectorLoopRegion()->getExiting()) { 471 // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the 472 // exiting block, where their increment will get materialized eventually. 473 for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { 474 if (isa<VPWidenIntOrFpInductionRecipe>(&R)) { 475 EndPoint[&R] = Idx2Recipe.size(); 476 Ends.insert(&R); 477 } 478 } 479 } 480 } 481 482 // Saves the list of intervals that end with the index in 'key'. 483 using RecipeList = SmallVector<VPRecipeBase *, 2>; 484 SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds; 485 486 // Next, we transpose the EndPoints into a multi map that holds the list of 487 // intervals that *end* at a specific location. 488 for (auto &Interval : EndPoint) 489 TransposeEnds[Interval.second].push_back(Interval.first); 490 491 SmallPtrSet<VPRecipeBase *, 8> OpenIntervals; 492 SmallVector<VPRegisterUsage, 8> RUs(VFs.size()); 493 SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size()); 494 495 LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); 496 497 VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); 498 499 const auto &TTICapture = TTI; 500 auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned { 501 if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) || 502 (VF.isScalable() && 503 !TTICapture.isElementTypeLegalForScalableVector(Ty))) 504 return 0; 505 return TTICapture.getRegUsageForType(VectorType::get(Ty, VF)); 506 }; 507 508 // We scan the instructions linearly and record each time that a new interval 509 // starts, by placing it in a set. If we find this value in TransposEnds then 510 // we remove it from the set. The max register usage is the maximum register 511 // usage of the recipes of the set. 512 for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) { 513 VPRecipeBase *R = Idx2Recipe[Idx]; 514 515 // Remove all of the recipes that end at this location. 516 RecipeList &List = TransposeEnds[Idx]; 517 for (VPRecipeBase *ToRemove : List) 518 OpenIntervals.erase(ToRemove); 519 520 // Ignore recipes that are never used within the loop and do not have side 521 // effects. 522 if (!Ends.count(R) && !R->mayHaveSideEffects()) 523 continue; 524 525 // Skip recipes for ignored values. 526 // TODO: Should mark recipes for ephemeral values that cannot be removed 527 // explictly in VPlan. 528 if (isa<VPSingleDefRecipe>(R) && 529 ValuesToIgnore.contains( 530 cast<VPSingleDefRecipe>(R)->getUnderlyingValue())) 531 continue; 532 533 // For each VF find the maximum usage of registers. 534 for (unsigned J = 0, E = VFs.size(); J < E; ++J) { 535 // Count the number of registers used, per register class, given all open 536 // intervals. 537 // Note that elements in this SmallMapVector will be default constructed 538 // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if 539 // there is no previous entry for ClassID. 540 SmallMapVector<unsigned, unsigned, 4> RegUsage; 541 542 for (auto *R : OpenIntervals) { 543 // Skip recipes that weren't present in the original loop. 544 // TODO: Remove after removing the legacy 545 // LoopVectorizationCostModel::calculateRegisterUsage 546 if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe, 547 VPBranchOnMaskRecipe>(R)) 548 continue; 549 550 if (VFs[J].isScalar() || 551 isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe, 552 VPScalarIVStepsRecipe>(R) || 553 (isa<VPInstruction>(R) && 554 all_of(cast<VPSingleDefRecipe>(R)->users(), 555 [&](VPUser *U) { 556 return cast<VPRecipeBase>(U)->usesScalars( 557 R->getVPSingleValue()); 558 })) || 559 (isa<VPReductionPHIRecipe>(R) && 560 (cast<VPReductionPHIRecipe>(R))->isInLoop())) { 561 unsigned ClassID = TTI.getRegisterClassForType( 562 false, TypeInfo.inferScalarType(R->getVPSingleValue())); 563 // FIXME: The target might use more than one register for the type 564 // even in the scalar case. 565 RegUsage[ClassID] += 1; 566 } else { 567 // The output from scaled phis and scaled reductions actually has 568 // fewer lanes than the VF. 569 unsigned ScaleFactor = getVFScaleFactor(R); 570 ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor); 571 LLVM_DEBUG(if (VF != VFs[J]) { 572 dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF 573 << " for " << *R << "\n"; 574 }); 575 576 for (VPValue *DefV : R->definedValues()) { 577 Type *ScalarTy = TypeInfo.inferScalarType(DefV); 578 unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy); 579 RegUsage[ClassID] += GetRegUsage(ScalarTy, VF); 580 } 581 } 582 } 583 584 for (const auto &Pair : RegUsage) { 585 auto &Entry = MaxUsages[J][Pair.first]; 586 Entry = std::max(Entry, Pair.second); 587 } 588 } 589 590 LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # " 591 << OpenIntervals.size() << '\n'); 592 593 // Add the current recipe to the list of open intervals. 594 OpenIntervals.insert(R); 595 } 596 597 // We also search for instructions that are defined outside the loop, but are 598 // used inside the loop. We need this number separately from the max-interval 599 // usage number because when we unroll, loop-invariant values do not take 600 // more register. 601 VPRegisterUsage RU; 602 for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) { 603 // Note that elements in this SmallMapVector will be default constructed 604 // as 0. So we can use "Invariant[ClassID] += n" in the code below even if 605 // there is no previous entry for ClassID. 606 SmallMapVector<unsigned, unsigned, 4> Invariant; 607 608 for (auto *In : LoopInvariants) { 609 // FIXME: The target might use more than one register for the type 610 // even in the scalar case. 611 bool IsScalar = all_of(In->users(), [&](VPUser *U) { 612 return cast<VPRecipeBase>(U)->usesScalars(In); 613 }); 614 615 ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx]; 616 unsigned ClassID = TTI.getRegisterClassForType( 617 VF.isVector(), TypeInfo.inferScalarType(In)); 618 Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF); 619 } 620 621 LLVM_DEBUG({ 622 dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n'; 623 dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size() 624 << " item\n"; 625 for (const auto &pair : MaxUsages[Idx]) { 626 dbgs() << "LV(REG): RegisterClass: " 627 << TTI.getRegisterClassName(pair.first) << ", " << pair.second 628 << " registers\n"; 629 } 630 dbgs() << "LV(REG): Found invariant usage: " << Invariant.size() 631 << " item\n"; 632 for (const auto &pair : Invariant) { 633 dbgs() << "LV(REG): RegisterClass: " 634 << TTI.getRegisterClassName(pair.first) << ", " << pair.second 635 << " registers\n"; 636 } 637 }); 638 639 RU.LoopInvariantRegs = Invariant; 640 RU.MaxLocalUsers = MaxUsages[Idx]; 641 RUs[Idx] = RU; 642 } 643 644 return RUs; 645 } 646