xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "VPlanAnalysis.h"
10 #include "VPlan.h"
11 #include "VPlanCFG.h"
12 #include "VPlanDominatorTree.h"
13 #include "llvm/ADT/PostOrderIterator.h"
14 #include "llvm/ADT/TypeSwitch.h"
15 #include "llvm/Analysis/ScalarEvolution.h"
16 #include "llvm/Analysis/TargetTransformInfo.h"
17 #include "llvm/IR/Instruction.h"
18 #include "llvm/IR/PatternMatch.h"
19 
20 using namespace llvm;
21 
22 #define DEBUG_TYPE "vplan"
23 
24 VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan)
25     : Ctx(Plan.getScalarHeader()->getIRBasicBlock()->getContext()) {
26   if (auto LoopRegion = Plan.getVectorLoopRegion()) {
27     if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
28             &LoopRegion->getEntryBasicBlock()->front())) {
29       CanonicalIVTy = CanIV->getScalarType();
30       return;
31     }
32   }
33 
34   // If there's no canonical IV, retrieve the type from the trip count
35   // expression.
36   auto *TC = Plan.getTripCount();
37   if (TC->isLiveIn()) {
38     CanonicalIVTy = TC->getLiveInIRValue()->getType();
39     return;
40   }
41   CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
42 }
43 
44 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
45   Type *ResTy = inferScalarType(R->getIncomingValue(0));
46   for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
47     VPValue *Inc = R->getIncomingValue(I);
48     assert(inferScalarType(Inc) == ResTy &&
49            "different types inferred for different incoming values");
50     CachedTypes[Inc] = ResTy;
51   }
52   return ResTy;
53 }
54 
/// Infer the scalar result type of a VPInstruction, caching the types of
/// operands that are required to match the result along the way.
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
  // Set the result type from the first operand, check if the types for all
  // other operands match and cache them.
  auto SetResultTyFromOp = [this, R]() {
    Type *ResTy = inferScalarType(R->getOperand(0));
    for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) {
      VPValue *OtherV = R->getOperand(Op);
      assert(inferScalarType(OtherV) == ResTy &&
             "different types inferred for different operands");
      CachedTypes[OtherV] = ResTy;
    }
    return ResTy;
  };

  unsigned Opcode = R->getOpcode();
  // Unary and binary IR opcodes produce the same scalar type as their
  // operands.
  if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode))
    return SetResultTyFromOp();

  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::Freeze:
  case VPInstruction::ReductionStartVector:
    return inferScalarType(R->getOperand(0));
  case Instruction::Select: {
    // The result type comes from the two selected values (operands 1 and 2);
    // operand 0 is the condition and is deliberately not inspected here.
    Type *ResTy = inferScalarType(R->getOperand(1));
    VPValue *OtherV = R->getOperand(2);
    assert(inferScalarType(OtherV) == ResTy &&
           "different types inferred for different operands");
    CachedTypes[OtherV] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case VPInstruction::ActiveLaneMask:
    // Comparisons and lane masks yield an i1.
    assert(inferScalarType(R->getOperand(0)) ==
               inferScalarType(R->getOperand(1)) &&
           "different types inferred for different operands");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::ComputeAnyOfResult:
    return inferScalarType(R->getOperand(1));
  case VPInstruction::ComputeFindIVResult:
  case VPInstruction::ComputeReductionResult: {
    return inferScalarType(R->getOperand(0));
  }
  case VPInstruction::ExplicitVectorLength:
    // The explicit vector length is modeled as a 32-bit integer.
    return Type::getIntNTy(Ctx, 32);
  case Instruction::PHI:
    // Infer the type of first operand only, as other operands of header phi's
    // may lead to infinite recursion.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::Not:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::AnyOf:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
    return SetResultTyFromOp();
  case VPInstruction::FirstActiveLane:
    // The lane index is modeled as a 64-bit integer.
    return Type::getIntNTy(Ctx, 64);
  case VPInstruction::ExtractLastElement:
  case VPInstruction::ExtractPenultimateElement: {
    // Extracting from a vector yields the element type; extracting from an
    // already-scalar value leaves the type unchanged.
    Type *BaseTy = inferScalarType(R->getOperand(0));
    if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
      return VecTy->getElementType();
    return BaseTy;
  }
  case VPInstruction::LogicalAnd:
    assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
           inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
           "LogicalAnd operands should be bool");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::Broadcast:
  case VPInstruction::PtrAdd:
    // Return the type based on first operand.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
    // Branches do not define a value.
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}
143 
/// Infer the scalar result type of a widened instruction recipe based on its
/// underlying IR opcode.
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
  unsigned Opcode = R->getOpcode();
  // Binary ops (including shifts and bitwise logic) require both operands to
  // have the same type, which is also the result type; cache the type of the
  // second operand as a side effect.
  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "types for both operands must match for binary op");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  switch (Opcode) {
  case Instruction::ICmp:
  case Instruction::FCmp:
    // Comparisons yield an i1.
    return IntegerType::get(Ctx, 1);
  case Instruction::FNeg:
  case Instruction::Freeze:
    return inferScalarType(R->getOperand(0));
  case Instruction::ExtractValue: {
    assert(R->getNumOperands() == 2 && "expected single level extractvalue");
    // Operand 1 is a live-in constant index selecting the struct member.
    auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0)));
    auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue());
    return StructTy->getTypeAtIndex(CI->getZExtValue());
  }
  default:
    break;
  }

  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}
179 
180 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
181   auto &CI = *cast<CallInst>(R->getUnderlyingInstr());
182   return CI.getType();
183 }
184 
185 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
186   assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
187          "Store recipes should not define any values");
188   return cast<LoadInst>(&R->getIngredient())->getType();
189 }
190 
191 Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) {
192   Type *ResTy = inferScalarType(R->getOperand(1));
193   VPValue *OtherV = R->getOperand(2);
194   assert(inferScalarType(OtherV) == ResTy &&
195          "different types inferred for different operands");
196   CachedTypes[OtherV] = ResTy;
197   return ResTy;
198 }
199 
/// Infer the scalar result type of a replicated (per-lane) recipe from the
/// opcode of its underlying IR instruction.
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) {
  unsigned Opcode = R->getUnderlyingInstr()->getOpcode();

  // Binary ops (including shifts and bitwise logic) require matching operand
  // types, which also determine the result type; cache the second operand's
  // type as a side effect.
  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "inferred types for operands of binary op don't match");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  // Casts carry their destination type on the underlying IR instruction.
  if (Instruction::isCast(Opcode))
    return R->getUnderlyingInstr()->getType();

  switch (Opcode) {
  case Instruction::Call: {
    // The called function is the last operand, skipping one extra trailing
    // operand (the mask) when the recipe is predicated.
    unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1);
    return cast<Function>(R->getOperand(CallIdx)->getLiveInIRValue())
        ->getReturnType();
  }
  case Instruction::Select: {
    // Both select arms must agree on a type; operand 0 is the condition.
    Type *ResTy = inferScalarType(R->getOperand(1));
    assert(ResTy == inferScalarType(R->getOperand(2)) &&
           "inferred types for operands of select op don't match");
    CachedTypes[R->getOperand(2)] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
    // Comparisons yield an i1.
    return IntegerType::get(Ctx, 1);
  case Instruction::Alloca:
  case Instruction::ExtractValue:
    return R->getUnderlyingInstr()->getType();
  case Instruction::Freeze:
  case Instruction::FNeg:
  case Instruction::GetElementPtr:
    return inferScalarType(R->getOperand(0));
  case Instruction::Load:
    return cast<LoadInst>(R->getUnderlyingInstr())->getType();
  case Instruction::Store:
    // FIXME: VPReplicateRecipes with store opcodes still define a result
    // VPValue, so we need to handle them here. Remove the code here once this
    // is modeled accurately in VPlan.
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode");
}
255 
256 Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
257   if (Type *CachedTy = CachedTypes.lookup(V))
258     return CachedTy;
259 
260   if (V->isLiveIn()) {
261     if (auto *IRValue = V->getLiveInIRValue())
262       return IRValue->getType();
263     // All VPValues without any underlying IR value (like the vector trip count
264     // or the backedge-taken count) have the same type as the canonical IV.
265     return CanonicalIVTy;
266   }
267 
268   Type *ResultTy =
269       TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
270           .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
271                 VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
272                 VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
273               [this](const auto *R) {
274                 // Handle header phi recipes, except VPWidenIntOrFpInduction
275                 // which needs special handling due it being possibly truncated.
276                 // TODO: consider inferring/caching type of siblings, e.g.,
277                 // backedge value, here and in cases below.
278                 return inferScalarType(R->getStartValue());
279               })
280           .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
281               [](const auto *R) { return R->getScalarType(); })
282           .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
283                 VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
284                 VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe,
285                 VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
286             return inferScalarType(R->getOperand(0));
287           })
288           // VPInstructionWithType must be handled before VPInstruction.
289           .Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
290                 VPWidenCastRecipe>(
291               [](const auto *R) { return R->getResultType(); })
292           .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
293                 VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
294               [this](const auto *R) { return inferScalarTypeForRecipe(R); })
295           .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
296             // TODO: Use info from interleave group.
297             return V->getUnderlyingValue()->getType();
298           })
299           .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
300             return R->getSCEV()->getType();
301           })
302           .Case<VPReductionRecipe>([this](const auto *R) {
303             return inferScalarType(R->getChainOp());
304           })
305           .Case<VPExpressionRecipe>([this](const auto *R) {
306             return inferScalarType(R->getOperandOfResultType());
307           });
308 
309   assert(ResultTy && "could not infer type for the given VPValue");
310   CachedTypes[V] = ResultTy;
311   return ResultTy;
312 }
313 
314 void llvm::collectEphemeralRecipesForVPlan(
315     VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
316   // First, collect seed recipes which are operands of assumes.
317   SmallVector<VPRecipeBase *> Worklist;
318   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
319            vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
320     for (VPRecipeBase &R : *VPBB) {
321       auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
322       if (!RepR || !match(RepR->getUnderlyingInstr(),
323                           PatternMatch::m_Intrinsic<Intrinsic::assume>()))
324         continue;
325       Worklist.push_back(RepR);
326       EphRecipes.insert(RepR);
327     }
328   }
329 
330   // Process operands of candidates in worklist and add them to the set of
331   // ephemeral recipes, if they don't have side-effects and are only used by
332   // other ephemeral recipes.
333   while (!Worklist.empty()) {
334     VPRecipeBase *Cur = Worklist.pop_back_val();
335     for (VPValue *Op : Cur->operands()) {
336       auto *OpR = Op->getDefiningRecipe();
337       if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR))
338         continue;
339       if (any_of(Op->users(), [EphRecipes](VPUser *U) {
340             auto *UR = dyn_cast<VPRecipeBase>(U);
341             return !UR || !EphRecipes.contains(UR);
342           }))
343         continue;
344       EphRecipes.insert(OpR);
345       Worklist.push_back(OpR);
346     }
347   }
348 }
349 
// Explicitly instantiate the generic dominator tree construction for VPlan's
// CFG (VPBlockBase nodes), as required by VPDominatorTree below.
template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
    DominatorTreeBase<VPBlockBase, false> &DT);
352 
/// Returns true if recipe \p A properly dominates recipe \p B: either A
/// appears before B in the same block, or A's block properly dominates B's
/// block in the VPlan CFG.
bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
                                        const VPRecipeBase *B) {
  // A recipe never properly dominates itself.
  if (A == B)
    return false;

  // Returns true if A appears before B in their shared parent block, by
  // scanning the block from the top until either recipe is found.
  auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
    for (auto &R : *A->getParent()) {
      if (&R == A)
        return true;
      if (&R == B)
        return false;
    }
    llvm_unreachable("recipe not found");
  };
  const VPBlockBase *ParentA = A->getParent();
  const VPBlockBase *ParentB = B->getParent();
  // Same block: dominance reduces to textual order within the block.
  if (ParentA == ParentB)
    return LocalComesBefore(A, B);

#ifndef NDEBUG
  // Debug-only sanity check: neither recipe may sit inside a replicate
  // region, since the block-level query below does not account for them.
  auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
    auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
    if (Region && Region->isReplicator()) {
      assert(Region->getNumSuccessors() == 1 &&
             Region->getNumPredecessors() == 1 && "Expected SESE region!");
      assert(R->getParent()->size() == 1 &&
             "A recipe in an original replicator region must be the only "
             "recipe in its block");
      return Region;
    }
    return nullptr;
  };
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
         "No replicate regions expected at this point");
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
         "No replicate regions expected at this point");
#endif
  // Different blocks: defer to block-level dominance.
  return Base::properlyDominates(ParentA, ParentB);
}
392 
393 /// Get the VF scaling factor applied to the recipe's output, if the recipe has
394 /// one.
395 static unsigned getVFScaleFactor(VPRecipeBase *R) {
396   if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
397     return RR->getVFScaleFactor();
398   if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
399     return RR->getVFScaleFactor();
400   assert(
401       (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
402                                      VPInstruction::ReductionStartVector) &&
403       "getting scaling factor of reduction-start-vector not implemented yet");
404   return 1;
405 }
406 
407 bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI) const {
408   return any_of(MaxLocalUsers, [&TTI](auto &LU) {
409     return LU.second > TTI.getNumberOfRegisters(LU.first);
410   });
411 }
412 
/// Estimate, per candidate VF, the peak number of live registers (per target
/// register class) via a linear-scan style live-interval analysis over the
/// recipes of \p Plan's vector loop region. Loop-invariant values are counted
/// separately, since they do not grow with unrolling.
SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
    VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
    const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
  // Each 'key' in the map opens a new interval. The values
  // of the map are the index of the 'last seen' usage of the
  // recipe that is the key.
  using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>;

  // Maps indices to recipes.
  SmallVector<VPRecipeBase *, 64> Idx2Recipe;
  // Marks the end of each interval.
  IntervalMap EndPoint;
  // Saves the list of recipe indices that are used in the loop.
  SmallPtrSet<VPRecipeBase *, 8> Ends;
  // Saves the list of values that are used in the loop but are defined outside
  // the loop (not including non-recipe values such as arguments and
  // constants).
  SmallSetVector<VPValue *, 8> LoopInvariants;
  LoopInvariants.insert(&Plan.getVectorTripCount());

  // We scan the loop in a topological order in order and assign a number to
  // each recipe. We use RPO to ensure that defs are met before their users. We
  // assume that each recipe that has in-loop users starts an interval. We
  // record every time that an in-loop value is used, so we have a list of the
  // first and last occurrences of each recipe.
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getVectorLoopRegion());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    // Blocks without a parent region are outside the loop region; stop there.
    if (!VPBB->getParent())
      break;
    for (VPRecipeBase &R : *VPBB) {
      Idx2Recipe.push_back(&R);

      // Save the end location of each USE.
      for (VPValue *U : R.operands()) {
        auto *DefR = U->getDefiningRecipe();

        // Ignore non-recipe values such as arguments, constants, etc.
        // FIXME: Might need some motivation why these values are ignored. If
        // for example an argument is used inside the loop it will increase the
        // register pressure (so shouldn't we add it to LoopInvariants).
        if (!DefR && (!U->getLiveInIRValue() ||
                      !isa<Instruction>(U->getLiveInIRValue())))
          continue;

        // If this recipe is outside the loop then record it and continue.
        if (!DefR) {
          LoopInvariants.insert(U);
          continue;
        }

        // Overwrite previous end points.
        EndPoint[DefR] = Idx2Recipe.size();
        Ends.insert(DefR);
      }
    }
    if (VPBB == Plan.getVectorLoopRegion()->getExiting()) {
      // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
      // exiting block, where their increment will get materialized eventually.
      for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
        if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
          EndPoint[&R] = Idx2Recipe.size();
          Ends.insert(&R);
        }
      }
    }
  }

  // Saves the list of intervals that end with the index in 'key'.
  using RecipeList = SmallVector<VPRecipeBase *, 2>;
  SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds;

  // Next, we transpose the EndPoints into a multi map that holds the list of
  // intervals that *end* at a specific location.
  for (auto &Interval : EndPoint)
    TransposeEnds[Interval.second].push_back(Interval.first);

  SmallPtrSet<VPRecipeBase *, 8> OpenIntervals;
  SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
  // Per-VF map from register class ID to its maximum simultaneous usage.
  SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());

  LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());

  const auto &TTICapture = TTI;
  // Number of registers needed to hold a vector of Ty with VF lanes; 0 for
  // types that cannot form (legal) vectors.
  auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
    if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) ||
        (VF.isScalable() &&
         !TTICapture.isElementTypeLegalForScalableVector(Ty)))
      return 0;
    return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
  };

  // We scan the instructions linearly and record each time that a new interval
  // starts, by placing it in a set. If we find this value in TransposEnds then
  // we remove it from the set. The max register usage is the maximum register
  // usage of the recipes of the set.
  for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
    VPRecipeBase *R = Idx2Recipe[Idx];

    // Remove all of the recipes that end at this location.
    RecipeList &List = TransposeEnds[Idx];
    for (VPRecipeBase *ToRemove : List)
      OpenIntervals.erase(ToRemove);

    // Ignore recipes that are never used within the loop and do not have side
    // effects.
    if (!Ends.count(R) && !R->mayHaveSideEffects())
      continue;

    // Skip recipes for ignored values.
    // TODO: Should mark recipes for ephemeral values that cannot be removed
    // explictly in VPlan.
    if (isa<VPSingleDefRecipe>(R) &&
        ValuesToIgnore.contains(
            cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
      continue;

    // For each VF find the maximum usage of registers.
    for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
      // Count the number of registers used, per register class, given all open
      // intervals.
      // Note that elements in this SmallMapVector will be default constructed
      // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
      // there is no previous entry for ClassID.
      SmallMapVector<unsigned, unsigned, 4> RegUsage;

      for (auto *R : OpenIntervals) {
        // Skip recipes that weren't present in the original loop.
        // TODO: Remove after removing the legacy
        // LoopVectorizationCostModel::calculateRegisterUsage
        if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
                VPBranchOnMaskRecipe>(R))
          continue;

        // Recipes that produce a scalar (or whose users only consume their
        // scalar lanes) count as one scalar register; everything else counts
        // as vector registers per defined value.
        if (VFs[J].isScalar() ||
            isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
                VPScalarIVStepsRecipe>(R) ||
            (isa<VPInstruction>(R) &&
             all_of(cast<VPSingleDefRecipe>(R)->users(),
                    [&](VPUser *U) {
                      return cast<VPRecipeBase>(U)->usesScalars(
                          R->getVPSingleValue());
                    })) ||
            (isa<VPReductionPHIRecipe>(R) &&
             (cast<VPReductionPHIRecipe>(R))->isInLoop())) {
          unsigned ClassID = TTI.getRegisterClassForType(
              false, TypeInfo.inferScalarType(R->getVPSingleValue()));
          // FIXME: The target might use more than one register for the type
          // even in the scalar case.
          RegUsage[ClassID] += 1;
        } else {
          // The output from scaled phis and scaled reductions actually has
          // fewer lanes than the VF.
          unsigned ScaleFactor = getVFScaleFactor(R);
          ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
          LLVM_DEBUG(if (VF != VFs[J]) {
            dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
                   << " for " << *R << "\n";
          });

          for (VPValue *DefV : R->definedValues()) {
            Type *ScalarTy = TypeInfo.inferScalarType(DefV);
            unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
            RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
          }
        }
      }

      // Fold this point's per-class usage into the running per-VF maxima.
      for (const auto &Pair : RegUsage) {
        auto &Entry = MaxUsages[J][Pair.first];
        Entry = std::max(Entry, Pair.second);
      }
    }

    LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
                      << OpenIntervals.size() << '\n');

    // Add the current recipe to the list of open intervals.
    OpenIntervals.insert(R);
  }

  // We also search for instructions that are defined outside the loop, but are
  // used inside the loop. We need this number separately from the max-interval
  // usage number because when we unroll, loop-invariant values do not take
  // more register.
  VPRegisterUsage RU;
  for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
    // Note that elements in this SmallMapVector will be default constructed
    // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
    // there is no previous entry for ClassID.
    SmallMapVector<unsigned, unsigned, 4> Invariant;

    for (auto *In : LoopInvariants) {
      // FIXME: The target might use more than one register for the type
      // even in the scalar case.
      // An invariant only needs a scalar register if every in-loop user
      // consumes its scalar value.
      bool IsScalar = all_of(In->users(), [&](VPUser *U) {
        return cast<VPRecipeBase>(U)->usesScalars(In);
      });

      ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx];
      unsigned ClassID = TTI.getRegisterClassForType(
          VF.isVector(), TypeInfo.inferScalarType(In));
      Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF);
    }

    LLVM_DEBUG({
      dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
      dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
             << " item\n";
      for (const auto &pair : MaxUsages[Idx]) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
      dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
             << " item\n";
      for (const auto &pair : Invariant) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
    });

    RU.LoopInvariantRegs = Invariant;
    RU.MaxLocalUsers = MaxUsages[Idx];
    RUs[Idx] = RU;
  }

  return RUs;
}
645