//===- VPlanAnalysis.cpp - Various Analyses working on VPlan ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "VPlanAnalysis.h"
#include "VPlan.h"
#include "VPlanCFG.h"
#include "VPlanDominatorTree.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;

#define DEBUG_TYPE "vplan"

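/// Determine the scalar type of the canonical IV: prefer the type of an
/// existing VPCanonicalIVPHIRecipe in the vector loop region and otherwise
/// fall back to the type of the trip-count expression.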
VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan)
    : Ctx(Plan.getScalarHeader()->getIRBasicBlock()->getContext()) {
  if (auto LoopRegion = Plan.getVectorLoopRegion()) {
    if (const auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(
            &LoopRegion->getEntryBasicBlock()->front())) {
      CanonicalIVTy = CanIV->getScalarType();
      return;
    }
  }

  // If there's no canonical IV, retrieve the type from the trip count
  // expression.
  auto *TC = Plan.getTripCount();
  if (TC->isLiveIn()) {
    CanonicalIVTy = TC->getLiveInIRValue()->getType();
    return;
  }
  CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
}

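/// All incoming values of a blend must have the same scalar type; infer the
/// result type from the first incoming value and cache it for the others.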
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
  Type *ResTy = inferScalarType(R->getIncomingValue(0));
  for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
    VPValue *Inc = R->getIncomingValue(I);
    assert(inferScalarType(Inc) == ResTy &&
           "different types inferred for different incoming values");
    CachedTypes[Inc] = ResTy;
  }
  return ResTy;
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
  // Set the result type from the first operand, check if the types for all
  // other operands match and cache them.
  auto SetResultTyFromOp = [this, R]() {
    Type *ResTy = inferScalarType(R->getOperand(0));
    for (unsigned Op = 1; Op != R->getNumOperands(); ++Op) {
      VPValue *OtherV = R->getOperand(Op);
      assert(inferScalarType(OtherV) == ResTy &&
             "different types inferred for different operands");
      CachedTypes[OtherV] = ResTy;
    }
    return ResTy;
  };

  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode))
    return SetResultTyFromOp();

  switch (Opcode) {
  case Instruction::ExtractElement:
  case Instruction::Freeze:
  case VPInstruction::ReductionStartVector:
    return inferScalarType(R->getOperand(0));
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    VPValue *OtherV = R->getOperand(2);
    assert(inferScalarType(OtherV) == ResTy &&
           "different types inferred for different operands");
    CachedTypes[OtherV] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
  case VPInstruction::ActiveLaneMask:
    assert(inferScalarType(R->getOperand(0)) ==
               inferScalarType(R->getOperand(1)) &&
           "different types inferred for different operands");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::ComputeAnyOfResult:
    return inferScalarType(R->getOperand(1));
  case VPInstruction::ComputeFindIVResult:
  case VPInstruction::ComputeReductionResult: {
    return inferScalarType(R->getOperand(0));
  }
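  // The explicit vector length is a 32-bit integer, matching the result of
  // the llvm.experimental.get.vector.length intrinsic.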
  case VPInstruction::ExplicitVectorLength:
    return Type::getIntNTy(Ctx, 32);
  case Instruction::PHI:
    // Infer the type of the first operand only, as other operands of header
    // phis may lead to infinite recursion.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::Not:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::AnyOf:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
    return SetResultTyFromOp();
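  // Lane indices, as produced by FirstActiveLane, are 64-bit integers.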
  case VPInstruction::FirstActiveLane:
    return Type::getIntNTy(Ctx, 64);
  case VPInstruction::ExtractLastElement:
  case VPInstruction::ExtractPenultimateElement: {
    Type *BaseTy = inferScalarType(R->getOperand(0));
    if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
      return VecTy->getElementType();
    return BaseTy;
  }
  case VPInstruction::LogicalAnd:
    assert(inferScalarType(R->getOperand(0))->isIntegerTy(1) &&
           inferScalarType(R->getOperand(1))->isIntegerTy(1) &&
           "LogicalAnd operands should be bool");
    return IntegerType::get(Ctx, 1);
  case VPInstruction::Broadcast:
  case VPInstruction::PtrAdd:
    // Return the type based on the first operand.
    return inferScalarType(R->getOperand(0));
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

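/// Infer the result type of a widened instruction from its operands; compares
/// produce an i1 and extractvalue takes its type from the struct operand.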
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
  unsigned Opcode = R->getOpcode();
  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "types for both operands must match for binary op");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  switch (Opcode) {
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::FNeg:
  case Instruction::Freeze:
    return inferScalarType(R->getOperand(0));
  case Instruction::ExtractValue: {
    assert(R->getNumOperands() == 2 && "expected single level extractvalue");
    auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0)));
    auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue());
    return StructTy->getTypeAtIndex(CI->getZExtValue());
  }
  default:
    break;
  }

  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode!");
}

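/// The scalar result type of a widened call matches the return type of the
/// underlying call instruction.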
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
  auto &CI = *cast<CallInst>(R->getUnderlyingInstr());
  return CI.getType();
}

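/// Among memory recipes only loads define a value; the result type is the
/// scalar type of the underlying load.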
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
  assert((isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R)) &&
         "Store recipes should not define any values");
  return cast<LoadInst>(&R->getIngredient())->getType();
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenSelectRecipe *R) {
  Type *ResTy = inferScalarType(R->getOperand(1));
  VPValue *OtherV = R->getOperand(2);
  assert(inferScalarType(OtherV) == ResTy &&
         "different types inferred for different operands");
  CachedTypes[OtherV] = ResTy;
  return ResTy;
}

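/// Replicate recipes execute the underlying scalar instruction once per lane,
/// so the scalar result type is determined by the underlying instruction's
/// opcode.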
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPReplicateRecipe *R) {
  unsigned Opcode = R->getUnderlyingInstr()->getOpcode();

  if (Instruction::isBinaryOp(Opcode) || Instruction::isShift(Opcode) ||
      Instruction::isBitwiseLogicOp(Opcode)) {
    Type *ResTy = inferScalarType(R->getOperand(0));
    assert(ResTy == inferScalarType(R->getOperand(1)) &&
           "inferred types for operands of binary op don't match");
    CachedTypes[R->getOperand(1)] = ResTy;
    return ResTy;
  }

  if (Instruction::isCast(Opcode))
    return R->getUnderlyingInstr()->getType();

  switch (Opcode) {
  case Instruction::Call: {
    unsigned CallIdx = R->getNumOperands() - (R->isPredicated() ? 2 : 1);
    return cast<Function>(R->getOperand(CallIdx)->getLiveInIRValue())
        ->getReturnType();
  }
  case Instruction::Select: {
    Type *ResTy = inferScalarType(R->getOperand(1));
    assert(ResTy == inferScalarType(R->getOperand(2)) &&
           "inferred types for operands of select op don't match");
    CachedTypes[R->getOperand(2)] = ResTy;
    return ResTy;
  }
  case Instruction::ICmp:
  case Instruction::FCmp:
    return IntegerType::get(Ctx, 1);
  case Instruction::Alloca:
  case Instruction::ExtractValue:
    return R->getUnderlyingInstr()->getType();
  case Instruction::Freeze:
  case Instruction::FNeg:
  case Instruction::GetElementPtr:
    return inferScalarType(R->getOperand(0));
  case Instruction::Load:
    return cast<LoadInst>(R->getUnderlyingInstr())->getType();
  case Instruction::Store:
    // FIXME: VPReplicateRecipes with store opcodes still define a result
    // VPValue, so we need to handle them here. Remove the code here once this
    // is modeled accurately in VPlan.
    return Type::getVoidTy(Ctx);
  default:
    break;
  }
  // Type inference not implemented for opcode.
  LLVM_DEBUG({
    dbgs() << "LV: Found unhandled opcode for: ";
    R->getVPSingleValue()->dump();
  });
  llvm_unreachable("Unhandled opcode");
}

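/// Infer the scalar type of \p V and cache the result. Live-ins use their IR
/// type (or the canonical IV type if they have no underlying IR value); all
/// other values dispatch on their defining recipe.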
Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
  if (Type *CachedTy = CachedTypes.lookup(V))
    return CachedTy;

  if (V->isLiveIn()) {
    if (auto *IRValue = V->getLiveInIRValue())
      return IRValue->getType();
    // All VPValues without any underlying IR value (like the vector trip count
    // or the backedge-taken count) have the same type as the canonical IV.
    return CanonicalIVTy;
  }

  Type *ResultTy =
      TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
          .Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
                VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
                VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
              [this](const auto *R) {
                // Handle header phi recipes, except VPWidenIntOrFpInduction
                // which needs special handling as it may be truncated.
                // TODO: consider inferring/caching type of siblings, e.g.,
                // backedge value, here and in cases below.
                return inferScalarType(R->getStartValue());
              })
          .Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
              [](const auto *R) { return R->getScalarType(); })
          .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
                VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
                VPVectorEndPointerRecipe, VPWidenCanonicalIVRecipe,
                VPPartialReductionRecipe>([this](const VPRecipeBase *R) {
            return inferScalarType(R->getOperand(0));
          })
          // VPInstructionWithType must be handled before VPInstruction.
          .Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
                VPWidenCastRecipe>(
              [](const auto *R) { return R->getResultType(); })
          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
                VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
              [this](const auto *R) { return inferScalarTypeForRecipe(R); })
          .Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
            // TODO: Use info from interleave group.
            return V->getUnderlyingValue()->getType();
          })
          .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
            return R->getSCEV()->getType();
          })
          .Case<VPReductionRecipe>([this](const auto *R) {
            return inferScalarType(R->getChainOp());
          })
          .Case<VPExpressionRecipe>([this](const auto *R) {
            return inferScalarType(R->getOperandOfResultType());
          });

  assert(ResultTy && "could not infer type for the given VPValue");
  CachedTypes[V] = ResultTy;
  return ResultTy;
}

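/// Collect recipes that only (transitively) feed llvm.assume calls and are
/// therefore ephemeral: they do not contribute to the vectorized code itself.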
void llvm::collectEphemeralRecipesForVPlan(
    VPlan &Plan, DenseSet<VPRecipeBase *> &EphRecipes) {
  // First, collect seed recipes which are operands of assumes.
  SmallVector<VPRecipeBase *> Worklist;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
    for (VPRecipeBase &R : *VPBB) {
      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
      if (!RepR || !match(RepR->getUnderlyingInstr(),
                          PatternMatch::m_Intrinsic<Intrinsic::assume>()))
        continue;
      Worklist.push_back(RepR);
      EphRecipes.insert(RepR);
    }
  }

  // Process operands of candidates in worklist and add them to the set of
  // ephemeral recipes, if they don't have side-effects and are only used by
  // other ephemeral recipes.
  while (!Worklist.empty()) {
    VPRecipeBase *Cur = Worklist.pop_back_val();
    for (VPValue *Op : Cur->operands()) {
      auto *OpR = Op->getDefiningRecipe();
      if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.contains(OpR))
        continue;
      if (any_of(Op->users(), [&EphRecipes](VPUser *U) {
            auto *UR = dyn_cast<VPRecipeBase>(U);
            return !UR || !EphRecipes.contains(UR);
          }))
        continue;
      EphRecipes.insert(OpR);
      Worklist.push_back(OpR);
    }
  }
}

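// Explicit instantiation of the dominator tree computation for VPlan CFGs,
// used by VPDominatorTree below.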
template void DomTreeBuilder::Calculate<DominatorTreeBase<VPBlockBase, false>>(
    DominatorTreeBase<VPBlockBase, false> &DT);

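/// Returns true if recipe \p A properly dominates \p B: within a single block
/// this is determined by recipe order; across blocks it reduces to block
/// dominance.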
bool VPDominatorTree::properlyDominates(const VPRecipeBase *A,
                                        const VPRecipeBase *B) {
  if (A == B)
    return false;

  auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
    for (auto &R : *A->getParent()) {
      if (&R == A)
        return true;
      if (&R == B)
        return false;
    }
    llvm_unreachable("recipe not found");
  };
  const VPBlockBase *ParentA = A->getParent();
  const VPBlockBase *ParentB = B->getParent();
  if (ParentA == ParentB)
    return LocalComesBefore(A, B);

#ifndef NDEBUG
  auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
    auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
    if (Region && Region->isReplicator()) {
      assert(Region->getNumSuccessors() == 1 &&
             Region->getNumPredecessors() == 1 && "Expected SESE region!");
      assert(R->getParent()->size() == 1 &&
             "A recipe in an original replicator region must be the only "
             "recipe in its block");
      return Region;
    }
    return nullptr;
  };
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
         "No replicate regions expected at this point");
  assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
         "No replicate regions expected at this point");
#endif
  return Base::properlyDominates(ParentA, ParentB);
}

/// Get the VF scaling factor applied to the recipe's output, if the recipe has
/// one.
static unsigned getVFScaleFactor(VPRecipeBase *R) {
  if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R))
    return RR->getVFScaleFactor();
  if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
    return RR->getVFScaleFactor();
  assert(
      (!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() !=
                                     VPInstruction::ReductionStartVector) &&
      "getting scaling factor of reduction-start-vector not implemented yet");
  return 1;
}

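/// Returns true if the maximum usage of any register class exceeds the number
/// of registers the target provides for that class.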
bool VPRegisterUsage::exceedsMaxNumRegs(const TargetTransformInfo &TTI) const {
  return any_of(MaxLocalUsers, [&TTI](auto &LU) {
    return LU.second > TTI.getNumberOfRegisters(LU.first);
  });
}

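/// Estimate register usage for each VF in \p VFs using a linear-scan style
/// interval analysis: recipes are numbered in reverse post-order, intervals
/// run from a recipe's definition to its last in-loop use, and the maximum
/// number of simultaneously open intervals per register class approximates
/// the register pressure.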
SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
    VPlan &Plan, ArrayRef<ElementCount> VFs, const TargetTransformInfo &TTI,
    const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
  // Each 'key' in the map opens a new interval. The values
  // of the map are the index of the 'last seen' usage of the
  // recipe that is the key.
  using IntervalMap = SmallDenseMap<VPRecipeBase *, unsigned, 16>;

  // Maps indices to recipes.
  SmallVector<VPRecipeBase *, 64> Idx2Recipe;
  // Marks the end of each interval.
  IntervalMap EndPoint;
  // Saves the list of recipe indices that are used in the loop.
  SmallPtrSet<VPRecipeBase *, 8> Ends;
  // Saves the list of values that are used in the loop but are defined outside
  // the loop (not including non-recipe values such as arguments and
  // constants).
  SmallSetVector<VPValue *, 8> LoopInvariants;
  LoopInvariants.insert(&Plan.getVectorTripCount());

  // We scan the loop in topological order and assign a number to each recipe.
  // We use RPO to ensure that defs are met before their users. We assume that
  // each recipe that has in-loop users starts an interval. We record every
  // time that an in-loop value is used, so we have a list of the first and
  // last occurrences of each recipe.
  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan.getVectorLoopRegion());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    if (!VPBB->getParent())
      break;
    for (VPRecipeBase &R : *VPBB) {
      Idx2Recipe.push_back(&R);

      // Save the end location of each USE.
      for (VPValue *U : R.operands()) {
        auto *DefR = U->getDefiningRecipe();

        // Ignore non-recipe values such as arguments, constants, etc.
        // FIXME: Might need some motivation why these values are ignored. If
        // for example an argument is used inside the loop it will increase the
        // register pressure (so shouldn't we add it to LoopInvariants?).
        if (!DefR && (!U->getLiveInIRValue() ||
                      !isa<Instruction>(U->getLiveInIRValue())))
          continue;

        // If this recipe is outside the loop then record it and continue.
        if (!DefR) {
          LoopInvariants.insert(U);
          continue;
        }

        // Overwrite previous end points.
        EndPoint[DefR] = Idx2Recipe.size();
        Ends.insert(DefR);
      }
    }
    if (VPBB == Plan.getVectorLoopRegion()->getExiting()) {
      // VPWidenIntOrFpInductionRecipes are used implicitly at the end of the
      // exiting block, where their increment will get materialized eventually.
      for (auto &R : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
        if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
          EndPoint[&R] = Idx2Recipe.size();
          Ends.insert(&R);
        }
      }
    }
  }

  // Saves the list of intervals that end with the index in 'key'.
  using RecipeList = SmallVector<VPRecipeBase *, 2>;
  SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds;

  // Next, we transpose the EndPoints into a multi map that holds the list of
  // intervals that *end* at a specific location.
  for (auto &Interval : EndPoint)
    TransposeEnds[Interval.second].push_back(Interval.first);

  SmallPtrSet<VPRecipeBase *, 8> OpenIntervals;
  SmallVector<VPRegisterUsage, 8> RUs(VFs.size());
  SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());

  LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");

  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());

  const auto &TTICapture = TTI;
  auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
    if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty) ||
        (VF.isScalable() &&
         !TTICapture.isElementTypeLegalForScalableVector(Ty)))
      return 0;
    return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
  };

  // We scan the instructions linearly and record each time that a new interval
  // starts, by placing it in a set. If we find this value in TransposeEnds
  // then we remove it from the set. The max register usage is the maximum
  // register usage of the recipes of the set.
  for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
    VPRecipeBase *R = Idx2Recipe[Idx];

    // Remove all of the recipes that end at this location.
    RecipeList &List = TransposeEnds[Idx];
    for (VPRecipeBase *ToRemove : List)
      OpenIntervals.erase(ToRemove);

    // Ignore recipes that are never used within the loop and do not have side
    // effects.
    if (!Ends.count(R) && !R->mayHaveSideEffects())
      continue;

    // Skip recipes for ignored values.
    // TODO: Should mark recipes for ephemeral values that cannot be removed
    // explicitly in VPlan.
    if (isa<VPSingleDefRecipe>(R) &&
        ValuesToIgnore.contains(
            cast<VPSingleDefRecipe>(R)->getUnderlyingValue()))
      continue;

    // For each VF find the maximum usage of registers.
    for (unsigned J = 0, E = VFs.size(); J < E; ++J) {
      // Count the number of registers used, per register class, given all open
      // intervals.
      // Note that elements in this SmallMapVector will be default constructed
      // as 0. So we can use "RegUsage[ClassID] += n" in the code below even if
      // there is no previous entry for ClassID.
      SmallMapVector<unsigned, unsigned, 4> RegUsage;

      for (auto *R : OpenIntervals) {
        // Skip recipes that weren't present in the original loop.
        // TODO: Remove after removing the legacy
        // LoopVectorizationCostModel::calculateRegisterUsage
        if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
                VPBranchOnMaskRecipe>(R))
          continue;

        if (VFs[J].isScalar() ||
            isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
                VPScalarIVStepsRecipe>(R) ||
            (isa<VPInstruction>(R) &&
             all_of(cast<VPSingleDefRecipe>(R)->users(),
                    [&](VPUser *U) {
                      return cast<VPRecipeBase>(U)->usesScalars(
                          R->getVPSingleValue());
                    })) ||
            (isa<VPReductionPHIRecipe>(R) &&
             (cast<VPReductionPHIRecipe>(R))->isInLoop())) {
          unsigned ClassID = TTI.getRegisterClassForType(
              false, TypeInfo.inferScalarType(R->getVPSingleValue()));
          // FIXME: The target might use more than one register for the type
          // even in the scalar case.
          RegUsage[ClassID] += 1;
        } else {
          // The output from scaled phis and scaled reductions actually has
          // fewer lanes than the VF.
          unsigned ScaleFactor = getVFScaleFactor(R);
          ElementCount VF = VFs[J].divideCoefficientBy(ScaleFactor);
          LLVM_DEBUG(if (VF != VFs[J]) {
            dbgs() << "LV(REG): Scaled down VF from " << VFs[J] << " to " << VF
                   << " for " << *R << "\n";
          });

          for (VPValue *DefV : R->definedValues()) {
            Type *ScalarTy = TypeInfo.inferScalarType(DefV);
            unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
            RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
          }
        }
      }

      for (const auto &Pair : RegUsage) {
        auto &Entry = MaxUsages[J][Pair.first];
        Entry = std::max(Entry, Pair.second);
      }
    }

    LLVM_DEBUG(dbgs() << "LV(REG): At #" << Idx << " Interval # "
                      << OpenIntervals.size() << '\n');

    // Add the current recipe to the list of open intervals.
    OpenIntervals.insert(R);
  }

  // We also search for instructions that are defined outside the loop, but are
  // used inside the loop. We need this number separately from the max-interval
  // usage number because when we unroll, loop-invariant values do not take
  // more registers.
  VPRegisterUsage RU;
  for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
    // Note that elements in this SmallMapVector will be default constructed
    // as 0. So we can use "Invariant[ClassID] += n" in the code below even if
    // there is no previous entry for ClassID.
    SmallMapVector<unsigned, unsigned, 4> Invariant;

    for (auto *In : LoopInvariants) {
      // FIXME: The target might use more than one register for the type
      // even in the scalar case.
      bool IsScalar = all_of(In->users(), [&](VPUser *U) {
        return cast<VPRecipeBase>(U)->usesScalars(In);
      });

      ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[Idx];
      unsigned ClassID = TTI.getRegisterClassForType(
          VF.isVector(), TypeInfo.inferScalarType(In));
      Invariant[ClassID] += GetRegUsage(TypeInfo.inferScalarType(In), VF);
    }

    LLVM_DEBUG({
      dbgs() << "LV(REG): VF = " << VFs[Idx] << '\n';
      dbgs() << "LV(REG): Found max usage: " << MaxUsages[Idx].size()
             << " item\n";
      for (const auto &pair : MaxUsages[Idx]) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
      dbgs() << "LV(REG): Found invariant usage: " << Invariant.size()
             << " item\n";
      for (const auto &pair : Invariant) {
        dbgs() << "LV(REG): RegisterClass: "
               << TTI.getRegisterClassName(pair.first) << ", " << pair.second
               << " registers\n";
      }
    });

    RU.LoopInvariantRegs = Invariant;
    RU.MaxLocalUsers = MaxUsages[Idx];
    RUs[Idx] = RU;
  }

  return RUs;
}