//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
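// As a schematic example (operands are simplified, not exact), a vector add
// pseudo carrying its own AVL and SEW operands:
//
//   %v = PseudoVADD_VV_M1 %a, %b, %avl, 5 /* log2(e32) */
//
// is lowered so that an explicit configuration change precedes it:
//
//   dead $x0 = PseudoVSETVLI %avl, <e32, m1, ta, ma>, implicit-def $vl
//   %v = PseudoVADD_VV_M1 %a, %b, $noreg, 5, implicit $vl, implicit $vtype
//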
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
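/// For example, 'vsetvli x0, x0, e32, m1, ta, ma' rewrites VTYPE while
/// leaving the current VL unchanged.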
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static uint16_t getRVVMCOpcode(uint16_t RVVPseudoOpcode) {
  const RISCVVPseudosTable::PseudoInfo *RVV =
      RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
  if (!RVV)
    return 0;
  return RVV->BaseInstr;
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

/// Get the EEW for a load or store instruction which ignores SEW.  Return
/// std::nullopt if MI is not such a load or store.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}
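
// For example, vle32.v always moves 32-bit elements regardless of the dynamic
// SEW; its effective EMUL is (EEW/SEW)*LMUL, so only the SEW/LMUL ratio is
// constrained by the encoding (see getDemanded below).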

/// Return true if this is an operation on mask registers.  Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used.  If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  bool SEW = false;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() const {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = true;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=" << SEW << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

/// Return true if the two values of the VTYPE register provided are
/// indistinguishable from the perspective of an instruction (or set of
/// instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t VType1,
                                uint64_t VType2,
                                const DemandedFields &Used) {
  if (Used.SEW &&
      RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
    return false;

  if (Used.LMUL &&
      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType1),
                                              RISCVVType::getVLMUL(VType1));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType2),
                                              RISCVVType::getVLMUL(VType2));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy &&
      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
    return false;
  if (Used.MaskPolicy &&
      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
    return false;
  return true;
}
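
// For example, e32/m1 and e64/m2 both have a SEW/LMUL ratio of 32, so a VTYPE
// change between them is invisible to an instruction that demands only
// SEWLMULRatio (and neither policy bit).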

/// Return the fields and properties demanded by the provided instruction.
static DemandedFields getDemanded(const MachineInstr &MI) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms.  The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode.  This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarMoveInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      return getAVLReg() == RISCV::X0;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL() && Other.hasNonZeroAVL());
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
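  // For example, with VLEN=128, both (SEW=32, LMUL=1) and (SEW=64, LMUL=2)
  // give VLMAX = VLEN * LMUL / SEW = 4; they share the SEW/LMUL ratio 32.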
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this, given the fields demanded in Used.
  bool isCompatible(const DemandedFields &Used,
                    const VSETVLIInfo &Require) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our SEW/LMUL ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    if (Used.VLAny && !hasSameAVL(Require))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
      return false;

    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic, MaskAgnostic;
  unsigned UseOpIdx;
  if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // If the tied operand is an IMPLICIT_DEF we can use TailAgnostic and
    // MaskAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef()) {
      TailAgnostic = true;
      MaskAgnostic = true;
    }
    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  } else {
    // If there is no tied operand, there shouldn't be a policy operand.
    assert(!RISCVII::hasVecPolicyOp(TSFlags) && "Unexpected policy operand");
    // With no tied operand, use agnostic policies.
    TailAgnostic = true;
    MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {

  // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
  // VLMAX.
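  // (This emits, e.g., "vsetvli x0, x0, e32, m1, ta, ma", which leaves the
  // current VL unchanged while rewriting VTYPE.)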
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI);

  if (isScalarMoveInstr(MI)) {
    // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we
    // don't need to preserve any other bits and are thus compatible with any
    // larger etype, and can disregard policy bits.  Warning: It's tempting to
    // try doing this for any tail agnostic operation, but we can't as TA
    // requires tail lanes to either be the original value or -1.  We are
    // writing unknown bits to the lanes here.
    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
    if (VRegDef && VRegDef->isImplicitDef() &&
        CurInfo.getSEW() >= Require.getSEW()) {
      Used.SEW = false;
      Used.TailPolicy = false;
    }
  }

  if (CurInfo.isCompatible(Used, Require))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// Given an incoming state reaching MI, modifies that state so that it is
// minimally compatible with MI.  The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. We can discard the user requested AVL and just use the last
  // one if we can prove it equally zero.  This removes a vsetvli entirely
  // if the types match, or allows use of the cheaper AVL-preserving variant
  // if VLMAX doesn't change.  If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to
  // prevent extending the live range of an AVL register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasEquallyZeroAVL(Info) &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }

  // If AVL is defined by a vsetvli with the same VLMAX, we can
  // replace the AVL operand with the AVL of the defining vsetvli.
  // We avoid general register AVLs to avoid extending live ranges
  // without being sure we can kill the original source reg entirely.
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state.  To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had, since we know the GPR result of the implicit state change
        // wouldn't be used and the VL/VTYPE registers are correct.  Note that
        // we *do* need to model the state as if it changed: while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE.  Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Return true if the VL value configured must be equal to the requested one.
static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
  if (!Info.hasAVLImm())
    // VLMAX is always the same value.
    // TODO: Could extend to other registers by looking at the associated vreg
    // def placement.
    return RISCV::X0 == Info.getAVLReg();

  unsigned AVL = Info.getAVLImm();
  unsigned SEW = Info.getSEW();
  unsigned AVLInBits = AVL * SEW;

  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());

  if (Fractional)
    return ST.getRealMinVLen() / LMul >= AVLInBits;
  return ST.getRealMinVLen() * LMul >= AVLInBits;
}
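
// For example, "vsetivli zero, 4, e32, m1" on a subtarget whose minimum VLEN
// is 128 always produces VL=4, since AVLInBits = 4 * 32 = 128 <= 128 * 1.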

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors.  Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a single
/// block loop when it could execute once in the preheader instead.
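/// For example, given a preheader P with a single successor L, where L is a
/// single-block loop whose only vector configuration requirement is a fixed
/// "vsetivli zero, 4, e32, m1, ta, ma", that transition can be performed once
/// at the end of P instead of implicitly on every iteration of L.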
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If VL can be less than AVL, then we can't reduce the frequency of
  // execution.
  if (!hasFixedResult(AvailableInfo, ST))
    return;

  // Does it actually let us remove an implicit transition in MBB?
  bool Found = false;
  for (auto &MI : MBB) {
    if (isVectorConfigInstr(MI))
      return;

    const uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
        return;
      Found = true;
      break;
    }
  }
  if (!Found)
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE.  Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldInfo);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VLAny |= B.VLAny;
  A.VLZeroness |= B.VLZeroness;
  A.SEW |= B.SEW;
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

static bool isNonZeroAVL(const MachineOperand &MO) {
  if (MO.isReg())
    return RISCV::X0 == MO.getReg();
  assert(MO.isImm());
  return 0 != MO.getImm();
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    // TODO: Requires more care in the mutation...
    if (isVLPreservingConfig(PrevMI))
      return false;

    // We don't bother to handle the equally zero case here as it's largely
    // uninteresting.
    if (Used.VLZeroness &&
        (!isNonZeroAVL(MI.getOperand(1)) ||
         !isNonZeroAVL(PrevMI.getOperand(1))))
      return false;

    // TODO: Track whether the register is defined between
    // PrevMI and MI.
    if (MI.getOperand(1).isReg() &&
        RISCV::X0 != MI.getOperand(1).getReg())
      return false;

    // TODO: We need to change the result register to allow this rewrite
    // without the result forming a vl preserving vsetvli which is not
    // a correct state merge.
    if (PrevMI.getOperand(0).getReg() == RISCV::X0 &&
        MI.getOperand(1).isReg())
      return false;
  }

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      doUnion(Used, getDemanded(MI));
      continue;
    }

    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!isVLPreservingConfig(*NextMI)) {
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI);
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    HaveVectorOp |= computeVLVTYPEChanges(MBB);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = BBInfo.Change;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms.  Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow.  We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Once we're fully done rewriting all the instructions, do a final pass
  // through to check for VSETVLIs which write to an unused destination.
  // For the non X0, X0 variant, we can replace the destination register
  // with X0 to reduce register pressure.  This is really a generic
  // optimization which can be applied to any dead def (TODO: generalize).
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
          MI.getOpcode() == RISCV::PseudoVSETIVLI) {
        Register VRegDef = MI.getOperand(0).getReg();
        if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
          MI.getOperand(0).setReg(RISCV::X0);
      }
    }
  }

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}