xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a function pass that inserts VSETVLI instructions where
10 // needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
11 // instructions.
12 //
13 // This pass consists of 3 phases:
14 //
15 // Phase 1 collects how each basic block affects VL/VTYPE.
16 //
17 // Phase 2 uses the information from phase 1 to do a data flow analysis to
18 // propagate the VL/VTYPE changes through the function. This gives us the
19 // VL/VTYPE at the start of each basic block.
20 //
21 // Phase 3 inserts VSETVLI instructions in each basic block. Information from
22 // phase 2 is used to prevent inserting a VSETVLI before the first vector
23 // instruction in the block if possible.
24 //
25 //===----------------------------------------------------------------------===//
26 
27 #include "RISCV.h"
28 #include "RISCVSubtarget.h"
29 #include "llvm/ADT/PostOrderIterator.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/CodeGen/LiveDebugVariables.h"
32 #include "llvm/CodeGen/LiveIntervals.h"
33 #include "llvm/CodeGen/LiveStacks.h"
34 #include "llvm/CodeGen/MachineFunctionPass.h"
35 #include <queue>
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "riscv-insert-vsetvli"
39 #define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
40 
41 STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
42 STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
43 
44 static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVTYPE(
45     DEBUG_TYPE "-whole-vector-register-move-valid-vtype", cl::Hidden,
46     cl::desc("Insert vsetvlis before vmvNr.vs to ensure vtype is valid and "
47              "vill is cleared"),
48     cl::init(true));
49 
50 namespace {
51 
52 /// Given a virtual register \p Reg, return the corresponding VNInfo for it.
53 /// This will return nullptr if the virtual register is an implicit_def or
54 /// if LiveIntervals is not available.
getVNInfoFromReg(Register Reg,const MachineInstr & MI,const LiveIntervals * LIS)55 static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
56                                 const LiveIntervals *LIS) {
57   assert(Reg.isVirtual());
58   if (!LIS)
59     return nullptr;
60   auto &LI = LIS->getInterval(Reg);
61   SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
62   return LI.getVNInfoBefore(SI);
63 }
64 
getVLOpNum(const MachineInstr & MI)65 static unsigned getVLOpNum(const MachineInstr &MI) {
66   return RISCVII::getVLOpNum(MI.getDesc());
67 }
68 
getSEWOpNum(const MachineInstr & MI)69 static unsigned getSEWOpNum(const MachineInstr &MI) {
70   return RISCVII::getSEWOpNum(MI.getDesc());
71 }
72 
73 /// Get the EEW for a load or store instruction.  Return std::nullopt if MI is
74 /// not a load or store which ignores SEW.
getEEWForLoadStore(const MachineInstr & MI)75 static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
76   switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
77   default:
78     return std::nullopt;
79   case RISCV::VLE8_V:
80   case RISCV::VLSE8_V:
81   case RISCV::VSE8_V:
82   case RISCV::VSSE8_V:
83     return 8;
84   case RISCV::VLE16_V:
85   case RISCV::VLSE16_V:
86   case RISCV::VSE16_V:
87   case RISCV::VSSE16_V:
88     return 16;
89   case RISCV::VLE32_V:
90   case RISCV::VLSE32_V:
91   case RISCV::VSE32_V:
92   case RISCV::VSSE32_V:
93     return 32;
94   case RISCV::VLE64_V:
95   case RISCV::VLSE64_V:
96   case RISCV::VSE64_V:
97   case RISCV::VSSE64_V:
98     return 64;
99   }
100 }
101 
102 /// Return true if this is an operation on mask registers.  Note that
103 /// this includes both arithmetic/logical ops and load/store (vlm/vsm).
isMaskRegOp(const MachineInstr & MI)104 static bool isMaskRegOp(const MachineInstr &MI) {
105   if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
106     return false;
107   const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
108   // A Log2SEW of 0 is an operation on mask registers only.
109   return Log2SEW == 0;
110 }
111 
112 /// Return true if the inactive elements in the result are entirely undefined.
113 /// Note that this is different from "agnostic" as defined by the vector
114 /// specification.  Agnostic requires each lane to either be undisturbed, or
115 /// take the value -1; no other value is allowed.
hasUndefinedPassthru(const MachineInstr & MI)116 static bool hasUndefinedPassthru(const MachineInstr &MI) {
117 
118   unsigned UseOpIdx;
119   if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
120     // If there is no passthrough operand, then the pass through
121     // lanes are undefined.
122     return true;
123 
124   // All undefined passthrus should be $noreg: see
125   // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
126   const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
127   return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
128 }
129 
130 /// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
isVectorCopy(const TargetRegisterInfo * TRI,const MachineInstr & MI)131 static bool isVectorCopy(const TargetRegisterInfo *TRI,
132                          const MachineInstr &MI) {
133   return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&
134          RISCVRegisterInfo::isRVVRegClass(
135              TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));
136 }
137 
138 /// Which subfields of VL or VTYPE have values we need to preserve?
139 struct DemandedFields {
140   // Some unknown property of VL is used.  If demanded, must preserve entire
141   // value.
142   bool VLAny = false;
143   // Only zero vs non-zero is used. If demanded, can change non-zero values.
144   bool VLZeroness = false;
145   // What properties of SEW we need to preserve.
146   enum : uint8_t {
147     SEWEqual = 3, // The exact value of SEW needs to be preserved.
148     SEWGreaterThanOrEqualAndLessThan64 =
149         2, // SEW can be changed as long as it's greater
150            // than or equal to the original value, but must be less
151            // than 64.
152     SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
153                                // than or equal to the original value.
154     SEWNone = 0                // We don't need to preserve SEW at all.
155   } SEW = SEWNone;
156   enum : uint8_t {
157     LMULEqual = 2, // The exact value of LMUL needs to be preserved.
158     LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
159     LMULNone = 0                 // We don't need to preserve LMUL at all.
160   } LMUL = LMULNone;
161   bool SEWLMULRatio = false;
162   bool TailPolicy = false;
163   bool MaskPolicy = false;
164   // If this is true, we demand that VTYPE is set to some legal state, i.e. that
165   // vill is unset.
166   bool VILL = false;
167 
168   // Return true if any part of VTYPE was used
usedVTYPE__anoncddf45c50111::DemandedFields169   bool usedVTYPE() const {
170     return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
171   }
172 
173   // Return true if any property of VL was used
usedVL__anoncddf45c50111::DemandedFields174   bool usedVL() {
175     return VLAny || VLZeroness;
176   }
177 
178   // Mark all VTYPE subfields and properties as demanded
demandVTYPE__anoncddf45c50111::DemandedFields179   void demandVTYPE() {
180     SEW = SEWEqual;
181     LMUL = LMULEqual;
182     SEWLMULRatio = true;
183     TailPolicy = true;
184     MaskPolicy = true;
185     VILL = true;
186   }
187 
188   // Mark all VL properties as demanded
demandVL__anoncddf45c50111::DemandedFields189   void demandVL() {
190     VLAny = true;
191     VLZeroness = true;
192   }
193 
all__anoncddf45c50111::DemandedFields194   static DemandedFields all() {
195     DemandedFields DF;
196     DF.demandVTYPE();
197     DF.demandVL();
198     return DF;
199   }
200 
201   // Make this the result of demanding both the fields in this and B.
doUnion__anoncddf45c50111::DemandedFields202   void doUnion(const DemandedFields &B) {
203     VLAny |= B.VLAny;
204     VLZeroness |= B.VLZeroness;
205     SEW = std::max(SEW, B.SEW);
206     LMUL = std::max(LMUL, B.LMUL);
207     SEWLMULRatio |= B.SEWLMULRatio;
208     TailPolicy |= B.TailPolicy;
209     MaskPolicy |= B.MaskPolicy;
210     VILL |= B.VILL;
211   }
212 
213 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
214   /// Support for debugging, callable in GDB: V->dump()
dump__anoncddf45c50111::DemandedFields215   LLVM_DUMP_METHOD void dump() const {
216     print(dbgs());
217     dbgs() << "\n";
218   }
219 
220   /// Implement operator<<.
print__anoncddf45c50111::DemandedFields221   void print(raw_ostream &OS) const {
222     OS << "{";
223     OS << "VLAny=" << VLAny << ", ";
224     OS << "VLZeroness=" << VLZeroness << ", ";
225     OS << "SEW=";
226     switch (SEW) {
227     case SEWEqual:
228       OS << "SEWEqual";
229       break;
230     case SEWGreaterThanOrEqual:
231       OS << "SEWGreaterThanOrEqual";
232       break;
233     case SEWGreaterThanOrEqualAndLessThan64:
234       OS << "SEWGreaterThanOrEqualAndLessThan64";
235       break;
236     case SEWNone:
237       OS << "SEWNone";
238       break;
239     };
240     OS << ", ";
241     OS << "LMUL=";
242     switch (LMUL) {
243     case LMULEqual:
244       OS << "LMULEqual";
245       break;
246     case LMULLessThanOrEqualToM1:
247       OS << "LMULLessThanOrEqualToM1";
248       break;
249     case LMULNone:
250       OS << "LMULNone";
251       break;
252     };
253     OS << ", ";
254     OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
255     OS << "TailPolicy=" << TailPolicy << ", ";
256     OS << "MaskPolicy=" << MaskPolicy << ", ";
257     OS << "VILL=" << VILL;
258     OS << "}";
259   }
260 #endif
261 };
262 
263 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
264 LLVM_ATTRIBUTE_USED
operator <<(raw_ostream & OS,const DemandedFields & DF)265 inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
266   DF.print(OS);
267   return OS;
268 }
269 #endif
270 
isLMUL1OrSmaller(RISCVVType::VLMUL LMUL)271 static bool isLMUL1OrSmaller(RISCVVType::VLMUL LMUL) {
272   auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
273   return Fractional || LMul == 1;
274 }
275 
276 /// Return true if moving from CurVType to NewVType is
277 /// indistinguishable from the perspective of an instruction (or set
278 /// of instructions) which use only the Used subfields and properties.
areCompatibleVTYPEs(uint64_t CurVType,uint64_t NewVType,const DemandedFields & Used)279 static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
280                                 const DemandedFields &Used) {
281   switch (Used.SEW) {
282   case DemandedFields::SEWNone:
283     break;
284   case DemandedFields::SEWEqual:
285     if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
286       return false;
287     break;
288   case DemandedFields::SEWGreaterThanOrEqual:
289     if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
290       return false;
291     break;
292   case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
293     if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
294         RISCVVType::getSEW(NewVType) >= 64)
295       return false;
296     break;
297   }
298 
299   switch (Used.LMUL) {
300   case DemandedFields::LMULNone:
301     break;
302   case DemandedFields::LMULEqual:
303     if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
304       return false;
305     break;
306   case DemandedFields::LMULLessThanOrEqualToM1:
307     if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
308       return false;
309     break;
310   }
311 
312   if (Used.SEWLMULRatio) {
313     auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
314                                               RISCVVType::getVLMUL(CurVType));
315     auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
316                                               RISCVVType::getVLMUL(NewVType));
317     if (Ratio1 != Ratio2)
318       return false;
319   }
320 
321   if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
322                              RISCVVType::isTailAgnostic(NewVType))
323     return false;
324   if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
325                              RISCVVType::isMaskAgnostic(NewVType))
326     return false;
327   return true;
328 }
329 
330 /// Return the fields and properties demanded by the provided instruction.
getDemanded(const MachineInstr & MI,const RISCVSubtarget * ST)331 DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
332   // This function works in coalesceVSETVLI too. We can still use the value of a
333   // SEW, VL, or Policy operand even though it might not be the exact value in
334   // the VL or VTYPE, since we only care about what the instruction originally
335   // demanded.
336 
337   // Most instructions don't use any of these subfeilds.
338   DemandedFields Res;
339   // Start conservative if registers are used
340   if (MI.isCall() || MI.isInlineAsm() ||
341       MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
342     Res.demandVL();
343   if (MI.isCall() || MI.isInlineAsm() ||
344       MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
345     Res.demandVTYPE();
346   // Start conservative on the unlowered form too
347   uint64_t TSFlags = MI.getDesc().TSFlags;
348   if (RISCVII::hasSEWOp(TSFlags)) {
349     Res.demandVTYPE();
350     if (RISCVII::hasVLOp(TSFlags))
351       if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
352           !VLOp.isReg() || !VLOp.isUndef())
353         Res.demandVL();
354 
355     // Behavior is independent of mask policy.
356     if (!RISCVII::usesMaskPolicy(TSFlags))
357       Res.MaskPolicy = false;
358   }
359 
360   // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
361   // They instead demand the ratio of the two which is used in computing
362   // EMUL, but which allows us the flexibility to change SEW and LMUL
363   // provided we don't change the ratio.
364   // Note: We assume that the instructions initial SEW is the EEW encoded
365   // in the opcode.  This is asserted when constructing the VSETVLIInfo.
366   if (getEEWForLoadStore(MI)) {
367     Res.SEW = DemandedFields::SEWNone;
368     Res.LMUL = DemandedFields::LMULNone;
369   }
370 
371   // Store instructions don't use the policy fields.
372   if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
373     Res.TailPolicy = false;
374     Res.MaskPolicy = false;
375   }
376 
377   // If this is a mask reg operation, it only cares about VLMAX.
378   // TODO: Possible extensions to this logic
379   // * Probably ok if available VLMax is larger than demanded
380   // * The policy bits can probably be ignored..
381   if (isMaskRegOp(MI)) {
382     Res.SEW = DemandedFields::SEWNone;
383     Res.LMUL = DemandedFields::LMULNone;
384   }
385 
386   // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
387   if (RISCVInstrInfo::isScalarInsertInstr(MI)) {
388     Res.LMUL = DemandedFields::LMULNone;
389     Res.SEWLMULRatio = false;
390     Res.VLAny = false;
391     // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't
392     // need to preserve any other bits and are thus compatible with any larger,
393     // etype and can disregard policy bits.  Warning: It's tempting to try doing
394     // this for any tail agnostic operation, but we can't as TA requires
395     // tail lanes to either be the original value or -1.  We are writing
396     // unknown bits to the lanes here.
397     if (hasUndefinedPassthru(MI)) {
398       if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
399           !ST->hasVInstructionsF64())
400         Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
401       else
402         Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
403       Res.TailPolicy = false;
404     }
405   }
406 
407   // vmv.x.s, and vfmv.f.s are unconditional and ignore everything except SEW.
408   if (RISCVInstrInfo::isScalarExtractInstr(MI)) {
409     assert(!RISCVII::hasVLOp(TSFlags));
410     Res.LMUL = DemandedFields::LMULNone;
411     Res.SEWLMULRatio = false;
412     Res.TailPolicy = false;
413     Res.MaskPolicy = false;
414   }
415 
416   if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
417     const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
418     // A slidedown/slideup with an *undefined* passthru can freely clobber
419     // elements not copied from the source vector (e.g. masked off, tail, or
420     // slideup's prefix). Notes:
421     // * We can't modify SEW here since the slide amount is in units of SEW.
422     // * VL=1 is special only because we have existing support for zero vs
423     //   non-zero VL.  We could generalize this if we had a VL > C predicate.
424     // * The LMUL1 restriction is for machines whose latency may depend on LMUL.
425     // * As above, this is only legal for tail "undefined" not "agnostic".
426     // * We avoid increasing vl if the subtarget has +vl-dependent-latency
427     if (RISCVInstrInfo::isVSlideInstr(MI) && VLOp.isImm() &&
428         VLOp.getImm() == 1 && hasUndefinedPassthru(MI) &&
429         !ST->hasVLDependentLatency()) {
430       Res.VLAny = false;
431       Res.VLZeroness = true;
432       Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
433       Res.TailPolicy = false;
434     }
435 
436     // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated in the
437     // same semantically as vmv.s.x.  This is particularly useful since we don't
438     // have an immediate form of vmv.s.x, and thus frequently use vmv.v.i in
439     // it's place. Since a splat is non-constant time in LMUL, we do need to be
440     // careful to not increase the number of active vector registers (unlike for
441     // vmv.s.x.)
442     if (RISCVInstrInfo::isScalarSplatInstr(MI) && VLOp.isImm() &&
443         VLOp.getImm() == 1 && hasUndefinedPassthru(MI) &&
444         !ST->hasVLDependentLatency()) {
445       Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
446       Res.SEWLMULRatio = false;
447       Res.VLAny = false;
448       if (RISCVInstrInfo::isFloatScalarMoveOrScalarSplatInstr(MI) &&
449           !ST->hasVInstructionsF64())
450         Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
451       else
452         Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
453       Res.TailPolicy = false;
454     }
455   }
456 
457   // In §32.16.6, whole vector register moves have a dependency on SEW. At the
458   // MIR level though we don't encode the element type, and it gives the same
459   // result whatever the SEW may be.
460   //
461   // However it does need valid SEW, i.e. vill must be cleared. The entry to a
462   // function, calls and inline assembly may all set it, so make sure we clear
463   // it for whole register copies. Do this by leaving VILL demanded.
464   if (isVectorCopy(ST->getRegisterInfo(), MI)) {
465     Res.LMUL = DemandedFields::LMULNone;
466     Res.SEW = DemandedFields::SEWNone;
467     Res.SEWLMULRatio = false;
468     Res.TailPolicy = false;
469     Res.MaskPolicy = false;
470   }
471 
472   if (RISCVInstrInfo::isVExtractInstr(MI)) {
473     assert(!RISCVII::hasVLOp(TSFlags));
474     // TODO: LMUL can be any larger value (without cost)
475     Res.TailPolicy = false;
476   }
477 
478   return Res;
479 }
480 
481 /// Defines the abstract state with which the forward dataflow models the
482 /// values of the VL and VTYPE registers after insertion.
483 class VSETVLIInfo {
484   struct AVLDef {
485     // Every AVLDef should have a VNInfo, unless we're running without
486     // LiveIntervals in which case this will be nullptr.
487     const VNInfo *ValNo;
488     Register DefReg;
489   };
490   union {
491     AVLDef AVLRegDef;
492     unsigned AVLImm;
493   };
494 
495   enum : uint8_t {
496     Uninitialized,
497     AVLIsReg,
498     AVLIsImm,
499     AVLIsVLMAX,
500     Unknown, // AVL and VTYPE are fully unknown
501   } State = Uninitialized;
502 
503   // Fields from VTYPE.
504   RISCVVType::VLMUL VLMul = RISCVVType::LMUL_1;
505   uint8_t SEW = 0;
506   uint8_t TailAgnostic : 1;
507   uint8_t MaskAgnostic : 1;
508   uint8_t SEWLMULRatioOnly : 1;
509 
510 public:
VSETVLIInfo()511   VSETVLIInfo()
512       : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
513         SEWLMULRatioOnly(false) {}
514 
getUnknown()515   static VSETVLIInfo getUnknown() {
516     VSETVLIInfo Info;
517     Info.setUnknown();
518     return Info;
519   }
520 
isValid() const521   bool isValid() const { return State != Uninitialized; }
setUnknown()522   void setUnknown() { State = Unknown; }
isUnknown() const523   bool isUnknown() const { return State == Unknown; }
524 
setAVLRegDef(const VNInfo * VNInfo,Register AVLReg)525   void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
526     assert(AVLReg.isVirtual());
527     AVLRegDef.ValNo = VNInfo;
528     AVLRegDef.DefReg = AVLReg;
529     State = AVLIsReg;
530   }
531 
setAVLImm(unsigned Imm)532   void setAVLImm(unsigned Imm) {
533     AVLImm = Imm;
534     State = AVLIsImm;
535   }
536 
setAVLVLMAX()537   void setAVLVLMAX() { State = AVLIsVLMAX; }
538 
hasAVLImm() const539   bool hasAVLImm() const { return State == AVLIsImm; }
hasAVLReg() const540   bool hasAVLReg() const { return State == AVLIsReg; }
hasAVLVLMAX() const541   bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
getAVLReg() const542   Register getAVLReg() const {
543     assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
544     return AVLRegDef.DefReg;
545   }
getAVLImm() const546   unsigned getAVLImm() const {
547     assert(hasAVLImm());
548     return AVLImm;
549   }
getAVLVNInfo() const550   const VNInfo *getAVLVNInfo() const {
551     assert(hasAVLReg());
552     return AVLRegDef.ValNo;
553   }
554   // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
555   // a PHI node. In that case getAVLVNInfo()->def will point to the block
556   // boundary slot and this will return nullptr.  If LiveIntervals isn't
557   // available, nullptr is also returned.
getAVLDefMI(const LiveIntervals * LIS) const558   const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
559     assert(hasAVLReg());
560     if (!LIS || getAVLVNInfo()->isPHIDef())
561       return nullptr;
562     auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
563     assert(MI);
564     return MI;
565   }
566 
setAVL(const VSETVLIInfo & Info)567   void setAVL(const VSETVLIInfo &Info) {
568     assert(Info.isValid());
569     if (Info.isUnknown())
570       setUnknown();
571     else if (Info.hasAVLReg())
572       setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
573     else if (Info.hasAVLVLMAX())
574       setAVLVLMAX();
575     else {
576       assert(Info.hasAVLImm());
577       setAVLImm(Info.getAVLImm());
578     }
579   }
580 
getSEW() const581   unsigned getSEW() const { return SEW; }
getVLMUL() const582   RISCVVType::VLMUL getVLMUL() const { return VLMul; }
getTailAgnostic() const583   bool getTailAgnostic() const { return TailAgnostic; }
getMaskAgnostic() const584   bool getMaskAgnostic() const { return MaskAgnostic; }
585 
hasNonZeroAVL(const LiveIntervals * LIS) const586   bool hasNonZeroAVL(const LiveIntervals *LIS) const {
587     if (hasAVLImm())
588       return getAVLImm() > 0;
589     if (hasAVLReg()) {
590       if (auto *DefMI = getAVLDefMI(LIS))
591         return RISCVInstrInfo::isNonZeroLoadImmediate(*DefMI);
592     }
593     if (hasAVLVLMAX())
594       return true;
595     return false;
596   }
597 
hasEquallyZeroAVL(const VSETVLIInfo & Other,const LiveIntervals * LIS) const598   bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
599                          const LiveIntervals *LIS) const {
600     if (hasSameAVL(Other))
601       return true;
602     return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
603   }
604 
hasSameAVLLatticeValue(const VSETVLIInfo & Other) const605   bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
606     if (hasAVLReg() && Other.hasAVLReg()) {
607       assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
608              "we either have intervals or we don't");
609       if (!getAVLVNInfo())
610         return getAVLReg() == Other.getAVLReg();
611       return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
612              getAVLReg() == Other.getAVLReg();
613     }
614 
615     if (hasAVLImm() && Other.hasAVLImm())
616       return getAVLImm() == Other.getAVLImm();
617 
618     if (hasAVLVLMAX())
619       return Other.hasAVLVLMAX() && hasSameVLMAX(Other);
620 
621     return false;
622   }
623 
624   // Return true if the two lattice values are guaranteed to have
625   // the same AVL value at runtime.
hasSameAVL(const VSETVLIInfo & Other) const626   bool hasSameAVL(const VSETVLIInfo &Other) const {
627     // Without LiveIntervals, we don't know which instruction defines a
628     // register.  Since a register may be redefined, this means all AVLIsReg
629     // states must be treated as possibly distinct.
630     if (hasAVLReg() && Other.hasAVLReg()) {
631       assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
632              "we either have intervals or we don't");
633       if (!getAVLVNInfo())
634         return false;
635     }
636     return hasSameAVLLatticeValue(Other);
637   }
638 
setVTYPE(unsigned VType)639   void setVTYPE(unsigned VType) {
640     assert(isValid() && !isUnknown() &&
641            "Can't set VTYPE for uninitialized or unknown");
642     VLMul = RISCVVType::getVLMUL(VType);
643     SEW = RISCVVType::getSEW(VType);
644     TailAgnostic = RISCVVType::isTailAgnostic(VType);
645     MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
646   }
setVTYPE(RISCVVType::VLMUL L,unsigned S,bool TA,bool MA)647   void setVTYPE(RISCVVType::VLMUL L, unsigned S, bool TA, bool MA) {
648     assert(isValid() && !isUnknown() &&
649            "Can't set VTYPE for uninitialized or unknown");
650     VLMul = L;
651     SEW = S;
652     TailAgnostic = TA;
653     MaskAgnostic = MA;
654   }
655 
setVLMul(RISCVVType::VLMUL VLMul)656   void setVLMul(RISCVVType::VLMUL VLMul) { this->VLMul = VLMul; }
657 
encodeVTYPE() const658   unsigned encodeVTYPE() const {
659     assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
660            "Can't encode VTYPE for uninitialized or unknown");
661     return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
662   }
663 
hasSEWLMULRatioOnly() const664   bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }
665 
hasSameVTYPE(const VSETVLIInfo & Other) const666   bool hasSameVTYPE(const VSETVLIInfo &Other) const {
667     assert(isValid() && Other.isValid() &&
668            "Can't compare invalid VSETVLIInfos");
669     assert(!isUnknown() && !Other.isUnknown() &&
670            "Can't compare VTYPE in unknown state");
671     assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
672            "Can't compare when only LMUL/SEW ratio is valid.");
673     return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
674            std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
675                     Other.MaskAgnostic);
676   }
677 
getSEWLMULRatio() const678   unsigned getSEWLMULRatio() const {
679     assert(isValid() && !isUnknown() &&
680            "Can't use VTYPE for uninitialized or unknown");
681     return RISCVVType::getSEWLMULRatio(SEW, VLMul);
682   }
683 
684   // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
685   // Note that having the same VLMAX ensures that both share the same
686   // function from AVL to VL; that is, they must produce the same VL value
687   // for any given AVL value.
hasSameVLMAX(const VSETVLIInfo & Other) const688   bool hasSameVLMAX(const VSETVLIInfo &Other) const {
689     assert(isValid() && Other.isValid() &&
690            "Can't compare invalid VSETVLIInfos");
691     assert(!isUnknown() && !Other.isUnknown() &&
692            "Can't compare VTYPE in unknown state");
693     return getSEWLMULRatio() == Other.getSEWLMULRatio();
694   }
695 
hasCompatibleVTYPE(const DemandedFields & Used,const VSETVLIInfo & Require) const696   bool hasCompatibleVTYPE(const DemandedFields &Used,
697                           const VSETVLIInfo &Require) const {
698     return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
699   }
700 
701   // Determine whether the vector instructions requirements represented by
702   // Require are compatible with the previous vsetvli instruction represented
703   // by this.  MI is the instruction whose requirements we're considering.
isCompatible(const DemandedFields & Used,const VSETVLIInfo & Require,const LiveIntervals * LIS) const704   bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
705                     const LiveIntervals *LIS) const {
706     assert(isValid() && Require.isValid() &&
707            "Can't compare invalid VSETVLIInfos");
708     // Nothing is compatible with Unknown.
709     if (isUnknown() || Require.isUnknown())
710       return false;
711 
712     // If only our VLMAX ratio is valid, then this isn't compatible.
713     if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
714       return false;
715 
716     if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
717       return false;
718 
719     if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
720       return false;
721 
722     return hasCompatibleVTYPE(Used, Require);
723   }
724 
operator ==(const VSETVLIInfo & Other) const725   bool operator==(const VSETVLIInfo &Other) const {
726     // Uninitialized is only equal to another Uninitialized.
727     if (!isValid())
728       return !Other.isValid();
729     if (!Other.isValid())
730       return !isValid();
731 
732     // Unknown is only equal to another Unknown.
733     if (isUnknown())
734       return Other.isUnknown();
735     if (Other.isUnknown())
736       return isUnknown();
737 
738     if (!hasSameAVLLatticeValue(Other))
739       return false;
740 
741     // If the SEWLMULRatioOnly bits are different, then they aren't equal.
742     if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
743       return false;
744 
745     // If only the VLMAX is valid, check that it is the same.
746     if (SEWLMULRatioOnly)
747       return hasSameVLMAX(Other);
748 
749     // If the full VTYPE is valid, check that it is the same.
750     return hasSameVTYPE(Other);
751   }
752 
operator !=(const VSETVLIInfo & Other) const753   bool operator!=(const VSETVLIInfo &Other) const {
754     return !(*this == Other);
755   }
756 
757   // Calculate the VSETVLIInfo visible to a block assuming this and Other are
758   // both predecessors.
intersect(const VSETVLIInfo & Other) const759   VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
760     // If the new value isn't valid, ignore it.
761     if (!Other.isValid())
762       return *this;
763 
764     // If this value isn't valid, this must be the first predecessor, use it.
765     if (!isValid())
766       return Other;
767 
768     // If either is unknown, the result is unknown.
769     if (isUnknown() || Other.isUnknown())
770       return VSETVLIInfo::getUnknown();
771 
772     // If we have an exact, match return this.
773     if (*this == Other)
774       return *this;
775 
776     // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
777     // return an SEW/LMUL ratio only value.
778     if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
779       VSETVLIInfo MergeInfo = *this;
780       MergeInfo.SEWLMULRatioOnly = true;
781       return MergeInfo;
782     }
783 
784     // Otherwise the result is unknown.
785     return VSETVLIInfo::getUnknown();
786   }
787 
788 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
789   /// Support for debugging, callable in GDB: V->dump()
dump() const790   LLVM_DUMP_METHOD void dump() const {
791     print(dbgs());
792     dbgs() << "\n";
793   }
794 
795   /// Implement operator<<.
796   /// @{
print(raw_ostream & OS) const797   void print(raw_ostream &OS) const {
798     OS << "{";
799     if (!isValid())
800       OS << "Uninitialized";
801     if (isUnknown())
802       OS << "unknown";
803     if (hasAVLReg())
804       OS << "AVLReg=" << llvm::printReg(getAVLReg());
805     if (hasAVLImm())
806       OS << "AVLImm=" << (unsigned)AVLImm;
807     if (hasAVLVLMAX())
808       OS << "AVLVLMAX";
809     OS << ", ";
810 
811     unsigned LMul;
812     bool Fractional;
813     std::tie(LMul, Fractional) = decodeVLMUL(VLMul);
814 
815     OS << "VLMul=";
816     if (Fractional)
817       OS << "mf";
818     else
819       OS << "m";
820     OS << LMul << ", "
821        << "SEW=e" << (unsigned)SEW << ", "
822        << "TailAgnostic=" << (bool)TailAgnostic << ", "
823        << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
824        << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
825   }
826 #endif
827 };
828 
829 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
830 LLVM_ATTRIBUTE_USED
operator <<(raw_ostream & OS,const VSETVLIInfo & V)831 inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
832   V.print(OS);
833   return OS;
834 }
835 #endif
836 
837 struct BlockData {
838   // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
839   // block. Calculated in Phase 2.
840   VSETVLIInfo Exit;
841 
842   // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
843   // blocks. Calculated in Phase 2, and used by Phase 3.
844   VSETVLIInfo Pred;
845 
846   // Keeps track of whether the block is already in the queue.
847   bool InQueue = false;
848 
849   BlockData() = default;
850 };
851 
852 class RISCVInsertVSETVLI : public MachineFunctionPass {
853   const RISCVSubtarget *ST;
854   const TargetInstrInfo *TII;
855   MachineRegisterInfo *MRI;
856   // Possibly null!
857   LiveIntervals *LIS;
858 
859   std::vector<BlockData> BlockInfo;
860   std::queue<const MachineBasicBlock *> WorkList;
861 
862 public:
863   static char ID;
864 
RISCVInsertVSETVLI()865   RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
866   bool runOnMachineFunction(MachineFunction &MF) override;
867 
getAnalysisUsage(AnalysisUsage & AU) const868   void getAnalysisUsage(AnalysisUsage &AU) const override {
869     AU.setPreservesCFG();
870 
871     AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
872     AU.addPreserved<LiveIntervalsWrapperPass>();
873     AU.addPreserved<SlotIndexesWrapperPass>();
874     AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
875     AU.addPreserved<LiveStacksWrapperLegacy>();
876 
877     MachineFunctionPass::getAnalysisUsage(AU);
878   }
879 
getPassName() const880   StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
881 
882 private:
883   bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
884                    const VSETVLIInfo &CurInfo) const;
885   bool needVSETVLIPHI(const VSETVLIInfo &Require,
886                       const MachineBasicBlock &MBB) const;
887   void insertVSETVLI(MachineBasicBlock &MBB,
888                      MachineBasicBlock::iterator InsertPt, DebugLoc DL,
889                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
890 
891   void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
892   void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
893   bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
894                              VSETVLIInfo &Info) const;
895   void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
896   void emitVSETVLIs(MachineBasicBlock &MBB);
897   void doPRE(MachineBasicBlock &MBB);
898   void insertReadVL(MachineBasicBlock &MBB);
899 
900   bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
901                             const DemandedFields &Used) const;
902   void coalesceVSETVLIs(MachineBasicBlock &MBB) const;
903 
904   VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
905   VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
906   void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
907 };
908 
909 } // end anonymous namespace
910 
911 char RISCVInsertVSETVLI::ID = 0;
912 char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;
913 
INITIALIZE_PASS(RISCVInsertVSETVLI,DEBUG_TYPE,RISCV_INSERT_VSETVLI_NAME,false,false)914 INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
915                 false, false)
916 
917 // If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
918 // replace the AVL operand with the AVL of the defining vsetvli. E.g.
919 //
920 // %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
921 // $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
922 // ->
923 // %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
924 // $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
925 void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
926   if (!Info.hasAVLReg())
927     return;
928   const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
929   if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI))
930     return;
931   VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
932   if (!DefInstrInfo.hasSameVLMAX(Info))
933     return;
934   Info.setAVL(DefInstrInfo);
935 }
936 
937 // Return a VSETVLIInfo representing the changes made by this VSETVLI or
938 // VSETIVLI instruction.
939 VSETVLIInfo
getInfoForVSETVLI(const MachineInstr & MI) const940 RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
941   VSETVLIInfo NewInfo;
942   if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
943     NewInfo.setAVLImm(MI.getOperand(1).getImm());
944   } else {
945     assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
946            MI.getOpcode() == RISCV::PseudoVSETVLIX0);
947     if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
948       NewInfo.setAVLVLMAX();
949     else if (MI.getOperand(1).isUndef())
950       // Otherwise use an AVL of 1 to avoid depending on previous vl.
951       NewInfo.setAVLImm(1);
952     else {
953       Register AVLReg = MI.getOperand(1).getReg();
954       VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
955       NewInfo.setAVLRegDef(VNI, AVLReg);
956     }
957   }
958   NewInfo.setVTYPE(MI.getOperand(2).getImm());
959 
960   forwardVSETVLIAVL(NewInfo);
961 
962   return NewInfo;
963 }
964 
computeVLMAX(unsigned VLEN,unsigned SEW,RISCVVType::VLMUL VLMul)965 static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
966                              RISCVVType::VLMUL VLMul) {
967   auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
968   if (Fractional)
969     VLEN = VLEN / LMul;
970   else
971     VLEN = VLEN * LMul;
972   return VLEN/SEW;
973 }
974 
975 VSETVLIInfo
computeInfoForInstr(const MachineInstr & MI) const976 RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
977   VSETVLIInfo InstrInfo;
978   const uint64_t TSFlags = MI.getDesc().TSFlags;
979 
980   bool TailAgnostic = true;
981   bool MaskAgnostic = true;
982   if (!hasUndefinedPassthru(MI)) {
983     // Start with undisturbed.
984     TailAgnostic = false;
985     MaskAgnostic = false;
986 
987     // If there is a policy operand, use it.
988     if (RISCVII::hasVecPolicyOp(TSFlags)) {
989       const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
990       uint64_t Policy = Op.getImm();
991       assert(Policy <=
992                  (RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC) &&
993              "Invalid Policy Value");
994       TailAgnostic = Policy & RISCVVType::TAIL_AGNOSTIC;
995       MaskAgnostic = Policy & RISCVVType::MASK_AGNOSTIC;
996     }
997 
998     if (!RISCVII::usesMaskPolicy(TSFlags))
999       MaskAgnostic = true;
1000   }
1001 
1002   RISCVVType::VLMUL VLMul = RISCVII::getLMul(TSFlags);
1003 
1004   unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
1005   // A Log2SEW of 0 is an operation on mask registers only.
1006   unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
1007   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
1008 
1009   if (RISCVII::hasVLOp(TSFlags)) {
1010     const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
1011     if (VLOp.isImm()) {
1012       int64_t Imm = VLOp.getImm();
1013       // Convert the VLMax sentintel to X0 register.
1014       if (Imm == RISCV::VLMaxSentinel) {
1015         // If we know the exact VLEN, see if we can use the constant encoding
1016         // for the VLMAX instead.  This reduces register pressure slightly.
1017         const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
1018         if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
1019           InstrInfo.setAVLImm(VLMAX);
1020         else
1021           InstrInfo.setAVLVLMAX();
1022       }
1023       else
1024         InstrInfo.setAVLImm(Imm);
1025     } else if (VLOp.isUndef()) {
1026       // Otherwise use an AVL of 1 to avoid depending on previous vl.
1027       InstrInfo.setAVLImm(1);
1028     } else {
1029       VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
1030       InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
1031     }
1032   } else {
1033     assert(RISCVInstrInfo::isScalarExtractInstr(MI) ||
1034            RISCVInstrInfo::isVExtractInstr(MI));
1035     // Pick a random value for state tracking purposes, will be ignored via
1036     // the demanded fields mechanism
1037     InstrInfo.setAVLImm(1);
1038   }
1039 #ifndef NDEBUG
1040   if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
1041     assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
1042   }
1043 #endif
1044   InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
1045 
1046   forwardVSETVLIAVL(InstrInfo);
1047 
1048   return InstrInfo;
1049 }
1050 
insertVSETVLI(MachineBasicBlock & MBB,MachineBasicBlock::iterator InsertPt,DebugLoc DL,const VSETVLIInfo & Info,const VSETVLIInfo & PrevInfo)1051 void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
1052                      MachineBasicBlock::iterator InsertPt, DebugLoc DL,
1053                      const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
1054 
1055   ++NumInsertedVSETVL;
1056   if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
1057     // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
1058     // VLMAX.
1059     if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
1060       auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0X0))
1061                     .addReg(RISCV::X0, RegState::Define | RegState::Dead)
1062                     .addReg(RISCV::X0, RegState::Kill)
1063                     .addImm(Info.encodeVTYPE())
1064                     .addReg(RISCV::VL, RegState::Implicit);
1065       if (LIS)
1066         LIS->InsertMachineInstrInMaps(*MI);
1067       return;
1068     }
1069 
1070     // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
1071     // it has the same VLMAX we want and the last VL/VTYPE we observed is the
1072     // same, we can use the X0, X0 form.
1073     if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
1074       if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
1075           DefMI && RISCVInstrInfo::isVectorConfigInstr(*DefMI)) {
1076         VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1077         if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
1078           auto MI =
1079               BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0X0))
1080                   .addReg(RISCV::X0, RegState::Define | RegState::Dead)
1081                   .addReg(RISCV::X0, RegState::Kill)
1082                   .addImm(Info.encodeVTYPE())
1083                   .addReg(RISCV::VL, RegState::Implicit);
1084           if (LIS)
1085             LIS->InsertMachineInstrInMaps(*MI);
1086           return;
1087         }
1088       }
1089     }
1090   }
1091 
1092   if (Info.hasAVLImm()) {
1093     auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
1094                   .addReg(RISCV::X0, RegState::Define | RegState::Dead)
1095                   .addImm(Info.getAVLImm())
1096                   .addImm(Info.encodeVTYPE());
1097     if (LIS)
1098       LIS->InsertMachineInstrInMaps(*MI);
1099     return;
1100   }
1101 
1102   if (Info.hasAVLVLMAX()) {
1103     Register DestReg = MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
1104     auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
1105                   .addReg(DestReg, RegState::Define | RegState::Dead)
1106                   .addReg(RISCV::X0, RegState::Kill)
1107                   .addImm(Info.encodeVTYPE());
1108     if (LIS) {
1109       LIS->InsertMachineInstrInMaps(*MI);
1110       LIS->createAndComputeVirtRegInterval(DestReg);
1111     }
1112     return;
1113   }
1114 
1115   Register AVLReg = Info.getAVLReg();
1116   MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
1117   auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
1118                 .addReg(RISCV::X0, RegState::Define | RegState::Dead)
1119                 .addReg(AVLReg)
1120                 .addImm(Info.encodeVTYPE());
1121   if (LIS) {
1122     LIS->InsertMachineInstrInMaps(*MI);
1123     LiveInterval &LI = LIS->getInterval(AVLReg);
1124     SlotIndex SI = LIS->getInstructionIndex(*MI).getRegSlot();
1125     const VNInfo *CurVNI = Info.getAVLVNInfo();
1126     // If the AVL value isn't live at MI, do a quick check to see if it's easily
1127     // extendable. Otherwise, we need to copy it.
1128     if (LI.getVNInfoBefore(SI) != CurVNI) {
1129       if (!LI.liveAt(SI) && LI.containsOneValue())
1130         LIS->extendToIndices(LI, SI);
1131       else {
1132         Register AVLCopyReg =
1133             MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
1134         MachineBasicBlock *MBB = LIS->getMBBFromIndex(CurVNI->def);
1135         MachineBasicBlock::iterator II;
1136         if (CurVNI->isPHIDef())
1137           II = MBB->getFirstNonPHI();
1138         else {
1139           II = LIS->getInstructionFromIndex(CurVNI->def);
1140           II = std::next(II);
1141         }
1142         assert(II.isValid());
1143         auto AVLCopy = BuildMI(*MBB, II, DL, TII->get(RISCV::COPY), AVLCopyReg)
1144                            .addReg(AVLReg);
1145         LIS->InsertMachineInstrInMaps(*AVLCopy);
1146         MI->getOperand(1).setReg(AVLCopyReg);
1147         LIS->createAndComputeVirtRegInterval(AVLCopyReg);
1148       }
1149     }
1150   }
1151 }
1152 
1153 /// Return true if a VSETVLI is required to transition from CurInfo to Require
1154 /// given a set of DemandedFields \p Used.
needVSETVLI(const DemandedFields & Used,const VSETVLIInfo & Require,const VSETVLIInfo & CurInfo) const1155 bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
1156                                      const VSETVLIInfo &Require,
1157                                      const VSETVLIInfo &CurInfo) const {
1158   if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
1159     return true;
1160 
1161   if (CurInfo.isCompatible(Used, Require, LIS))
1162     return false;
1163 
1164   return true;
1165 }
1166 
1167 // If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
1168 // maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
1169 // places.
adjustIncoming(const VSETVLIInfo & PrevInfo,const VSETVLIInfo & NewInfo,DemandedFields & Demanded)1170 static VSETVLIInfo adjustIncoming(const VSETVLIInfo &PrevInfo,
1171                                   const VSETVLIInfo &NewInfo,
1172                                   DemandedFields &Demanded) {
1173   VSETVLIInfo Info = NewInfo;
1174 
1175   if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
1176       !PrevInfo.isUnknown()) {
1177     if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
1178             PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
1179       Info.setVLMul(*NewVLMul);
1180     Demanded.LMUL = DemandedFields::LMULEqual;
1181   }
1182 
1183   return Info;
1184 }
1185 
1186 // Given an incoming state reaching MI, minimally modifies that state so that it
1187 // is compatible with MI. The resulting state is guaranteed to be semantically
1188 // legal for MI, but may not be the state requested by MI.
transferBefore(VSETVLIInfo & Info,const MachineInstr & MI) const1189 void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
1190                                         const MachineInstr &MI) const {
1191   if (isVectorCopy(ST->getRegisterInfo(), MI) &&
1192       (Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
1193     // Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
1194     // be coalesced into another vsetvli since we won't demand any fields.
1195     VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
1196     NewInfo.setAVLImm(1);
1197     NewInfo.setVTYPE(RISCVVType::LMUL_1, /*sew*/ 8, /*ta*/ true, /*ma*/ true);
1198     Info = NewInfo;
1199     return;
1200   }
1201 
1202   if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
1203     return;
1204 
1205   DemandedFields Demanded = getDemanded(MI, ST);
1206 
1207   const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
1208   assert(NewInfo.isValid() && !NewInfo.isUnknown());
1209   if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
1210     return;
1211 
1212   const VSETVLIInfo PrevInfo = Info;
1213   if (!Info.isValid() || Info.isUnknown())
1214     Info = NewInfo;
1215 
1216   const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);
1217 
1218   // If MI only demands that VL has the same zeroness, we only need to set the
1219   // AVL if the zeroness differs.  This removes a vsetvli entirely if the types
1220   // match or allows use of cheaper avl preserving variant if VLMAX doesn't
1221   // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype"
1222   // variant, so we avoid the transform to prevent extending live range of an
1223   // avl register operand.
1224   // TODO: We can probably relax this for immediates.
1225   bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
1226                      IncomingInfo.hasSameVLMAX(PrevInfo);
1227   if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
1228     Info.setAVL(IncomingInfo);
1229 
1230   Info.setVTYPE(
1231       ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
1232           .getVLMUL(),
1233       ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
1234       // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
1235       // if needed.
1236       (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
1237           IncomingInfo.getTailAgnostic(),
1238       (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
1239           IncomingInfo.getMaskAgnostic());
1240 
1241   // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
1242   // the AVL.
1243   if (Info.hasSEWLMULRatioOnly()) {
1244     VSETVLIInfo RatiolessInfo = IncomingInfo;
1245     RatiolessInfo.setAVL(Info);
1246     Info = RatiolessInfo;
1247   }
1248 }
1249 
1250 // Given a state with which we evaluated MI (see transferBefore above for why
1251 // this might be different that the state MI requested), modify the state to
1252 // reflect the changes MI might make.
transferAfter(VSETVLIInfo & Info,const MachineInstr & MI) const1253 void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
1254                                        const MachineInstr &MI) const {
1255   if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
1256     Info = getInfoForVSETVLI(MI);
1257     return;
1258   }
1259 
1260   if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
1261     // Update AVL to vl-output of the fault first load.
1262     assert(MI.getOperand(1).getReg().isVirtual());
1263     if (LIS) {
1264       auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
1265       SlotIndex SI =
1266           LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
1267       VNInfo *VNI = LI.getVNInfoAt(SI);
1268       Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
1269     } else
1270       Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
1271     return;
1272   }
1273 
1274   // If this is something that updates VL/VTYPE that we don't know about, set
1275   // the state to unknown.
1276   if (MI.isCall() || MI.isInlineAsm() ||
1277       MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
1278       MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
1279     Info = VSETVLIInfo::getUnknown();
1280 }
1281 
computeVLVTYPEChanges(const MachineBasicBlock & MBB,VSETVLIInfo & Info) const1282 bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
1283                                                VSETVLIInfo &Info) const {
1284   bool HadVectorOp = false;
1285 
1286   Info = BlockInfo[MBB.getNumber()].Pred;
1287   for (const MachineInstr &MI : MBB) {
1288     transferBefore(Info, MI);
1289 
1290     if (RISCVInstrInfo::isVectorConfigInstr(MI) ||
1291         RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
1292         isVectorCopy(ST->getRegisterInfo(), MI))
1293       HadVectorOp = true;
1294 
1295     transferAfter(Info, MI);
1296   }
1297 
1298   return HadVectorOp;
1299 }
1300 
computeIncomingVLVTYPE(const MachineBasicBlock & MBB)1301 void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
1302 
1303   BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1304 
1305   BBInfo.InQueue = false;
1306 
1307   // Start with the previous entry so that we keep the most conservative state
1308   // we have ever found.
1309   VSETVLIInfo InInfo = BBInfo.Pred;
1310   if (MBB.pred_empty()) {
1311     // There are no predecessors, so use the default starting status.
1312     InInfo.setUnknown();
1313   } else {
1314     for (MachineBasicBlock *P : MBB.predecessors())
1315       InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
1316   }
1317 
1318   // If we don't have any valid predecessor value, wait until we do.
1319   if (!InInfo.isValid())
1320     return;
1321 
1322   // If no change, no need to rerun block
1323   if (InInfo == BBInfo.Pred)
1324     return;
1325 
1326   BBInfo.Pred = InInfo;
1327   LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
1328                     << " changed to " << BBInfo.Pred << "\n");
1329 
1330   // Note: It's tempting to cache the state changes here, but due to the
1331   // compatibility checks performed a blocks output state can change based on
1332   // the input state.  To cache, we'd have to add logic for finding
1333   // never-compatible state changes.
1334   VSETVLIInfo TmpStatus;
1335   computeVLVTYPEChanges(MBB, TmpStatus);
1336 
1337   // If the new exit value matches the old exit value, we don't need to revisit
1338   // any blocks.
1339   if (BBInfo.Exit == TmpStatus)
1340     return;
1341 
1342   BBInfo.Exit = TmpStatus;
1343   LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
1344                     << " changed to " << BBInfo.Exit << "\n");
1345 
1346   // Add the successors to the work list so we can propagate the changed exit
1347   // status.
1348   for (MachineBasicBlock *S : MBB.successors())
1349     if (!BlockInfo[S->getNumber()].InQueue) {
1350       BlockInfo[S->getNumber()].InQueue = true;
1351       WorkList.push(S);
1352     }
1353 }
1354 
1355 // If we weren't able to prove a vsetvli was directly unneeded, it might still
1356 // be unneeded if the AVL was a phi node where all incoming values are VL
1357 // outputs from the last VSETVLI in their respective basic blocks.
needVSETVLIPHI(const VSETVLIInfo & Require,const MachineBasicBlock & MBB) const1358 bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
1359                                         const MachineBasicBlock &MBB) const {
1360   if (!Require.hasAVLReg())
1361     return true;
1362 
1363   if (!LIS)
1364     return true;
1365 
1366   // We need the AVL to have been produced by a PHI node in this basic block.
1367   const VNInfo *Valno = Require.getAVLVNInfo();
1368   if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
1369     return true;
1370 
1371   const LiveRange &LR = LIS->getInterval(Require.getAVLReg());
1372 
1373   for (auto *PBB : MBB.predecessors()) {
1374     const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;
1375 
1376     // We need the PHI input to the be the output of a VSET(I)VLI.
1377     const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
1378     if (!Value)
1379       return true;
1380     MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
1381     if (!DefMI || !RISCVInstrInfo::isVectorConfigInstr(*DefMI))
1382       return true;
1383 
1384     // We found a VSET(I)VLI make sure it matches the output of the
1385     // predecessor block.
1386     VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
1387     if (DefInfo != PBBExit)
1388       return true;
1389 
1390     // Require has the same VL as PBBExit, so if the exit from the
1391     // predecessor has the VTYPE we are looking for we might be able
1392     // to avoid a VSETVLI.
1393     if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
1394       return true;
1395   }
1396 
1397   // If all the incoming values to the PHI checked out, we don't need
1398   // to insert a VSETVLI.
1399   return false;
1400 }
1401 
emitVSETVLIs(MachineBasicBlock & MBB)1402 void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
1403   VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
1404   // Track whether the prefix of the block we've scanned is transparent
1405   // (meaning has not yet changed the abstract state).
1406   bool PrefixTransparent = true;
1407   for (MachineInstr &MI : MBB) {
1408     const VSETVLIInfo PrevInfo = CurInfo;
1409     transferBefore(CurInfo, MI);
1410 
1411     // If this is an explicit VSETVLI or VSETIVLI, update our state.
1412     if (RISCVInstrInfo::isVectorConfigInstr(MI)) {
1413       // Conservatively, mark the VL and VTYPE as live.
1414       assert(MI.getOperand(3).getReg() == RISCV::VL &&
1415              MI.getOperand(4).getReg() == RISCV::VTYPE &&
1416              "Unexpected operands where VL and VTYPE should be");
1417       MI.getOperand(3).setIsDead(false);
1418       MI.getOperand(4).setIsDead(false);
1419       PrefixTransparent = false;
1420     }
1421 
1422     if (EnsureWholeVectorRegisterMoveValidVTYPE &&
1423         isVectorCopy(ST->getRegisterInfo(), MI)) {
1424       if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
1425         insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
1426         PrefixTransparent = false;
1427       }
1428       MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
1429                                               /*isImp*/ true));
1430     }
1431 
1432     uint64_t TSFlags = MI.getDesc().TSFlags;
1433     if (RISCVII::hasSEWOp(TSFlags)) {
1434       if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
1435         // If this is the first implicit state change, and the state change
1436         // requested can be proven to produce the same register contents, we
1437         // can skip emitting the actual state change and continue as if we
1438         // had since we know the GPR result of the implicit state change
1439         // wouldn't be used and VL/VTYPE registers are correct.  Note that
1440         // we *do* need to model the state as if it changed as while the
1441         // register contents are unchanged, the abstract model can change.
1442         if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
1443           insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
1444         PrefixTransparent = false;
1445       }
1446 
1447       if (RISCVII::hasVLOp(TSFlags)) {
1448         MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
1449         if (VLOp.isReg()) {
1450           Register Reg = VLOp.getReg();
1451 
1452           // Erase the AVL operand from the instruction.
1453           VLOp.setReg(RISCV::NoRegister);
1454           VLOp.setIsKill(false);
1455           if (LIS) {
1456             LiveInterval &LI = LIS->getInterval(Reg);
1457             SmallVector<MachineInstr *> DeadMIs;
1458             LIS->shrinkToUses(&LI, &DeadMIs);
1459             // We might have separate components that need split due to
1460             // needVSETVLIPHI causing us to skip inserting a new VL def.
1461             SmallVector<LiveInterval *> SplitLIs;
1462             LIS->splitSeparateComponents(LI, SplitLIs);
1463 
1464             // If the AVL was an immediate > 31, then it would have been emitted
1465             // as an ADDI. However, the ADDI might not have been used in the
1466             // vsetvli, or a vsetvli might not have been emitted, so it may be
1467             // dead now.
1468             for (MachineInstr *DeadMI : DeadMIs) {
1469               if (!TII->isAddImmediate(*DeadMI, Reg))
1470                 continue;
1471               LIS->RemoveMachineInstrFromMaps(*DeadMI);
1472               DeadMI->eraseFromParent();
1473             }
1474           }
1475         }
1476         MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
1477                                                 /*isImp*/ true));
1478       }
1479       MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
1480                                               /*isImp*/ true));
1481     }
1482 
1483     if (MI.isInlineAsm()) {
1484       MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ true,
1485                                               /*isImp*/ true));
1486       MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ true,
1487                                               /*isImp*/ true));
1488     }
1489 
1490     if (MI.isCall() || MI.isInlineAsm() ||
1491         MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
1492         MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
1493       PrefixTransparent = false;
1494 
1495     transferAfter(CurInfo, MI);
1496   }
1497 
1498   const auto &Info = BlockInfo[MBB.getNumber()];
1499   if (CurInfo != Info.Exit) {
1500     LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
1501     LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
1502     LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
1503     LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
1504   }
1505   assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
1506 }
1507 
1508 /// Perform simple partial redundancy elimination of the VSETVLI instructions
1509 /// we're about to insert by looking for cases where we can PRE from the
1510 /// beginning of one block to the end of one of its predecessors.  Specifically,
1511 /// this is geared to catch the common case of a fixed length vsetvl in a single
1512 /// block loop when it could execute once in the preheader instead.
doPRE(MachineBasicBlock & MBB)1513 void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
1514   if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
1515     return;
1516 
1517   MachineBasicBlock *UnavailablePred = nullptr;
1518   VSETVLIInfo AvailableInfo;
1519   for (MachineBasicBlock *P : MBB.predecessors()) {
1520     const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
1521     if (PredInfo.isUnknown()) {
1522       if (UnavailablePred)
1523         return;
1524       UnavailablePred = P;
1525     } else if (!AvailableInfo.isValid()) {
1526       AvailableInfo = PredInfo;
1527     } else if (AvailableInfo != PredInfo) {
1528       return;
1529     }
1530   }
1531 
1532   // Unreachable, single pred, or full redundancy. Note that FRE is handled by
1533   // phase 3.
1534   if (!UnavailablePred || !AvailableInfo.isValid())
1535     return;
1536 
1537   if (!LIS)
1538     return;
1539 
1540   // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
1541   // the unavailable pred.
1542   if (AvailableInfo.hasSEWLMULRatioOnly())
1543     return;
1544 
1545   // Critical edge - TODO: consider splitting?
1546   if (UnavailablePred->succ_size() != 1)
1547     return;
1548 
1549   // If the AVL value is a register (other than our VLMAX sentinel),
1550   // we need to prove the value is available at the point we're going
1551   // to insert the vsetvli at.
1552   if (AvailableInfo.hasAVLReg()) {
1553     SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
1554     // This is an inline dominance check which covers the case of
1555     // UnavailablePred being the preheader of a loop.
1556     if (LIS->getMBBFromIndex(SI) != UnavailablePred)
1557       return;
1558     if (!UnavailablePred->terminators().empty() &&
1559         SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
1560       return;
1561   }
1562 
1563   // Model the effect of changing the input state of the block MBB to
1564   // AvailableInfo.  We're looking for two issues here; one legality,
1565   // one profitability.
1566   // 1) If the block doesn't use some of the fields from VL or VTYPE, we
1567   //    may hit the end of the block with a different end state.  We can
1568   //    not make this change without reflowing later blocks as well.
1569   // 2) If we don't actually remove a transition, inserting a vsetvli
1570   //    into the predecessor block would be correct, but unprofitable.
1571   VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
1572   VSETVLIInfo CurInfo = AvailableInfo;
1573   int TransitionsRemoved = 0;
1574   for (const MachineInstr &MI : MBB) {
1575     const VSETVLIInfo LastInfo = CurInfo;
1576     const VSETVLIInfo LastOldInfo = OldInfo;
1577     transferBefore(CurInfo, MI);
1578     transferBefore(OldInfo, MI);
1579     if (CurInfo == LastInfo)
1580       TransitionsRemoved++;
1581     if (LastOldInfo == OldInfo)
1582       TransitionsRemoved--;
1583     transferAfter(CurInfo, MI);
1584     transferAfter(OldInfo, MI);
1585     if (CurInfo == OldInfo)
1586       // Convergence.  All transitions after this must match by construction.
1587       break;
1588   }
1589   if (CurInfo != OldInfo || TransitionsRemoved <= 0)
1590     // Issues 1 and 2 above
1591     return;
1592 
1593   // Finally, update both data flow state and insert the actual vsetvli.
1594   // Doing both keeps the code in sync with the dataflow results, which
1595   // is critical for correctness of phase 3.
1596   auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
1597   LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
1598                     << UnavailablePred->getName() << " with state "
1599                     << AvailableInfo << "\n");
1600   BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
1601   BlockInfo[MBB.getNumber()].Pred = AvailableInfo;
1602 
1603   // Note there's an implicit assumption here that terminators never use
1604   // or modify VL or VTYPE.  Also, fallthrough will return end().
1605   auto InsertPt = UnavailablePred->getFirstInstrTerminator();
1606   insertVSETVLI(*UnavailablePred, InsertPt,
1607                 UnavailablePred->findDebugLoc(InsertPt),
1608                 AvailableInfo, OldExit);
1609 }
1610 
1611 // Return true if we can mutate PrevMI to match MI without changing any the
1612 // fields which would be observed.
canMutatePriorConfig(const MachineInstr & PrevMI,const MachineInstr & MI,const DemandedFields & Used) const1613 bool RISCVInsertVSETVLI::canMutatePriorConfig(
1614     const MachineInstr &PrevMI, const MachineInstr &MI,
1615     const DemandedFields &Used) const {
1616   // If the VL values aren't equal, return false if either a) the former is
1617   // demanded, or b) we can't rewrite the former to be the later for
1618   // implementation reasons.
1619   if (!RISCVInstrInfo::isVLPreservingConfig(MI)) {
1620     if (Used.VLAny)
1621       return false;
1622 
1623     if (Used.VLZeroness) {
1624       if (RISCVInstrInfo::isVLPreservingConfig(PrevMI))
1625         return false;
1626       if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
1627                                                        LIS))
1628         return false;
1629     }
1630 
1631     auto &AVL = MI.getOperand(1);
1632 
1633     // If the AVL is a register, we need to make sure its definition is the same
1634     // at PrevMI as it was at MI.
1635     if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
1636       VNInfo *VNI = getVNInfoFromReg(AVL.getReg(), MI, LIS);
1637       VNInfo *PrevVNI = getVNInfoFromReg(AVL.getReg(), PrevMI, LIS);
1638       if (!VNI || !PrevVNI || VNI != PrevVNI)
1639         return false;
1640     }
1641   }
1642 
1643   assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
1644   auto PriorVType = PrevMI.getOperand(2).getImm();
1645   auto VType = MI.getOperand(2).getImm();
1646   return areCompatibleVTYPEs(PriorVType, VType, Used);
1647 }
1648 
coalesceVSETVLIs(MachineBasicBlock & MBB) const1649 void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
1650   MachineInstr *NextMI = nullptr;
1651   // We can have arbitrary code in successors, so VL and VTYPE
1652   // must be considered demanded.
1653   DemandedFields Used;
1654   Used.demandVL();
1655   Used.demandVTYPE();
1656   SmallVector<MachineInstr*> ToDelete;
1657 
1658   auto dropAVLUse = [&](MachineOperand &MO) {
1659     if (!MO.isReg() || !MO.getReg().isVirtual())
1660       return;
1661     Register OldVLReg = MO.getReg();
1662     MO.setReg(RISCV::NoRegister);
1663 
1664     if (LIS)
1665       LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
1666 
1667     MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1668     if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1669         MRI->use_nodbg_empty(OldVLReg))
1670       ToDelete.push_back(VLOpDef);
1671   };
1672 
1673   for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
1674 
1675     if (!RISCVInstrInfo::isVectorConfigInstr(MI)) {
1676       Used.doUnion(getDemanded(MI, ST));
1677       if (MI.isCall() || MI.isInlineAsm() ||
1678           MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
1679           MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
1680         NextMI = nullptr;
1681       continue;
1682     }
1683 
1684     if (!MI.getOperand(0).isDead())
1685       Used.demandVL();
1686 
1687     if (NextMI) {
1688       if (!Used.usedVL() && !Used.usedVTYPE()) {
1689         dropAVLUse(MI.getOperand(1));
1690         if (LIS)
1691           LIS->RemoveMachineInstrFromMaps(MI);
1692         MI.eraseFromParent();
1693         NumCoalescedVSETVL++;
1694         // Leave NextMI unchanged
1695         continue;
1696       }
1697 
1698       if (canMutatePriorConfig(MI, *NextMI, Used)) {
1699         if (!RISCVInstrInfo::isVLPreservingConfig(*NextMI)) {
1700           Register DefReg = NextMI->getOperand(0).getReg();
1701 
1702           MI.getOperand(0).setReg(DefReg);
1703           MI.getOperand(0).setIsDead(false);
1704 
1705           // Move the AVL from NextMI to MI
1706           dropAVLUse(MI.getOperand(1));
1707           if (NextMI->getOperand(1).isImm())
1708             MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1709           else
1710             MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
1711                                               false);
1712           dropAVLUse(NextMI->getOperand(1));
1713 
1714           // The def of DefReg moved to MI, so extend the LiveInterval up to
1715           // it.
1716           if (DefReg.isVirtual() && LIS) {
1717             LiveInterval &DefLI = LIS->getInterval(DefReg);
1718             SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
1719             SlotIndex NextMISlot =
1720                 LIS->getInstructionIndex(*NextMI).getRegSlot();
1721             VNInfo *DefVNI = DefLI.getVNInfoAt(NextMISlot);
1722             LiveInterval::Segment S(MISlot, NextMISlot, DefVNI);
1723             DefLI.addSegment(S);
1724             DefVNI->def = MISlot;
1725             // Mark DefLI as spillable if it was previously unspillable
1726             DefLI.setWeight(0);
1727 
1728             // DefReg may have had no uses, in which case we need to shrink
1729             // the LiveInterval up to MI.
1730             LIS->shrinkToUses(&DefLI);
1731           }
1732 
1733           MI.setDesc(NextMI->getDesc());
1734         }
1735         MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1736 
1737         dropAVLUse(NextMI->getOperand(1));
1738         if (LIS)
1739           LIS->RemoveMachineInstrFromMaps(*NextMI);
1740         NextMI->eraseFromParent();
1741         NumCoalescedVSETVL++;
1742         // fallthrough
1743       }
1744     }
1745     NextMI = &MI;
1746     Used = getDemanded(MI, ST);
1747   }
1748 
1749   // Loop over the dead AVL values, and delete them now.  This has
1750   // to be outside the above loop to avoid invalidating iterators.
1751   for (auto *MI : ToDelete) {
1752     if (LIS) {
1753       LIS->removeInterval(MI->getOperand(0).getReg());
1754       LIS->RemoveMachineInstrFromMaps(*MI);
1755     }
1756     MI->eraseFromParent();
1757   }
1758 }
1759 
insertReadVL(MachineBasicBlock & MBB)1760 void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
1761   for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
1762     MachineInstr &MI = *I++;
1763     if (RISCVInstrInfo::isFaultOnlyFirstLoad(MI)) {
1764       Register VLOutput = MI.getOperand(1).getReg();
1765       assert(VLOutput.isVirtual());
1766       if (!MI.getOperand(1).isDead()) {
1767         auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
1768                                 TII->get(RISCV::PseudoReadVL), VLOutput);
1769         // Move the LiveInterval's definition down to PseudoReadVL.
1770         if (LIS) {
1771           SlotIndex NewDefSI =
1772               LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
1773           LiveInterval &DefLI = LIS->getInterval(VLOutput);
1774           LiveRange::Segment *DefSeg = DefLI.getSegmentContaining(NewDefSI);
1775           VNInfo *DefVNI = DefLI.getVNInfoAt(DefSeg->start);
1776           DefLI.removeSegment(DefSeg->start, NewDefSI);
1777           DefVNI->def = NewDefSI;
1778         }
1779       }
1780       // We don't use the vl output of the VLEFF/VLSEGFF anymore.
1781       MI.getOperand(1).setReg(RISCV::X0);
1782       MI.addRegisterDefined(RISCV::VL, MRI->getTargetRegisterInfo());
1783     }
1784   }
1785 }
1786 
runOnMachineFunction(MachineFunction & MF)1787 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1788   // Skip if the vector extension is not enabled.
1789   ST = &MF.getSubtarget<RISCVSubtarget>();
1790   if (!ST->hasVInstructions())
1791     return false;
1792 
1793   LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");
1794 
1795   TII = ST->getInstrInfo();
1796   MRI = &MF.getRegInfo();
1797   auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
1798   LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
1799 
1800   assert(BlockInfo.empty() && "Expect empty block infos");
1801   BlockInfo.resize(MF.getNumBlockIDs());
1802 
1803   bool HaveVectorOp = false;
1804 
1805   // Phase 1 - determine how VL/VTYPE are affected by the each block.
1806   for (const MachineBasicBlock &MBB : MF) {
1807     VSETVLIInfo TmpStatus;
1808     HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
1809     // Initial exit state is whatever change we found in the block.
1810     BlockData &BBInfo = BlockInfo[MBB.getNumber()];
1811     BBInfo.Exit = TmpStatus;
1812     LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
1813                       << " is " << BBInfo.Exit << "\n");
1814 
1815   }
1816 
1817   // If we didn't find any instructions that need VSETVLI, we're done.
1818   if (!HaveVectorOp) {
1819     BlockInfo.clear();
1820     return false;
1821   }
1822 
1823   // Phase 2 - determine the exit VL/VTYPE from each block. We add all
1824   // blocks to the list here, but will also add any that need to be revisited
1825   // during Phase 2 processing.
1826   for (const MachineBasicBlock &MBB : MF) {
1827     WorkList.push(&MBB);
1828     BlockInfo[MBB.getNumber()].InQueue = true;
1829   }
1830   while (!WorkList.empty()) {
1831     const MachineBasicBlock &MBB = *WorkList.front();
1832     WorkList.pop();
1833     computeIncomingVLVTYPE(MBB);
1834   }
1835 
1836   // Perform partial redundancy elimination of vsetvli transitions.
1837   for (MachineBasicBlock &MBB : MF)
1838     doPRE(MBB);
1839 
1840   // Phase 3 - add any vsetvli instructions needed in the block. Use the
1841   // Phase 2 information to avoid adding vsetvlis before the first vector
1842   // instruction in the block if the VL/VTYPE is satisfied by its
1843   // predecessors.
1844   for (MachineBasicBlock &MBB : MF)
1845     emitVSETVLIs(MBB);
1846 
1847   // Now that all vsetvlis are explicit, go through and do block local
1848   // DSE and peephole based demanded fields based transforms.  Note that
1849   // this *must* be done outside the main dataflow so long as we allow
1850   // any cross block analysis within the dataflow.  We can't have both
1851   // demanded fields based mutation and non-local analysis in the
1852   // dataflow at the same time without introducing inconsistencies.
1853   // We're visiting blocks from the bottom up because a VSETVLI in the
1854   // earlier block might become dead when its uses in later blocks are
1855   // optimized away.
1856   for (MachineBasicBlock *MBB : post_order(&MF))
1857     coalesceVSETVLIs(*MBB);
1858 
1859   // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
1860   // of VLEFF/VLSEGFF.
1861   for (MachineBasicBlock &MBB : MF)
1862     insertReadVL(MBB);
1863 
1864   BlockInfo.clear();
1865   return HaveVectorOp;
1866 }
1867 
1868 /// Returns an instance of the Insert VSETVLI pass.
createRISCVInsertVSETVLIPass()1869 FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
1870   return new RISCVInsertVSETVLI();
1871 }
1872