//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
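//
// For example (illustrative only): a vector add pseudo demanding SEW=32,
// LMUL=1 with AVL in %avl would have "vsetvli x0, %avl, e32, m1, ta, ma"
// inserted before it, unless the incoming VL/VTYPE state already satisfies
// its demands.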
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

namespace {

/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
/// This will return nullptr if the virtual register is an implicit_def or
/// if LiveIntervals is not available.
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
                                const LiveIntervals *LIS) {
  assert(Reg.isVirtual());
  if (!LIS)
    return nullptr;
  auto &LI = LIS->getInterval(Reg);
  SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
  return LI.getVNInfoBefore(SI);
}

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction.  Return std::nullopt if MI is
/// not a load or store which ignores SEW.
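/// For example, vle32.v always has EEW=32 regardless of the current SEW.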
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

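/// Return true if \p MI is an ADDI that materializes a known non-zero
/// constant, e.g. "addi a0, x0, 5".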
static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
    MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
    MI.getOperand(1).getReg() == RISCV::X0 &&
    MI.getOperand(2).getImm() != 0;
}

/// Return true if this is an operation on mask registers.  Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification.  Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the passthrough
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used.  If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3,              // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1,      // SEW can be changed as long as it's greater
                // than or equal to the original value, but must be less
                // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  enum : uint8_t {
    LMULEqual = 2, // The exact value of LMUL needs to be preserved.
    LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
    LMULNone = 0                 // We don't need to preserve LMUL at all.
  } LMUL = LMULNone;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used.
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used.
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded.
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = LMULEqual;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded.
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  static DemandedFields all() {
    DemandedFields DF;
    DF.demandVTYPE();
    DF.demandVL();
    return DF;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL = std::max(LMUL, B.LMUL);
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=";
    switch (LMUL) {
    case LMULEqual:
      OS << "LMULEqual";
      break;
    case LMULLessThanOrEqualToM1:
      OS << "LMULLessThanOrEqualToM1";
      break;
    case LMULNone:
      OS << "LMULNone";
      break;
    };
    OS << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
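/// For example, e8/m1 and e16/m2 share SEW/LMUL ratio 8, so switching between
/// them is invisible to an instruction that only demands SEWLMULRatio.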
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  switch (Used.LMUL) {
  case DemandedFields::LMULNone:
    break;
  case DemandedFields::LMULEqual:
    if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
      return false;
    break;
  case DemandedFields::LMULLessThanOrEqualToM1:
    if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
      return false;
    break;
  }

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // This function works in coalesceVSETVLI too. We can still use the value of a
  // SEW, VL, or Policy operand even though it might not be the exact value in
  // the VL or VTYPE, since we only care about what the instruction originally
  // demanded.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too.
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
          !VLOp.isReg() || !VLOp.isUndef())
        Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode.  This is asserted when constructing the VSETVLIInfo.
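  // For example (illustrative): vle32.v has EEW=32, so under e32/m1 and under
  // e8/mf4 it computes the same EMUL of m1, because the SEW/LMUL ratio is 32
  // in both configurations.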
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits.  Warning: It's tempting to try
    // doing this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1.  We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    // A slidedown/slideup with an *undefined* merge op can freely clobber
    // elements not copied from the source vector (e.g. masked off, tail, or
    // slideup's prefix). Notes:
    // * We can't modify SEW here since the slide amount is in units of SEW.
    // * VL=1 is special only because we have existing support for zero vs
    //   non-zero VL.  We could generalize this if we had a VL > C predicate.
    // * The LMUL1 restriction is for machines whose latency may depend on VL.
    // * As above, this is only legal for tail "undefined" not "agnostic".
    if (isVSlideInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedMergeOp(MI)) {
      Res.VLAny = false;
      Res.VLZeroness = true;
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.TailPolicy = false;
    }

    // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the
    // same semantically as vmv.s.x.  This is particularly useful since we
    // don't have an immediate form of vmv.s.x, and thus frequently use
    // vmv.v.i in its place. Since a splat is non-constant time in LMUL, we do
    // need to be careful to not increase the number of active vector
    // registers (unlike for vmv.s.x.)
    if (isScalarSplatInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedMergeOp(MI)) {
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.SEWLMULRatio = false;
      Res.VLAny = false;
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    // Every AVLDef should have a VNInfo, unless we're running without
    // LiveIntervals in which case this will be nullptr.
    const VNInfo *ValNo;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    Unknown, // AVL and VTYPE are fully unknown
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
    assert(AVLReg.isVirtual());
    AVLRegDef.ValNo = VNInfo;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const VNInfo *getAVLVNInfo() const {
    assert(hasAVLReg());
    return AVLRegDef.ValNo;
  }
  // Most AVLIsReg infos will have a single defining MachineInstr, unless it was
  // a PHI node. In that case getAVLVNInfo()->def will point to the block
  // boundary slot and this will return nullptr.  If LiveIntervals isn't
  // available, nullptr is also returned.
  const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
    assert(hasAVLReg());
    if (!LIS || getAVLVNInfo()->isPHIDef())
      return nullptr;
    auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
    assert(MI);
    return MI;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const LiveIntervals *LIS) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (auto *DefMI = getAVLDefMI(LIS))
        return isNonZeroLoadImmediate(*DefMI);
    }
    if (hasAVLVLMAX())
      return true;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const LiveIntervals *LIS) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
  }

  bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return getAVLReg() == Other.getAVLReg();
      return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
             getAVLReg() == Other.getAVLReg();
    }

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    return false;
  }

  // Return true if the two lattice values are guaranteed to have
  // the same AVL value at runtime.
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    // Without LiveIntervals, we don't know which instruction defines a
    // register.  Since a register may be redefined, this means all AVLIsReg
    // states must be treated as possibly distinct.
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return false;
    }
    return hasSameAVLLatticeValue(Other);
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
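  // For example, e8/mf8 and e64/m1 both have SEW/LMUL ratio 64, so both give
  // VLMAX = VLEN/64 and map any AVL to the same VL.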
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this.  MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const LiveIntervals *LIS) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVLLatticeValue(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor; use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << llvm::printReg(getAVLReg());
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  // Possibly null!
  LiveIntervals *LIS;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    AU.addPreserved<LiveDebugVariables>();
    AU.addPreserved<LiveStacks>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);

  bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
                            const DemandedFields &Used) const;
  void coalesceVSETVLIs(MachineBasicBlock &MBB) const;

  VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
  VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
  void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
// replace the AVL operand with the AVL of the defining vsetvli. E.g.
//
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
// ->
// %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
// $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
  if (!Info.hasAVLReg())
    return;
  const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;
  VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
  if (!DefInstrInfo.hasSameVLMAX(Info))
    return;
  Info.setAVL(DefInstrInfo);
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
VSETVLIInfo
RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    if (AVLReg == RISCV::X0)
      NewInfo.setAVLVLMAX();
    else if (MI.getOperand(1).isUndef())
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      NewInfo.setAVLImm(1);
    else {
      VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
      NewInfo.setAVLRegDef(VNI, AVLReg);
    }
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  forwardVSETVLIAVL(NewInfo);

  return NewInfo;
}

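// Compute VLMAX = (VLEN * LMUL) / SEW.  For example, VLEN=128, SEW=32, and
// LMUL=1/2 give VLMAX = (128/2)/32 = 2.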
static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}

VSETVLIInfo
RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
  VSETVLIInfo InstrInfo;
  const uint64_t TSFlags = MI.getDesc().TSFlags;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead.  This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
        if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else if (VLOp.isUndef()) {
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      InstrInfo.setAVLImm(1);
    } else {
      VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
      InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    // Pick a random value for state tracking purposes; it will be ignored via
    // the demanded fields mechanism.
    InstrInfo.setAVLImm(1);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  forwardVSETVLIAVL(InstrInfo);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
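    // For example (illustrative), "vsetvli x0, x0, e32, m1, ta, ma" updates
    // VTYPE while leaving the current VL unchanged, avoiding a GPR AVL
    // operand.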
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                    .addReg(RISCV::X0, RegState::Kill)
                    .addImm(Info.encodeVTYPE())
                    .addReg(RISCV::VL, RegState::Implicit);
      if (LIS)
        LIS->InsertMachineInstrInMaps(*MI);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
          DefMI && isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                        .addReg(RISCV::X0, RegState::Kill)
                        .addImm(Info.encodeVTYPE())
                        .addReg(RISCV::VL, RegState::Implicit);
          if (LIS)
            LIS->InsertMachineInstrInMaps(*MI);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addImm(Info.getAVLImm())
                  .addImm(Info.encodeVTYPE());
    if (LIS)
      LIS->InsertMachineInstrInMaps(*MI);
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                  .addReg(DestReg, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE());
    if (LIS) {
      LIS->InsertMachineInstrInMaps(*MI);
      LIS->createAndComputeVirtRegInterval(DestReg);
    }
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(AVLReg)
                .addImm(Info.encodeVTYPE());
  if (LIS) {
    LIS->InsertMachineInstrInMaps(*MI);
    LiveInterval &LI = LIS->getInterval(AVLReg);
    SlotIndex SI = LIS->getInstructionIndex(*MI).getRegSlot();
    // If the AVL value isn't live at MI, do a quick check to see if it's easily
    // extendable. Otherwise, we need to copy it.
    if (LI.getVNInfoBefore(SI) != Info.getAVLVNInfo()) {
      if (!LI.liveAt(SI) && LI.containsOneValue())
        LIS->extendToIndices(LI, SI);
      else {
        Register AVLCopyReg =
            MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
        MachineBasicBlock::iterator II;
        if (Info.getAVLVNInfo()->isPHIDef())
          II = LIS->getMBBFromIndex(Info.getAVLVNInfo()->def)->getFirstNonPHI();
        else {
          II = LIS->getInstructionFromIndex(Info.getAVLVNInfo()->def);
          II = std::next(II);
        }
        assert(II.isValid());
        auto AVLCopy =
            BuildMI(*II->getParent(), II, DL, TII->get(RISCV::COPY), AVLCopyReg)
                .addReg(AVLReg);
        LIS->InsertMachineInstrInMaps(*AVLCopy);
        MI->getOperand(1).setReg(AVLCopyReg);
        LIS->createAndComputeVirtRegInterval(AVLCopyReg);
      }
    }
  }
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// given a set of DemandedFields \p Used.
bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  if (CurInfo.isCompatible(Used, Require, LIS))
    return false;

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
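// For example (illustrative): if the previous state was e32/m1 (ratio 32) and
// the new instruction wants e8 without demanding LMUL or the ratio, picking
// e8/mf4 keeps the ratio at 32, so a VL-preserving 'vsetvli x0, x0, vtype'
// can be used.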
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = DemandedFields::LMULEqual;
  }

  return Info;
}

// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return;

  DemandedFields Demanded = getDemanded(MI, ST);

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs.  This removes a vsetvli entirely if the types
  // match or allows use of the cheaper avl preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending the live range of
  // an avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to the vl-output of the fault-first load.
    assert(MI.getOperand(1).getReg().isVirtual());
    if (LIS) {
      auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
      SlotIndex SI =
          LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
      VNInfo *VNI = LI.getVNInfoAt(SI);
      Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
    } else
      Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun the block.
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based on
  // the input state.  To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
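// For example (illustrative):
//   bb.1: %vl1 = PseudoVSETVLI %avl1, <vtype>
//   bb.2: %vl2 = PseudoVSETVLI %avl2, <vtype>
//   bb.3: %avl = PHI [ %vl1, %bb.1 ], [ %vl2, %bb.2 ]
// If each predecessor's exit state matches its defining vsetvli and the
// required VTYPE, a vector op in bb.3 using %avl needs no new vsetvli.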
needVSETVLIPHI(const VSETVLIInfo & Require,const MachineBasicBlock & MBB) const1366 bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
1367                                         const MachineBasicBlock &MBB) const {
1368   if (!Require.hasAVLReg())
1369     return true;
1370 
1371   if (!LIS)
1372     return true;
1373 
  // We need the AVL to have been produced by a PHI node in this basic block.
  const VNInfo *Valno = Require.getAVLVNInfo();
  if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
    return true;

  const LiveRange &LR = LIS->getInterval(Require.getAVLReg());

  for (auto *PBB : MBB.predecessors()) {
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
    if (!Value)
      return true;
    MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for, we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

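/// Phase 3 worker: rewrite one basic block, inserting the vsetvli
/// instructions the dataflow analysis proved necessary and attaching the
/// implicit VL/VTYPE uses to each vector instruction.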
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
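  // While the prefix stays transparent, the state on entry to the block still
  // matches the predecessors' exit states, which is what allows the
  // needVSETVLIPHI check below to reason about PHI incoming values.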
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had, since we know the GPR result of the implicit state change
        // wouldn't be used and the VL/VTYPE registers are correct.  Note
        // that we *do* need to model the state as if it changed: while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
          if (LIS) {
            LiveInterval &LI = LIS->getInterval(Reg);
            SmallVector<MachineInstr *> DeadMIs;
            LIS->shrinkToUses(&LI, &DeadMIs);
            // We might have separate components that need to be split, due to
            // needVSETVLIPHI causing us to skip inserting a new VL def.
            SmallVector<LiveInterval *> SplitLIs;
            LIS->splitSeparateComponents(LI, SplitLIs);

            // If the AVL was an immediate > 31, then it would have been emitted
            // as an ADDI. However, the ADDI might not have been used in the
            // vsetvli, or a vsetvli might not have been emitted, so it may be
            // dead now.
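            // Hypothetical example of such a dead ADDI (register names are
            // illustrative only):
            //   %avl:gpr = ADDI $x0, 42  ; 42 doesn't fit the 5-bit uimm
            //                            ; AVL of a vsetivli
            //   ... vector op whose AVL use of %avl was just erased ...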
            for (MachineInstr *DeadMI : DeadMIs) {
              if (!TII->isAddImmediate(*DeadMI, Reg))
                continue;
              LIS->RemoveMachineInstrFromMaps(*DeadMI);
              DeadMI->eraseFromParent();
            }
          }
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert, by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors.  Specifically,
/// this is geared to catch the common case of a fixed-length vsetvli in a
/// single-block loop which could instead execute once in the preheader.
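///
/// As an illustrative sketch (hypothetical CFG, not from a real test case):
///
///   preheader:
///     ...                        ; no vector configuration available
///   loop:
///     vsetvli zero, a0, e32, m1  ; executes on every iteration
///     <vector ops>
///     bnez a1, loop
///
/// Hoisting the vsetvli to the end of the preheader makes the configuration
/// available on entry to the loop, and phase 3 then proves the in-loop
/// vsetvli redundant.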
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE (full
  // redundancy elimination) is handled by phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  if (!LIS)
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point where we're
  // going to insert the vsetvli.
  if (AvailableInfo.hasAVLReg()) {
    SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (LIS->getMBBFromIndex(SI) != UnavailablePred)
      return;
    if (!UnavailablePred->terminators().empty() &&
        SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
      return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo.  We're looking for two issues here: one of legality,
  // one of profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state.  We cannot
  //    make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence.  All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update the dataflow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for the correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE.  Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
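//
// Illustrative example (hypothetical operands): given
//   %prev_vl = PseudoVSETVLI %avl, <vtype1>   ; PrevMI
//   ...
//   dead %vl = PseudoVSETVLI %avl, <vtype2>   ; MI
// if nothing between them demands a field on which <vtype1> and <vtype2>
// disagree, PrevMI can be rewritten to use <vtype2> and MI deleted.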
bool RISCVInsertVSETVLI::canMutatePriorConfig(
    const MachineInstr &PrevMI, const MachineInstr &MI,
    const DemandedFields &Used) const {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
                                                       LIS))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);

    // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
    // For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
        (!MRI->hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
         PrevAVL.getReg() != AVL.getReg()))
      return false;
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

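/// Do block-local coalescing of the vsetvli instructions inserted by phase 3:
/// walk the block bottom-up tracking which VL/VTYPE fields are demanded,
/// delete configurations whose effects are never observed, and fold
/// compatible pairs into a single instruction.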
void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr*> ToDelete;

  // Update LIS and clean up dead AVLs, given a value which has had one
  // use (as an AVL) removed.
  auto afterDroppedAVLUse = [&](Register OldVLReg) {
    if (LIS)
      LIS->shrinkToUses(&LIS->getInterval(OldVLReg));

    MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
    if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
        MRI->use_nodbg_empty(OldVLReg)) {
      if (LIS) {
        LIS->removeInterval(OldVLReg);
        LIS->RemoveMachineInstrFromMaps(*VLOpDef);
      }
      VLOpDef->eraseFromParent();
    }
  };

  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      Used.doUnion(getDemanded(MI, ST));
      if (MI.isCall() || MI.isInlineAsm() ||
          MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
          MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
        NextMI = nullptr;
      continue;
    }

    if (!MI.getOperand(0).isDead())
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      }

      if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!isVLPreservingConfig(*NextMI)) {
          Register DefReg = NextMI->getOperand(0).getReg();

          MI.getOperand(0).setReg(DefReg);
          MI.getOperand(0).setIsDead(false);

          // The def of DefReg moved to MI, so extend the LiveInterval up to
          // it.
          if (DefReg.isVirtual() && LIS) {
            LiveInterval &DefLI = LIS->getInterval(DefReg);
            SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
            VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
            LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
            DefLI.addSegment(S);
            DefVNI->def = MISlot;
            // Mark DefLI as spillable if it was previously unspillable
            DefLI.setWeight(0);

            // DefReg may have had no uses, in which case we need to shrink
            // the LiveInterval up to MI.
            LIS->shrinkToUses(&DefLI);
          }

          Register OldVLReg;
          if (MI.getOperand(1).isReg())
            OldVLReg = MI.getOperand(1).getReg();
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
          if (OldVLReg && OldVLReg.isVirtual())
            afterDroppedAVLUse(OldVLReg);

          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, ST);
  }

  NumCoalescedVSETVL += ToDelete.size();
  for (auto *MI : ToDelete) {
    if (LIS)
      LIS->RemoveMachineInstrFromMaps(*MI);
    Register OldAVLReg;
    if (MI->getOperand(1).isReg())
      OldAVLReg = MI->getOperand(1).getReg();
    MI->eraseFromParent();
    if (OldAVLReg && OldAVLReg.isVirtual())
      afterDroppedAVLUse(OldAVLReg);
  }
}

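/// Expand the vl output of fault-only-first loads (VLEFF/VLSEGFF): insert a
/// PseudoReadVL after each such load whose vl result is live, redirect the
/// result to the PseudoReadVL, and drop the load's own vl output.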
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      assert(VLOutput.isVirtual());
      if (!MI.getOperand(1).isDead()) {
        auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
                                TII->get(RISCV::PseudoReadVL), VLOutput);
        // Move the LiveInterval's definition down to PseudoReadVL.
        if (LIS) {
          SlotIndex NewDefSI =
              LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
          LiveInterval &DefLI = LIS->getInterval(VLOutput);
          VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
          DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
          DefVNI->def = NewDefSI;
        }
      }
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block-local
  // DSE and peephole transforms based on demanded fields.  Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross-block analysis within the dataflow.  We can't have both
  // demanded-fields-based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    coalesceVSETVLIs(MBB);

  // Insert PseudoReadVL after each VLEFF/VLSEGFF and replace the vl output
  // of the VLEFF/VLSEGFF with it.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}