//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the RISC-V implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "RISCVInstrInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

#define GEN_CHECK_COMPRESS_INSTR
#include "RISCVGenCompressInstEmitter.inc"

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#include "RISCVGenInstrInfo.inc"

static cl::opt<bool> PreferWholeRegisterMove(
    "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
    cl::desc("Prefer whole register move for vector registers."));

static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
    "riscv-force-machine-combiner-strategy", cl::Hidden,
    cl::desc("Force machine combiner to use a specific strategy for machine "
             "trace metrics evaluation."),
    cl::init(MachineTraceStrategy::TS_NumStrategies),
    cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
                          "Local strategy."),
               clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
                          "MinInstrCount strategy.")));

namespace llvm::RISCVVPseudosTable {

using namespace RISCV;

#define GET_RISCVVPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVPseudosTable

RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
    : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
      STI(STI) {}

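// Illustrative note: when the C or Zca extension is available, the nop below
// assembles to the 2-byte c.nop; otherwise it is the canonical 4-byte
// addi x0, x0, 0.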
MCInst RISCVInstrInfo::getNop() const {
  if (STI.hasStdExtCOrZca())
    return MCInstBuilder(RISCV::C_NOP);
  return MCInstBuilder(RISCV::ADDI)
      .addReg(RISCV::X0)
      .addReg(RISCV::X0)
      .addImm(0);
}

unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex) const {
  unsigned Dummy;
  return isLoadFromStackSlot(MI, FrameIndex, Dummy);
}

unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex,
                                             unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;
  case RISCV::LB:
  case RISCV::LBU:
    MemBytes = 1;
    break;
  case RISCV::LH:
  case RISCV::LHU:
  case RISCV::FLH:
    MemBytes = 2;
    break;
  case RISCV::LW:
  case RISCV::FLW:
  case RISCV::LWU:
    MemBytes = 4;
    break;
  case RISCV::LD:
  case RISCV::FLD:
    MemBytes = 8;
    break;
  }

  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
      MI.getOperand(2).getImm() == 0) {
    FrameIndex = MI.getOperand(1).getIndex();
    return MI.getOperand(0).getReg();
  }

  return 0;
}

unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex) const {
  unsigned Dummy;
  return isStoreToStackSlot(MI, FrameIndex, Dummy);
}

unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex,
                                            unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;
  case RISCV::SB:
    MemBytes = 1;
    break;
  case RISCV::SH:
  case RISCV::FSH:
    MemBytes = 2;
    break;
  case RISCV::SW:
  case RISCV::FSW:
    MemBytes = 4;
    break;
  case RISCV::SD:
  case RISCV::FSD:
    MemBytes = 8;
    break;
  }

  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
      MI.getOperand(2).getImm() == 0) {
    FrameIndex = MI.getOperand(1).getIndex();
    return MI.getOperand(0).getReg();
  }

  return 0;
}

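// Returns true when a forward (ascending) register-by-register copy of a
// NumRegs-wide tuple would overwrite a source register before it has been
// read. E.g. copying v8_v9 to v9_v10 in ascending order clobbers v9 while it
// is still a pending source, so the caller must copy in descending order.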
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
}

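// Decide whether a whole-register vector COPY can instead be lowered to
// vmv.v.v (or vmv.v.i) by walking backwards from the COPY to the producing
// instruction and checking that every intervening vsetvli leaves VL/VTYPE
// compatible. On success, DefMBBI is set to the producing instruction.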
static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
                                   const MachineBasicBlock &MBB,
                                   MachineBasicBlock::const_iterator MBBI,
                                   MachineBasicBlock::const_iterator &DefMBBI,
                                   RISCVII::VLMUL LMul) {
  if (PreferWholeRegisterMove)
    return false;

  assert(MBBI->getOpcode() == TargetOpcode::COPY &&
         "Unexpected COPY instruction.");
  Register SrcReg = MBBI->getOperand(1).getReg();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  bool FoundDef = false;
  bool FirstVSetVLI = false;
  unsigned FirstSEW = 0;
  while (MBBI != MBB.begin()) {
    --MBBI;
    if (MBBI->isMetaInstruction())
      continue;

    if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
        MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
        MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
      // There is a vsetvli between the COPY and the source-defining
      // instruction:
      // vy = def_vop ...  (producing instruction)
      // ...
      // vsetvli
      // ...
      // vx = COPY vy
      if (!FoundDef) {
        if (!FirstVSetVLI) {
          FirstVSetVLI = true;
          unsigned FirstVType = MBBI->getOperand(2).getImm();
          RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
          FirstSEW = RISCVVType::getSEW(FirstVType);
          // The first vsetvli encountered must have the same LMUL as the
          // register class of the COPY.
          if (FirstLMul != LMul)
            return false;
        }
        // Only permit `vsetvli x0, x0, vtype` between the COPY and the
        // source-defining instruction.
        if (MBBI->getOperand(0).getReg() != RISCV::X0)
          return false;
        if (MBBI->getOperand(1).isImm())
          return false;
        if (MBBI->getOperand(1).getReg() != RISCV::X0)
          return false;
        continue;
      }

      // MBBI is the first vsetvli before the producing instruction.
      unsigned VType = MBBI->getOperand(2).getImm();
      // If there was a vsetvli between the COPY and the producing
      // instruction, the SEWs must match.
      if (FirstVSetVLI) {
        if (RISCVVType::getSEW(VType) != FirstSEW)
          return false;
      }

      // If the vsetvli is tail undisturbed, keep the whole register move.
      if (!RISCVVType::isTailAgnostic(VType))
        return false;

      // The check is conservative. We only have register classes for
      // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
      // for fractional LMUL operations. However, we cannot use the vsetvli
      // LMUL for widening operations, whose results are 2 x LMUL.
      return LMul == RISCVVType::getVLMUL(VType);
    } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
      return false;
    } else if (MBBI->getNumDefs()) {
      // Check all the instructions which may change VL.
      // For example, vleff has an implicit def of VL.
      if (MBBI->modifiesRegister(RISCV::VL))
        return false;

      // Only convert whole register copies to vmv.v.v when the defining
      // value appears in the explicit operands.
      for (const MachineOperand &MO : MBBI->explicit_operands()) {
        if (!MO.isReg() || !MO.isDef())
          continue;
        if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
          // We only permit the source of the COPY to have the same LMUL as
          // the defined operand. There are cases where we must keep the
          // whole register copy because the LMULs differ.
          // For example,
          // $x0 = PseudoVSETIVLI 4, 73   // vsetivli zero, 4, e16, m2, ta, mu
          // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
          // # The COPY may be created by the vlmul_trunc intrinsic.
          // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
          //
          // After widening, the valid value will be 4 x e32 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
          // FIXME: A COPY of a subregister of a Zvlsseg register will not be
          // convertible to vmv.v.[v|i] under this constraint.
          if (MO.getReg() != SrcReg)
            return false;

          // For widening reduction instructions with an LMUL_1 input vector,
          // checking the LMUL alone is insufficient because the reduction
          // result is always LMUL_1.
          // For example,
          // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
          // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
          // $v26 = COPY killed renamable $v8
          // After widening, the valid value will be 1 x e16 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
          uint64_t TSFlags = MBBI->getDesc().TSFlags;
          if (RISCVII::isRVVWideningReduction(TSFlags))
            return false;

          // If the producing instruction does not depend on vsetvli, do not
          // convert the COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
          if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
            return false;

          // Found the definition.
          FoundDef = true;
          DefMBBI = MBBI;
          break;
        }
      }
    }
  }

  return false;
}

void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 const DebugLoc &DL, MCRegister DstReg,
                                 MCRegister SrcReg, bool KillSrc) const {
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  if (RISCV::GPRPF64RegClass.contains(DstReg))
    DstReg = TRI->getSubReg(DstReg, RISCV::sub_32);
  if (RISCV::GPRPF64RegClass.contains(SrcReg))
    SrcReg = TRI->getSubReg(SrcReg, RISCV::sub_32);

  if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addImm(0);
    return;
  }

  // Handle copy from a CSR.
  if (RISCV::VCSRRegClass.contains(SrcReg) &&
      RISCV::GPRRegClass.contains(DstReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
        .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
        .addReg(RISCV::X0);
    return;
  }

  // FPR->FPR copies and VR->VR copies.
  unsigned Opc;
  bool IsScalableVector = true;
  unsigned NF = 1;
  RISCVII::VLMUL LMul = RISCVII::LMUL_1;
  unsigned SubRegIdx = RISCV::sub_vrm1_0;
  if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
    if (STI.hasStdExtZfh()) {
      Opc = RISCV::FSGNJ_H;
    } else {
      assert(STI.hasStdExtF() && STI.hasStdExtZfhmin() &&
             "Unexpected extensions");
      // The Zfhmin subset doesn't have FSGNJ_H, so replace FSGNJ_H with
      // FSGNJ_S on the matching FPR32 super-registers.
      DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
                                        &RISCV::FPR32RegClass);
      SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
                                        &RISCV::FPR32RegClass);
      Opc = RISCV::FSGNJ_S;
    }
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::FSGNJ_S;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::FSGNJ_D;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV1R_V;
    LMul = RISCVII::LMUL_1;
  } else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV2R_V;
    LMul = RISCVII::LMUL_2;
  } else if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV4R_V;
    LMul = RISCVII::LMUL_4;
  } else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV8R_V;
    LMul = RISCVII::LMUL_8;
  } else if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV1R_V;
    SubRegIdx = RISCV::sub_vrm1_0;
    NF = 2;
    LMul = RISCVII::LMUL_1;
  } else if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV2R_V;
    SubRegIdx = RISCV::sub_vrm2_0;
    NF = 2;
    LMul = RISCVII::LMUL_2;
  } else if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV4R_V;
    SubRegIdx = RISCV::sub_vrm4_0;
    NF = 2;
    LMul = RISCVII::LMUL_4;
  } else if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV1R_V;
    SubRegIdx = RISCV::sub_vrm1_0;
    NF = 3;
    LMul = RISCVII::LMUL_1;
  } else if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV2R_V;
    SubRegIdx = RISCV::sub_vrm2_0;
    NF = 3;
    LMul = RISCVII::LMUL_2;
  } else if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV1R_V;
    SubRegIdx = RISCV::sub_vrm1_0;
    NF = 4;
    LMul = RISCVII::LMUL_1;
  } else if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV2R_V;
    SubRegIdx = RISCV::sub_vrm2_0;
    NF = 4;
    LMul = RISCVII::LMUL_2;
  } else if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV1R_V;
    SubRegIdx = RISCV::sub_vrm1_0;
    NF = 5;
    LMul = RISCVII::LMUL_1;
  } else if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV1R_V;
    SubRegIdx = RISCV::sub_vrm1_0;
    NF = 6;
    LMul = RISCVII::LMUL_1;
  } else if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV1R_V;
    SubRegIdx = RISCV::sub_vrm1_0;
    NF = 7;
    LMul = RISCVII::LMUL_1;
  } else if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
    Opc = RISCV::VMV1R_V;
    SubRegIdx = RISCV::sub_vrm1_0;
    NF = 8;
    LMul = RISCVII::LMUL_1;
  } else {
    llvm_unreachable("Impossible reg-to-reg copy");
  }

  if (IsScalableVector) {
    bool UseVMV_V_V = false;
    bool UseVMV_V_I = false;
    MachineBasicBlock::const_iterator DefMBBI;
    if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
      UseVMV_V_V = true;
      // We only need to handle LMUL = 1/2/4/8 here because we only define
      // vector register classes for LMUL = 1/2/4/8.
      unsigned VIOpc;
      switch (LMul) {
      default:
        llvm_unreachable("Impossible LMUL for vector register copy.");
      case RISCVII::LMUL_1:
        Opc = RISCV::PseudoVMV_V_V_M1;
        VIOpc = RISCV::PseudoVMV_V_I_M1;
        break;
      case RISCVII::LMUL_2:
        Opc = RISCV::PseudoVMV_V_V_M2;
        VIOpc = RISCV::PseudoVMV_V_I_M2;
        break;
      case RISCVII::LMUL_4:
        Opc = RISCV::PseudoVMV_V_V_M4;
        VIOpc = RISCV::PseudoVMV_V_I_M4;
        break;
      case RISCVII::LMUL_8:
        Opc = RISCV::PseudoVMV_V_V_M8;
        VIOpc = RISCV::PseudoVMV_V_I_M8;
        break;
      }

      if (DefMBBI->getOpcode() == VIOpc) {
        UseVMV_V_I = true;
        Opc = VIOpc;
      }
    }

    if (NF == 1) {
      auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
      if (UseVMV_V_V)
        MIB.addReg(DstReg, RegState::Undef);
      if (UseVMV_V_I)
        MIB = MIB.add(DefMBBI->getOperand(2));
      else
        MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
      if (UseVMV_V_V) {
        const MCInstrDesc &Desc = DefMBBI->getDesc();
        MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
        MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
        MIB.addImm(0); // tu, mu
        MIB.addReg(RISCV::VL, RegState::Implicit);
        MIB.addReg(RISCV::VTYPE, RegState::Implicit);
      }
    } else {
      int I = 0, End = NF, Incr = 1;
      unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
      unsigned DstEncoding = TRI->getEncodingValue(DstReg);
      unsigned LMulVal;
      bool Fractional;
      std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul);
      assert(!Fractional && "Fractional LMUL is impossible here.");
      if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
        I = NF - 1;
        End = -1;
        Incr = -1;
      }

      for (; I != End; I += Incr) {
        auto MIB = BuildMI(MBB, MBBI, DL, get(Opc),
                           TRI->getSubReg(DstReg, SubRegIdx + I));
        if (UseVMV_V_V)
          MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I),
                     RegState::Undef);
        if (UseVMV_V_I)
          MIB = MIB.add(DefMBBI->getOperand(2));
        else
          MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
                           getKillRegState(KillSrc));
        if (UseVMV_V_V) {
          const MCInstrDesc &Desc = DefMBBI->getDesc();
          MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
          MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
          MIB.addImm(0); // tu, mu
          MIB.addReg(RISCV::VL, RegState::Implicit);
          MIB.addReg(RISCV::VTYPE, RegState::Implicit);
        }
      }
    }
  } else {
    BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
  }
}

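// Spills of scalable vector registers use the whole-register stores
// (vs1r.v .. vs8r.v) or the PseudoVSPILL<N>_M<LMUL> tuple pseudos; their
// frame objects are retagged as TargetStackID::ScalableVector because the
// spill size depends on VLEN and is unknown at compile time.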
void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I,
                                         Register SrcReg, bool IsKill, int FI,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI,
                                         Register VReg) const {
  DebugLoc DL;
  if (I != MBB.end())
    DL = I->getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  unsigned Opcode;
  bool IsScalableVector = true;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
             RISCV::SW : RISCV::SD;
    IsScalableVector = false;
  } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::PseudoRV32ZdinxSD;
    IsScalableVector = false;
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSH;
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSW;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSD;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS1R_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS2R_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS4R_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS8R_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL8_M1;
  else
    llvm_unreachable("Can't store this register to stack slot");

  if (IsScalableVector) {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
        MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));

    MFI.setStackID(FI, TargetStackID::ScalableVector);
    BuildMI(MBB, I, DL, get(Opcode))
        .addReg(SrcReg, getKillRegState(IsKill))
        .addFrameIndex(FI)
        .addMemOperand(MMO);
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    BuildMI(MBB, I, DL, get(Opcode))
        .addReg(SrcReg, getKillRegState(IsKill))
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO);
  }
}

void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          Register DstReg, int FI,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI,
                                          Register VReg) const {
  DebugLoc DL;
  if (I != MBB.end())
    DL = I->getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  unsigned Opcode;
  bool IsScalableVector = true;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
             RISCV::LW : RISCV::LD;
    IsScalableVector = false;
  } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::PseudoRV32ZdinxLD;
    IsScalableVector = false;
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLH;
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLW;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLD;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL1RE8_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL2RE8_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL4RE8_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL8RE8_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD8_M1;
  else
    llvm_unreachable("Can't load this register from stack slot");

  if (IsScalableVector) {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
        MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));

    MFI.setStackID(FI, TargetStackID::ScalableVector);
    BuildMI(MBB, I, DL, get(Opcode), DstReg)
        .addFrameIndex(FI)
        .addMemOperand(MMO);
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    BuildMI(MBB, I, DL, get(Opcode), DstReg)
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO);
  }
}

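// Illustrative sketch of the fold performed below: a stack reload followed by
// a sign/zero extension, e.g.
//   %1 = LW %stack.0, 0
//   %2 = SEXT_B %1
// is folded into a single narrower extending load:
//   %2 = LB %stack.0, 0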
MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
    VirtRegMap *VRM) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // The below optimizations narrow the load so they are only valid for little
  // endian.
  // TODO: Support big endian by adding an offset into the frame object?
  if (MF.getDataLayout().isBigEndian())
    return nullptr;

  // Fold load from stack followed by sext.w into lw.
  // TODO: Fold with sext.b, sext.h, zext.b, zext.h, zext.w?
  if (Ops.size() != 1 || Ops[0] != 1)
    return nullptr;

  unsigned LoadOpc;
  switch (MI.getOpcode()) {
  default:
    if (RISCV::isSEXT_W(MI)) {
      LoadOpc = RISCV::LW;
      break;
    }
    if (RISCV::isZEXT_W(MI)) {
      LoadOpc = RISCV::LWU;
      break;
    }
    if (RISCV::isZEXT_B(MI)) {
      LoadOpc = RISCV::LBU;
      break;
    }
    return nullptr;
  case RISCV::SEXT_H:
    LoadOpc = RISCV::LH;
    break;
  case RISCV::SEXT_B:
    LoadOpc = RISCV::LB;
    break;
  case RISCV::ZEXT_H_RV32:
  case RISCV::ZEXT_H_RV64:
    LoadOpc = RISCV::LHU;
    break;
  }

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIndex),
      MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
      MFI.getObjectAlign(FrameIndex));

  Register DstReg = MI.getOperand(0).getReg();
  return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
                 DstReg)
      .addFrameIndex(FrameIndex)
      .addImm(0)
      .addMemOperand(MMO);
}

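// Illustrative sketch: on RV32, movImm of 0x12345678 typically expands via
// RISCVMatInt to
//   lui  dst, 0x12345
//   addi dst, dst, 0x678
// Larger RV64 constants may add slli/addi (or other) steps to the sequence.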
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            const DebugLoc &DL, Register DstReg, uint64_t Val,
                            MachineInstr::MIFlag Flag) const {
  Register SrcReg = RISCV::X0;

  if (!STI.is64Bit() && !isInt<32>(Val))
    report_fatal_error("Should only materialize 32-bit constants for RV32");

  RISCVMatInt::InstSeq Seq =
      RISCVMatInt::generateInstSeq(Val, STI.getFeatureBits());
  assert(!Seq.empty());

  for (const RISCVMatInt::Inst &Inst : Seq) {
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
          .addImm(Inst.getImm())
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegX0:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
          .addReg(SrcReg, RegState::Kill)
          .addReg(RISCV::X0)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegReg:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
          .addReg(SrcReg, RegState::Kill)
          .addReg(SrcReg, RegState::Kill)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegImm:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
          .addReg(SrcReg, RegState::Kill)
          .addImm(Inst.getImm())
          .setMIFlag(Flag);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = DstReg;
  }
}

static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
  switch (Opc) {
  default:
    return RISCVCC::COND_INVALID;
  case RISCV::BEQ:
    return RISCVCC::COND_EQ;
  case RISCV::BNE:
    return RISCVCC::COND_NE;
  case RISCV::BLT:
    return RISCVCC::COND_LT;
  case RISCV::BGE:
    return RISCVCC::COND_GE;
  case RISCV::BLTU:
    return RISCVCC::COND_LTU;
  case RISCV::BGEU:
    return RISCVCC::COND_GEU;
  }
}

// The contents of values added to Cond are not examined outside of
// RISCVInstrInfo, giving us flexibility in what to push to it. For RISC-V, we
// push the condition code and the compare operands: CC, Reg1, Reg2.
static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  assert(LastInst.getDesc().isConditionalBranch() &&
         "Unknown conditional branch");
  Target = LastInst.getOperand(2).getMBB();
  unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
  Cond.push_back(MachineOperand::CreateImm(CC));
  Cond.push_back(LastInst.getOperand(0));
  Cond.push_back(LastInst.getOperand(1));
}

const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
  switch (CC) {
  default:
    llvm_unreachable("Unknown condition code!");
  case RISCVCC::COND_EQ:
    return get(RISCV::BEQ);
  case RISCVCC::COND_NE:
    return get(RISCV::BNE);
  case RISCVCC::COND_LT:
    return get(RISCV::BLT);
  case RISCVCC::COND_GE:
    return get(RISCV::BGE);
  case RISCVCC::COND_LTU:
    return get(RISCV::BLTU);
  case RISCVCC::COND_GEU:
    return get(RISCV::BGEU);
  }
}

RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unrecognized conditional branch");
  case RISCVCC::COND_EQ:
    return RISCVCC::COND_NE;
  case RISCVCC::COND_NE:
    return RISCVCC::COND_EQ;
  case RISCVCC::COND_LT:
    return RISCVCC::COND_GE;
  case RISCVCC::COND_GE:
    return RISCVCC::COND_LT;
  case RISCVCC::COND_LTU:
    return RISCVCC::COND_GEU;
  case RISCVCC::COND_GEU:
    return RISCVCC::COND_LTU;
  }
}

bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  TBB = FBB = nullptr;
  Cond.clear();

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end() || !isUnpredicatedTerminator(*I))
    return false;

  // Count the number of terminators and find the first unconditional or
  // indirect branch.
  MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
  int NumTerminators = 0;
  for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
       J++) {
    NumTerminators++;
    if (J->getDesc().isUnconditionalBranch() ||
        J->getDesc().isIndirectBranch()) {
      FirstUncondOrIndirectBr = J.getReverse();
    }
  }

  // If AllowModify is true, we can erase any terminators after
  // FirstUncondOrIndirectBr.
  if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
    while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
      std::next(FirstUncondOrIndirectBr)->eraseFromParent();
      NumTerminators--;
    }
    I = FirstUncondOrIndirectBr;
  }

  // We can't handle blocks that end in an indirect branch.
  if (I->getDesc().isIndirectBranch())
    return true;

  // We can't handle blocks with more than 2 terminators.
  if (NumTerminators > 2)
    return true;

  // Handle a single unconditional branch.
  if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
    TBB = getBranchDestBlock(*I);
    return false;
  }

  // Handle a single conditional branch.
  if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
    parseCondBranch(*I, TBB, Cond);
    return false;
  }

  // Handle a conditional branch followed by an unconditional branch.
  if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
      I->getDesc().isUnconditionalBranch()) {
    parseCondBranch(*std::prev(I), TBB, Cond);
    FBB = getBranchDestBlock(*I);
    return false;
  }

  // Otherwise, we can't handle this.
  return true;
}

unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                      int *BytesRemoved) const {
  if (BytesRemoved)
    *BytesRemoved = 0;
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!I->getDesc().isUnconditionalBranch() &&
      !I->getDesc().isConditionalBranch())
    return 0;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(*I);
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!I->getDesc().isConditionalBranch())
    return 1;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(*I);
  I->eraseFromParent();
  return 2;
}

// Inserts a branch into the end of the given MachineBasicBlock, returning
// the number of instructions inserted.
unsigned RISCVInstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  if (BytesAdded)
    *BytesAdded = 0;

  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 3 || Cond.size() == 0) &&
         "RISC-V branch conditions have three components!");

  // Unconditional branch.
  if (Cond.empty()) {
    MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
    if (BytesAdded)
      *BytesAdded += getInstSizeInBytes(MI);
    return 1;
  }

  // Either a one or two-way conditional branch.
  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  MachineInstr &CondMI =
      *BuildMI(&MBB, DL, getBrCond(CC)).add(Cond[1]).add(Cond[2]).addMBB(TBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(CondMI);

  // One-way conditional branch.
  if (!FBB)
    return 1;

  // Two-way conditional branch.
  MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(MI);
  return 2;
}

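// Expand an out-of-range unconditional branch: the new block receives a
// PseudoJump (an auipc+jalr pair) through a scratch GPR. If no register can
// be scavenged, s11 is spilled around the jump and reloaded in RestoreBB.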
void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
                                          MachineBasicBlock &DestBB,
                                          MachineBasicBlock &RestoreBB,
                                          const DebugLoc &DL, int64_t BrOffset,
                                          RegScavenger *RS) const {
  assert(RS && "RegScavenger required for long branching");
  assert(MBB.empty() &&
         "new block should be inserted for expanding unconditional branch");
  assert(MBB.pred_size() == 1);
  assert(RestoreBB.empty() &&
         "restore block should be inserted for restoring clobbered registers");

  MachineFunction *MF = MBB.getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();

  if (!isInt<32>(BrOffset))
    report_fatal_error(
        "Branch offsets outside of the signed 32-bit range not supported");

  // FIXME: A virtual register must be used initially, as the register
  // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
  // uses the same workaround).
  Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  auto II = MBB.end();
  // We may also update the jump target to RestoreBB later.
  MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
                          .addReg(ScratchReg, RegState::Define | RegState::Dead)
                          .addMBB(&DestBB, RISCVII::MO_CALL);

  RS->enterBasicBlockEnd(MBB);
  Register TmpGPR =
      RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
                                    /*RestoreAfter=*/false, /*SpAdj=*/0,
                                    /*AllowSpill=*/false);
  if (TmpGPR != RISCV::NoRegister)
    RS->setRegUsed(TmpGPR);
  else {
    // The case when there is no scavenged register needs special handling.

    // Pick s11 because it doesn't make a difference.
    TmpGPR = RISCV::X27;

    int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
    if (FrameIndex == -1)
      report_fatal_error("underestimated function size");

    storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
                        &RISCV::GPRRegClass, TRI, Register());
    TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
                             /*SpAdj=*/0, /*FIOperandNum=*/1);

    MI.getOperand(1).setMBB(&RestoreBB);

    loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
                         &RISCV::GPRRegClass, TRI, Register());
    TRI->eliminateFrameIndex(RestoreBB.back(),
                             /*SpAdj=*/0, /*FIOperandNum=*/1);
  }

  MRI.replaceRegWith(ScratchReg, TmpGPR);
  MRI.clearVirtRegs();
}

bool RISCVInstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  assert((Cond.size() == 3) && "Invalid branch condition!");
  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  Cond[0].setImm(getOppositeBranchCondition(CC));
  return false;
}

MachineBasicBlock *
RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  assert(MI.getDesc().isBranch() && "Unexpected opcode!");
  // The branch target is always the last operand.
  int NumOp = MI.getNumExplicitOperands();
  return MI.getOperand(NumOp - 1).getMBB();
}

bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                           int64_t BrOffset) const {
  unsigned XLen = STI.getXLen();
  // Ideally we could determine the supported branch offset from the
  // RISCVII::FormMask, but this can't be used for Pseudo instructions like
  // PseudoBR.
  switch (BranchOp) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case RISCV::BEQ:
  case RISCV::BNE:
  case RISCV::BLT:
  case RISCV::BGE:
  case RISCV::BLTU:
  case RISCV::BGEU:
    return isIntN(13, BrOffset);
  case RISCV::JAL:
  case RISCV::PseudoBR:
    return isIntN(21, BrOffset);
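  // PseudoJump lowers to an auipc+jalr pair; adding 0x800 accounts for
  // auipc's rounding when the sign-extended low 12 bits are folded into jalr.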
  case RISCV::PseudoJump:
    return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
  }
}

// If the operation has a predicated pseudo instruction, return the pseudo
// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
// TODO: Support more operations.
unsigned getPredicatedOpcode(unsigned Opcode) {
  switch (Opcode) {
  case RISCV::ADD:   return RISCV::PseudoCCADD;
  case RISCV::SUB:   return RISCV::PseudoCCSUB;
  case RISCV::AND:   return RISCV::PseudoCCAND;
  case RISCV::OR:    return RISCV::PseudoCCOR;
  case RISCV::XOR:   return RISCV::PseudoCCXOR;

  case RISCV::ADDW:  return RISCV::PseudoCCADDW;
  case RISCV::SUBW:  return RISCV::PseudoCCSUBW;
  }

  return RISCV::INSTRUCTION_LIST_END;
}

/// Identify instructions that can be folded into a CCMOV instruction, and
/// return the defining instruction.
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
                                           const MachineRegisterInfo &MRI,
                                           const TargetInstrInfo *TII) {
  if (!Reg.isVirtual())
    return nullptr;
  if (!MRI.hasOneNonDBGUse(Reg))
    return nullptr;
  MachineInstr *MI = MRI.getVRegDef(Reg);
  if (!MI)
    return nullptr;
  // Check if MI can be predicated and folded into the CCMOV.
  if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
    return nullptr;
  // Check if MI has any other defs or physreg uses.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
    // Reject frame index operands; PEI can't handle the predicated pseudos.
    if (MO.isFI() || MO.isCPI() || MO.isJTI())
      return nullptr;
    if (!MO.isReg())
      continue;
    // MI can't have any tied operands; that would conflict with predication.
    if (MO.isTied())
      return nullptr;
    if (MO.isDef())
      return nullptr;
    // Allow constant physregs.
    if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))
      return nullptr;
  }
  bool DontMoveAcrossStores = true;
  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
    return nullptr;
  return MI;
}

bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   unsigned &TrueOp, unsigned &FalseOp,
                                   bool &Optimizable) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  // CCMOV operands:
  // 0: Def.
  // 1: LHS of compare.
  // 2: RHS of compare.
  // 3: Condition code.
  // 4: False use.
  // 5: True use.
  TrueOp = 5;
  FalseOp = 4;
  Cond.push_back(MI.getOperand(1));
  Cond.push_back(MI.getOperand(2));
  Cond.push_back(MI.getOperand(3));
  // We can only fold when we support short forward branch opt.
  Optimizable = STI.hasShortForwardBranchOpt();
  return false;
}

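// Illustrative sketch of the fold below (short-forward-branch targets):
//   %5 = ADD %1, %2
//   %6 = PseudoCCMOVGPR %lhs, %rhs, cc, %false, %5
// becomes a single predicated pseudo:
//   %6 = PseudoCCADD %lhs, %rhs, cc, %false, %1, %2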
MachineInstr *
RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
                               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                               bool PreferFalse) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  if (!STI.hasShortForwardBranchOpt())
    return nullptr;

  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  MachineInstr *DefMI =
      canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
  if (!DefMI)
    return nullptr;

  // Find the new register class to use.
  MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
  Register DestReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
  if (!MRI.constrainRegClass(DestReg, PreviousClass))
    return nullptr;

  unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
  assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");

  // Create a new predicated version of DefMI.
  MachineInstrBuilder NewMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);

  // Copy the condition portion.
  NewMI.add(MI.getOperand(1));
  NewMI.add(MI.getOperand(2));

  // Add the condition code, inverting it if necessary.
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
  if (Invert)
    CC = RISCVCC::getOppositeBranchCondition(CC);
  NewMI.addImm(CC);

  // Copy the false register.
  NewMI.add(FalseReg);

  // Copy all the DefMI operands.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
    NewMI.add(DefMI->getOperand(i));

  // Update the SeenMIs set: register the newly created MI and erase the
  // removed DefMI.
  SeenMIs.insert(NewMI);
  SeenMIs.erase(DefMI);

  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
  // DefMI would be invalid when transferred inside the loop. Checking for a
  // loop is expensive, but at least remove kill flags if they are in different
  // BBs.
  if (DefMI->getParent() != MI.getParent())
    NewMI->clearKillInfo();

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}

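// Size accounting notes: compressible instructions count as 2 bytes, and
// non-temporal memory accesses include the size of the preceding ntl.all /
// c.ntl.all hint instruction.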
unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  if (MI.isMetaInstruction())
    return 0;

  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::INLINEASM ||
      Opcode == TargetOpcode::INLINEASM_BR) {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
                              *TM.getMCAsmInfo());
  }

  if (!MI.memoperands_empty()) {
    MachineMemOperand *MMO = *(MI.memoperands_begin());
    const MachineFunction &MF = *MI.getParent()->getParent();
    const auto &ST = MF.getSubtarget<RISCVSubtarget>();
    if (ST.hasStdExtZihintntl() && MMO->isNonTemporal()) {
      if (ST.hasStdExtCOrZca() && ST.enableRVCHintInstrs()) {
        if (isCompressibleInst(MI, STI))
          return 4; // c.ntl.all + c.load/c.store
        return 6;   // c.ntl.all + load/store
      }
      return 8; // ntl.all + load/store
    }
  }

  if (Opcode == TargetOpcode::BUNDLE)
    return getInstBundleLength(MI);

  if (MI.getParent() && MI.getParent()->getParent()) {
    if (isCompressibleInst(MI, STI))
      return 2;
  }
  return get(Opcode).getSize();
}

unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
           MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
  case RISCV::ADDI:
  case RISCV::ORI:
  case RISCV::XORI:
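    // An op with x0 materializes a constant (li) and an op with immediate 0
    // is a register move (mv); both are as cheap as a move.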
    return (MI.getOperand(1).isReg() &&
            MI.getOperand(1).getReg() == RISCV::X0) ||
           (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
  }
  return MI.isAsCheapAsAMove();
}

std::optional<DestSourcePair>
RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  if (MI.isMoveReg())
    return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
  switch (MI.getOpcode()) {
  default:
    break;
  case RISCV::ADDI:
    // Operand 1 can be a frame index, but callers expect registers.
    if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0)
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
        MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
    break;
  }
  return std::nullopt;
}

MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
  if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
    // The option is unset. Choose the Local strategy only for in-order cores;
    // when the scheduling model is unspecified, fall back to the more generic
    // MinInstrCount strategy.
    const auto &SchedModel = STI.getSchedModel();
    return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
               ? MachineTraceStrategy::TS_MinInstrCount
               : MachineTraceStrategy::TS_Local;
  }
  // The strategy was forced by the option.
  return ForceMachineCombinerStrategy;
}

void RISCVInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
                                           MachineInstr &OldMI2,
                                           MachineInstr &NewMI1,
                                           MachineInstr &NewMI2) const {
  uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
  NewMI1.setFlags(IntersectedFlags);
  NewMI2.setFlags(IntersectedFlags);
}

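// Propagate the rounding-mode (frm) operand from the root instruction onto
// each newly created instruction, adding an implicit FRM use when the
// rounding mode is dynamic.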
1375 void RISCVInstrInfo::finalizeInsInstrs(
1376     MachineInstr &Root, MachineCombinerPattern &P,
1377     SmallVectorImpl<MachineInstr *> &InsInstrs) const {
1378   int16_t FrmOpIdx =
1379       RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
1380   if (FrmOpIdx < 0) {
1381     assert(all_of(InsInstrs,
1382                   [](MachineInstr *MI) {
1383                     return RISCV::getNamedOperandIdx(MI->getOpcode(),
1384                                                      RISCV::OpName::frm) < 0;
1385                   }) &&
1386            "New instructions require FRM whereas the old one does not have it");
1387     return;
1388   }
1389 
1390   const MachineOperand &FRM = Root.getOperand(FrmOpIdx);
1391   MachineFunction &MF = *Root.getMF();
1392 
1393   for (auto *NewMI : InsInstrs) {
1394     assert(static_cast<unsigned>(RISCV::getNamedOperandIdx(
1395                NewMI->getOpcode(), RISCV::OpName::frm)) ==
1396                NewMI->getNumOperands() &&
1397            "Instruction has unexpected number of operands");
1398     MachineInstrBuilder MIB(MF, NewMI);
1399     MIB.add(FRM);
1400     if (FRM.getImm() == RISCVFPRndMode::DYN)
1401       MIB.addUse(RISCV::FRM, RegState::Implicit);
1402   }
1403 }
1404 
1405 static bool isFADD(unsigned Opc) {
1406   switch (Opc) {
1407   default:
1408     return false;
1409   case RISCV::FADD_H:
1410   case RISCV::FADD_S:
1411   case RISCV::FADD_D:
1412     return true;
1413   }
1414 }
1415 
1416 static bool isFSUB(unsigned Opc) {
1417   switch (Opc) {
1418   default:
1419     return false;
1420   case RISCV::FSUB_H:
1421   case RISCV::FSUB_S:
1422   case RISCV::FSUB_D:
1423     return true;
1424   }
1425 }
1426 
1427 static bool isFMUL(unsigned Opc) {
1428   switch (Opc) {
1429   default:
1430     return false;
1431   case RISCV::FMUL_H:
1432   case RISCV::FMUL_S:
1433   case RISCV::FMUL_D:
1434     return true;
1435   }
1436 }
1437 
1438 bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
1439                                             bool &Commuted) const {
1440   if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
1441     return false;
1442 
1443   const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
1444   unsigned OperandIdx = Commuted ? 2 : 1;
1445   const MachineInstr &Sibling =
1446       *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg());
1447 
1448   int16_t InstFrmOpIdx =
1449       RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);
1450   int16_t SiblingFrmOpIdx =
1451       RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);
1452 
1453   return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
1454          RISCV::hasEqualFRM(Inst, Sibling);
1455 }
1456 
1457 bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
1458                                                  bool Invert) const {
1459   unsigned Opc = Inst.getOpcode();
1460   if (Invert) {
1461     auto InverseOpcode = getInverseOpcode(Opc);
1462     if (!InverseOpcode)
1463       return false;
1464     Opc = *InverseOpcode;
1465   }
1466 
1467   if (isFADD(Opc) || isFMUL(Opc))
1468     return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
1469            Inst.getFlag(MachineInstr::MIFlag::FmNsz);
1470 
1471   switch (Opc) {
1472   default:
1473     return false;
1474   case RISCV::ADD:
1475   case RISCV::ADDW:
1476   case RISCV::AND:
1477   case RISCV::OR:
1478   case RISCV::XOR:
1479   // From the RISC-V ISA spec: if both the high and low bits of the same
1480   // product are required, then the recommended code sequence is:
1481   //
1482   // MULH[[S]U] rdh, rs1, rs2
1483   // MUL        rdl, rs1, rs2
1484   // (source register specifiers must be in same order and rdh cannot be the
1485   //  same as rs1 or rs2)
1486   //
1487   // Microarchitectures can then fuse these into a single multiply operation
1488   // instead of performing two separate multiplies.
1489   // MachineCombiner may reassociate MUL operands and lose the fusion
1490   // opportunity.
1491   case RISCV::MUL:
1492   case RISCV::MULW:
1493   case RISCV::MIN:
1494   case RISCV::MINU:
1495   case RISCV::MAX:
1496   case RISCV::MAXU:
1497   case RISCV::FMIN_H:
1498   case RISCV::FMIN_S:
1499   case RISCV::FMIN_D:
1500   case RISCV::FMAX_H:
1501   case RISCV::FMAX_S:
1502   case RISCV::FMAX_D:
1503     return true;
1504   }
1505 
1506   return false;
1507 }
1508 
1509 std::optional<unsigned>
1510 RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {
1511   switch (Opcode) {
1512   default:
1513     return std::nullopt;
1514   case RISCV::FADD_H:
1515     return RISCV::FSUB_H;
1516   case RISCV::FADD_S:
1517     return RISCV::FSUB_S;
1518   case RISCV::FADD_D:
1519     return RISCV::FSUB_D;
1520   case RISCV::FSUB_H:
1521     return RISCV::FADD_H;
1522   case RISCV::FSUB_S:
1523     return RISCV::FADD_S;
1524   case RISCV::FSUB_D:
1525     return RISCV::FADD_D;
1526   case RISCV::ADD:
1527     return RISCV::SUB;
1528   case RISCV::SUB:
1529     return RISCV::ADD;
1530   case RISCV::ADDW:
1531     return RISCV::SUBW;
1532   case RISCV::SUBW:
1533     return RISCV::ADDW;
1534   }
1535 }
1536 
1537 static bool canCombineFPFusedMultiply(const MachineInstr &Root,
1538                                       const MachineOperand &MO,
1539                                       bool DoRegPressureReduce) {
1540   if (!MO.isReg() || !MO.getReg().isVirtual())
1541     return false;
1542   const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
1543   MachineInstr *MI = MRI.getVRegDef(MO.getReg());
1544   if (!MI || !isFMUL(MI->getOpcode()))
1545     return false;
1546 
1547   if (!Root.getFlag(MachineInstr::MIFlag::FmContract) ||
1548       !MI->getFlag(MachineInstr::MIFlag::FmContract))
1549     return false;
1550 
1551   // Try combining even if the fmul has more than one use, since this removes
1552   // a dependency between the fadd (or fsub) and the fmul. However, it can
1553   // extend the live ranges of the fmul operands, so reject the transformation
1554   // in register-pressure-reduction mode.
1555   if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1556     return false;
1557 
1558   // Do not combine instructions from different basic blocks.
1559   if (Root.getParent() != MI->getParent())
1560     return false;
1561   return RISCV::hasEqualFRM(Root, *MI);
1562 }
1563 
1564 static bool
1565 getFPFusedMultiplyPatterns(MachineInstr &Root,
1566                            SmallVectorImpl<MachineCombinerPattern> &Patterns,
1567                            bool DoRegPressureReduce) {
1568   unsigned Opc = Root.getOpcode();
1569   bool IsFAdd = isFADD(Opc);
1570   if (!IsFAdd && !isFSUB(Opc))
1571     return false;
1572   bool Added = false;
1573   if (canCombineFPFusedMultiply(Root, Root.getOperand(1),
1574                                 DoRegPressureReduce)) {
1575     Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_AX
1576                               : MachineCombinerPattern::FMSUB);
1577     Added = true;
1578   }
1579   if (canCombineFPFusedMultiply(Root, Root.getOperand(2),
1580                                 DoRegPressureReduce)) {
1581     Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_XA
1582                               : MachineCombinerPattern::FNMSUB);
1583     Added = true;
1584   }
1585   return Added;
1586 }
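     // In the patterns above, FMADD_AX and FMSUB mean the fmul defines the
     // root's operand 1 (so the addend/subtrahend is operand 2), while
     // FMADD_XA and FNMSUB mean the fmul defines operand 2 (the addend/minuend
     // is operand 1); see getAddendOperandIdx below.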
1587 
1588 static bool getFPPatterns(MachineInstr &Root,
1589                           SmallVectorImpl<MachineCombinerPattern> &Patterns,
1590                           bool DoRegPressureReduce) {
1591   return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
1592 }
1593 
1594 bool RISCVInstrInfo::getMachineCombinerPatterns(
1595     MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
1596     bool DoRegPressureReduce) const {
1597 
1598   if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
1599     return true;
1600 
1601   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
1602                                                      DoRegPressureReduce);
1603 }
1604 
1605 static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc,
1606                                          MachineCombinerPattern Pattern) {
1607   switch (RootOpc) {
1608   default:
1609     llvm_unreachable("Unexpected opcode");
1610   case RISCV::FADD_H:
1611     return RISCV::FMADD_H;
1612   case RISCV::FADD_S:
1613     return RISCV::FMADD_S;
1614   case RISCV::FADD_D:
1615     return RISCV::FMADD_D;
1616   case RISCV::FSUB_H:
1617     return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
1618                                                     : RISCV::FNMSUB_H;
1619   case RISCV::FSUB_S:
1620     return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
1621                                                     : RISCV::FNMSUB_S;
1622   case RISCV::FSUB_D:
1623     return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
1624                                                     : RISCV::FNMSUB_D;
1625   }
1626 }
1627 
1628 static unsigned getAddendOperandIdx(MachineCombinerPattern Pattern) {
1629   switch (Pattern) {
1630   default:
1631     llvm_unreachable("Unexpected pattern");
1632   case MachineCombinerPattern::FMADD_AX:
1633   case MachineCombinerPattern::FMSUB:
1634     return 2;
1635   case MachineCombinerPattern::FMADD_XA:
1636   case MachineCombinerPattern::FNMSUB:
1637     return 1;
1638   }
1639 }
1640 
1641 static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
1642                                    MachineCombinerPattern Pattern,
1643                                    SmallVectorImpl<MachineInstr *> &InsInstrs,
1644                                    SmallVectorImpl<MachineInstr *> &DelInstrs) {
1645   MachineFunction *MF = Root.getMF();
1646   MachineRegisterInfo &MRI = MF->getRegInfo();
1647   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1648 
1649   MachineOperand &Mul1 = Prev.getOperand(1);
1650   MachineOperand &Mul2 = Prev.getOperand(2);
1651   MachineOperand &Dst = Root.getOperand(0);
1652   MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern));
1653 
1654   Register DstReg = Dst.getReg();
1655   unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern);
1656   uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
1657   DebugLoc MergedLoc =
1658       DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc());
1659 
1660   bool Mul1IsKill = Mul1.isKill();
1661   bool Mul2IsKill = Mul2.isKill();
1662   bool AddendIsKill = Addend.isKill();
1663 
1664   // We need to clear kill flags since we may be extending the live range past
1665   // a kill. If the mul had kill flags, we can preserve those since we know
1666   // where the previous range stopped.
1667   MRI.clearKillFlags(Mul1.getReg());
1668   MRI.clearKillFlags(Mul2.getReg());
1669 
1670   MachineInstrBuilder MIB =
1671       BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg)
1672           .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill))
1673           .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill))
1674           .addReg(Addend.getReg(), getKillRegState(AddendIsKill))
1675           .setMIFlags(IntersectedFlags);
1676 
1677   InsInstrs.push_back(MIB);
1678   if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg()))
1679     DelInstrs.push_back(&Prev);
1680   DelInstrs.push_back(&Root);
1681 }
1682 
1683 void RISCVInstrInfo::genAlternativeCodeSequence(
1684     MachineInstr &Root, MachineCombinerPattern Pattern,
1685     SmallVectorImpl<MachineInstr *> &InsInstrs,
1686     SmallVectorImpl<MachineInstr *> &DelInstrs,
1687     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
1688   MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
1689   switch (Pattern) {
1690   default:
1691     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
1692                                                 DelInstrs, InstrIdxForVirtReg);
1693     return;
1694   case MachineCombinerPattern::FMADD_AX:
1695   case MachineCombinerPattern::FMSUB: {
1696     MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg());
1697     combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
1698     return;
1699   }
1700   case MachineCombinerPattern::FMADD_XA:
1701   case MachineCombinerPattern::FNMSUB: {
1702     MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg());
1703     combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
1704     return;
1705   }
1706   }
1707 }
1708 
1709 bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
1710                                        StringRef &ErrInfo) const {
1711   MCInstrDesc const &Desc = MI.getDesc();
1712 
1713   for (const auto &[Index, Operand] : enumerate(Desc.operands())) {
1714     unsigned OpType = Operand.OperandType;
1715     if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
1716         OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
1717       const MachineOperand &MO = MI.getOperand(Index);
1718       if (MO.isImm()) {
1719         int64_t Imm = MO.getImm();
1720         bool Ok;
1721         switch (OpType) {
1722         default:
1723           llvm_unreachable("Unexpected operand type");
1724 
1725           // clang-format off
1726 #define CASE_OPERAND_UIMM(NUM)                                                 \
1727   case RISCVOp::OPERAND_UIMM##NUM:                                             \
1728     Ok = isUInt<NUM>(Imm);                                                     \
1729     break;
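             // For example, CASE_OPERAND_UIMM(5) expands to:
             //   case RISCVOp::OPERAND_UIMM5:
             //     Ok = isUInt<5>(Imm);
             //     break;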
1730         CASE_OPERAND_UIMM(1)
1731         CASE_OPERAND_UIMM(2)
1732         CASE_OPERAND_UIMM(3)
1733         CASE_OPERAND_UIMM(4)
1734         CASE_OPERAND_UIMM(5)
1735         CASE_OPERAND_UIMM(6)
1736         CASE_OPERAND_UIMM(7)
1737         CASE_OPERAND_UIMM(8)
1738         CASE_OPERAND_UIMM(12)
1739         CASE_OPERAND_UIMM(20)
1740           // clang-format on
1741         case RISCVOp::OPERAND_UIMM2_LSB0:
1742           Ok = isShiftedUInt<1, 1>(Imm);
1743           break;
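               // isShiftedUInt<N, S>(Imm) checks that Imm fits in N+S bits and
               // has its low S bits clear; e.g. isShiftedUInt<5, 2> accepts
               // multiples of 4 from 0 to 124.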
1744         case RISCVOp::OPERAND_UIMM7_LSB00:
1745           Ok = isShiftedUInt<5, 2>(Imm);
1746           break;
1747         case RISCVOp::OPERAND_UIMM8_LSB00:
1748           Ok = isShiftedUInt<6, 2>(Imm);
1749           break;
1750         case RISCVOp::OPERAND_UIMM8_LSB000:
1751           Ok = isShiftedUInt<5, 3>(Imm);
1752           break;
1753         case RISCVOp::OPERAND_UIMM8_GE32:
1754           Ok = isUInt<8>(Imm) && Imm >= 32;
1755           break;
1756         case RISCVOp::OPERAND_UIMM9_LSB000:
1757           Ok = isShiftedUInt<6, 3>(Imm);
1758           break;
1759         case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
1760           Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);
1761           break;
1762         case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO:
1763           Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0);
1764           break;
1765         case RISCVOp::OPERAND_ZERO:
1766           Ok = Imm == 0;
1767           break;
1768         case RISCVOp::OPERAND_SIMM5:
1769           Ok = isInt<5>(Imm);
1770           break;
1771         case RISCVOp::OPERAND_SIMM5_PLUS1:
1772           Ok = (isInt<5>(Imm) && Imm != -16) || Imm == 16;
1773           break;
1774         case RISCVOp::OPERAND_SIMM6:
1775           Ok = isInt<6>(Imm);
1776           break;
1777         case RISCVOp::OPERAND_SIMM6_NONZERO:
1778           Ok = Imm != 0 && isInt<6>(Imm);
1779           break;
1780         case RISCVOp::OPERAND_VTYPEI10:
1781           Ok = isUInt<10>(Imm);
1782           break;
1783         case RISCVOp::OPERAND_VTYPEI11:
1784           Ok = isUInt<11>(Imm);
1785           break;
1786         case RISCVOp::OPERAND_SIMM12:
1787           Ok = isInt<12>(Imm);
1788           break;
1789         case RISCVOp::OPERAND_SIMM12_LSB00000:
1790           Ok = isShiftedInt<7, 5>(Imm);
1791           break;
1792         case RISCVOp::OPERAND_UIMMLOG2XLEN:
1793           Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
1794           break;
1795         case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
1796           Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
1797           Ok = Ok && Imm != 0;
1798           break;
1799         case RISCVOp::OPERAND_CLUI_IMM:
1800           Ok = (isUInt<5>(Imm) && Imm != 0) ||
1801                (Imm >= 0xfffe0 && Imm <= 0xfffff);
1802           break;
1803         case RISCVOp::OPERAND_RVKRNUM:
1804           Ok = Imm >= 0 && Imm <= 10;
1805           break;
1806         case RISCVOp::OPERAND_RVKRNUM_0_7:
1807           Ok = Imm >= 0 && Imm <= 7;
1808           break;
1809         case RISCVOp::OPERAND_RVKRNUM_1_10:
1810           Ok = Imm >= 1 && Imm <= 10;
1811           break;
1812         case RISCVOp::OPERAND_RVKRNUM_2_14:
1813           Ok = Imm >= 2 && Imm <= 14;
1814           break;
1815         }
1816         if (!Ok) {
1817           ErrInfo = "Invalid immediate";
1818           return false;
1819         }
1820       }
1821     }
1822   }
1823 
1824   const uint64_t TSFlags = Desc.TSFlags;
1825   if (RISCVII::hasVLOp(TSFlags)) {
1826     const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc));
1827     if (!Op.isImm() && !Op.isReg()) {
1828       ErrInfo = "Invalid operand type for VL operand";
1829       return false;
1830     }
1831     if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
1832       const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1833       auto *RC = MRI.getRegClass(Op.getReg());
1834       if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
1835         ErrInfo = "Invalid register class for VL operand";
1836         return false;
1837       }
1838     }
1839     if (!RISCVII::hasSEWOp(TSFlags)) {
1840       ErrInfo = "VL operand w/o SEW operand?";
1841       return false;
1842     }
1843   }
1844   if (RISCVII::hasSEWOp(TSFlags)) {
1845     unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
1846     if (!MI.getOperand(OpIdx).isImm()) {
1847       ErrInfo = "SEW value expected to be an immediate";
1848       return false;
1849     }
1850     uint64_t Log2SEW = MI.getOperand(OpIdx).getImm();
1851     if (Log2SEW > 31) {
1852       ErrInfo = "Unexpected SEW value";
1853       return false;
1854     }
1855     unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
1856     if (!RISCVVType::isValidSEW(SEW)) {
1857       ErrInfo = "Unexpected SEW value";
1858       return false;
1859     }
1860   }
1861   if (RISCVII::hasVecPolicyOp(TSFlags)) {
1862     unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
1863     if (!MI.getOperand(OpIdx).isImm()) {
1864       ErrInfo = "Policy operand expected to be an immediate";
1865       return false;
1866     }
1867     uint64_t Policy = MI.getOperand(OpIdx).getImm();
1868     if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) {
1869       ErrInfo = "Invalid Policy Value";
1870       return false;
1871     }
1872     if (!RISCVII::hasVLOp(TSFlags)) {
1873       ErrInfo = "policy operand w/o VL operand?";
1874       return false;
1875     }
1876 
1877     // VecPolicy operands can only exist on instructions with passthru/merge
1878     // arguments. Note that not all instructions with a passthru have vec policy
1879     // operands; some instructions have implicit policies.
1880     unsigned UseOpIdx;
1881     if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
1882       ErrInfo = "policy operand w/o tied operand?";
1883       return false;
1884     }
1885   }
1886 
1887   return true;
1888 }
1889 
1890 // Return true if we can determine the base operand, byte offset, and memory
1891 // width of the instruction. Width is the size of the memory access in bytes.
1892 bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
1893     const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
1894     unsigned &Width, const TargetRegisterInfo *TRI) const {
1895   if (!LdSt.mayLoadOrStore())
1896     return false;
1897 
1898   // Here we assume the standard RISC-V ISA, which uses a base+offset
1899   // addressing mode. These conditions would need to be relaxed to support
1900   // custom load/store instructions.
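       // For example, "lw a0, 8(a1)" has three explicit operands: the result,
       // the base register a1 (operand 1), and the immediate offset 8 (operand
       // 2); the 4-byte width comes from the attached machine memory operand.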
1901   if (LdSt.getNumExplicitOperands() != 3)
1902     return false;
1903   if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
1904     return false;
1905 
1906   if (!LdSt.hasOneMemOperand())
1907     return false;
1908 
1909   Width = (*LdSt.memoperands_begin())->getSize();
1910   BaseReg = &LdSt.getOperand(1);
1911   Offset = LdSt.getOperand(2).getImm();
1912   return true;
1913 }
1914 
1915 bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
1916     const MachineInstr &MIa, const MachineInstr &MIb) const {
1917   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1918   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1919 
1920   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1921       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1922     return false;
1923 
1924   // Retrieve the base register, the offset from the base register, and the
1925   // width of the access. Width is the size of the memory being loaded/stored
1926   // (e.g. 1, 2, or 4 bytes). If the base registers are identical and the lower
1927   // access's offset plus its width does not reach past the higher access's
1928   // offset, then the two accesses do not overlap.
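       // For example, two 4-byte loads off the same base at offsets 0 and 8
       // are disjoint: 0 (low offset) + 4 (low width) <= 8 (high offset).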
1929   const TargetRegisterInfo *TRI = STI.getRegisterInfo();
1930   const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1931   int64_t OffsetA = 0, OffsetB = 0;
1932   unsigned int WidthA = 0, WidthB = 0;
1933   if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
1934       getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
1935     if (BaseOpA->isIdenticalTo(*BaseOpB)) {
1936       int LowOffset = std::min(OffsetA, OffsetB);
1937       int HighOffset = std::max(OffsetA, OffsetB);
1938       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1939       if (LowOffset + LowWidth <= HighOffset)
1940         return true;
1941     }
1942   }
1943   return false;
1944 }
1945 
1946 std::pair<unsigned, unsigned>
1947 RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
1948   const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
1949   return std::make_pair(TF & Mask, TF & ~Mask);
1950 }
1951 
1952 ArrayRef<std::pair<unsigned, const char *>>
1953 RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
1954   using namespace RISCVII;
1955   static const std::pair<unsigned, const char *> TargetFlags[] = {
1956       {MO_CALL, "riscv-call"},
1957       {MO_PLT, "riscv-plt"},
1958       {MO_LO, "riscv-lo"},
1959       {MO_HI, "riscv-hi"},
1960       {MO_PCREL_LO, "riscv-pcrel-lo"},
1961       {MO_PCREL_HI, "riscv-pcrel-hi"},
1962       {MO_GOT_HI, "riscv-got-hi"},
1963       {MO_TPREL_LO, "riscv-tprel-lo"},
1964       {MO_TPREL_HI, "riscv-tprel-hi"},
1965       {MO_TPREL_ADD, "riscv-tprel-add"},
1966       {MO_TLS_GOT_HI, "riscv-tls-got-hi"},
1967       {MO_TLS_GD_HI, "riscv-tls-gd-hi"}};
1968   return ArrayRef(TargetFlags);
1969 }
1970 bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
1971     MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
1972   const Function &F = MF.getFunction();
1973 
1974   // Can F be deduplicated by the linker? If it can, don't outline from it.
1975   if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
1976     return false;
1977 
1978   // Don't outline from functions with section markings; the program could
1979   // expect that all the code is in the named section.
1980   if (F.hasSection())
1981     return false;
1982 
1983   // It's safe to outline from MF.
1984   return true;
1985 }
1986 
1987 bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
1988                                             unsigned &Flags) const {
1989   // More accurate safety checking is done in getOutliningCandidateInfo.
1990   return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
1991 }
1992 
1993 // Enum values indicating how an outlined call should be constructed.
1994 enum MachineOutlinerConstructionID {
1995   MachineOutlinerDefault
1996 };
1997 
1998 bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
1999     MachineFunction &MF) const {
2000   return MF.getFunction().hasMinSize();
2001 }
2002 
2003 std::optional<outliner::OutlinedFunction>
2004 RISCVInstrInfo::getOutliningCandidateInfo(
2005     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
2006 
2007   // First we need to filter out candidates where the X5 register (i.e. t0)
2008   // can't be used to set up the function call.
2009   auto CannotInsertCall = [](outliner::Candidate &C) {
2010     const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
2011     return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
2012   };
2013 
2014   llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
2015 
2016   // If the sequence doesn't have enough candidates left, then we're done.
2017   if (RepeatedSequenceLocs.size() < 2)
2018     return std::nullopt;
2019 
2020   unsigned SequenceSize = 0;
2021 
2022   auto I = RepeatedSequenceLocs[0].front();
2023   auto E = std::next(RepeatedSequenceLocs[0].back());
2024   for (; I != E; ++I)
2025     SequenceSize += getInstSizeInBytes(*I);
2026 
2027   // call t0, function = 8 bytes.
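       // (That is an AUIPC+JALR pair, 4 bytes each, materialized from the
       // PseudoCALLReg emitted in insertOutlinedCall below.)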
2028   unsigned CallOverhead = 8;
2029   for (auto &C : RepeatedSequenceLocs)
2030     C.setCallInfo(MachineOutlinerDefault, CallOverhead);
2031 
2032   // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
2033   unsigned FrameOverhead = 4;
2034   if (RepeatedSequenceLocs[0]
2035           .getMF()
2036           ->getSubtarget<RISCVSubtarget>()
2037           .hasStdExtCOrZca())
2038     FrameOverhead = 2;
2039 
2040   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
2041                                     FrameOverhead, MachineOutlinerDefault);
2042 }
2043 
2044 outliner::InstrType
2045 RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
2046                                      unsigned Flags) const {
2047   MachineInstr &MI = *MBBI;
2048   MachineBasicBlock *MBB = MI.getParent();
2049   const TargetRegisterInfo *TRI =
2050       MBB->getParent()->getSubtarget().getRegisterInfo();
2051   const auto &F = MI.getMF()->getFunction();
2052 
2053   // We can manually strip out CFI instructions later.
2054   if (MI.isCFIInstruction())
2055     // If the current function has exception-handling code, we can't outline
2056     // and strip these CFI instructions, since doing so may break the
2057     // .eh_frame section needed for unwinding.
2058     return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
2059                                      : outliner::InstrType::Invisible;
2060 
2061   // We need support for tail calls to outlined functions before return
2062   // statements can be allowed.
2063   if (MI.isReturn())
2064     return outliner::InstrType::Illegal;
2065 
2066   // Don't allow modifying the X5 register, which we use for return addresses
2067   // in these outlined functions.
2068   if (MI.modifiesRegister(RISCV::X5, TRI) ||
2069       MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
2070     return outliner::InstrType::Illegal;
2071 
2072   // Make sure the operands don't reference something unsafe.
2073   for (const auto &MO : MI.operands()) {
2074 
2075     // pcrel-hi and pcrel-lo can't be placed in separate sections, so filter
2076     // out any candidate where that could happen.
2077     if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
2078         (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
2079          F.hasSection()))
2080       return outliner::InstrType::Illegal;
2081   }
2082 
2083   return outliner::InstrType::Legal;
2084 }
2085 
2086 void RISCVInstrInfo::buildOutlinedFrame(
2087     MachineBasicBlock &MBB, MachineFunction &MF,
2088     const outliner::OutlinedFunction &OF) const {
2089 
2090   // Strip out any CFI instructions
2091   bool Changed = true;
2092   while (Changed) {
2093     Changed = false;
2094     auto I = MBB.begin();
2095     auto E = MBB.end();
2096     for (; I != E; ++I) {
2097       if (I->isCFIInstruction()) {
2098         I->removeFromParent();
2099         Changed = true;
2100         break;
2101       }
2102     }
2103   }
2104 
2105   MBB.addLiveIn(RISCV::X5);
2106 
2107   // Add in a return instruction to the end of the outlined frame.
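       // JALR with X0 as the destination and X5 as the base is the "jr t0"
       // accounted for as FrameOverhead in getOutliningCandidateInfo above.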
2108   MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
2109       .addReg(RISCV::X0, RegState::Define)
2110       .addReg(RISCV::X5)
2111       .addImm(0));
2112 }
2113 
2114 MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
2115     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
2116     MachineFunction &MF, outliner::Candidate &C) const {
2117 
2118   // Add in a call instruction to the outlined function at the given location.
2119   It = MBB.insert(It,
2120                   BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
2121                       .addGlobalAddress(M.getNamedValue(MF.getName()), 0,
2122                                         RISCVII::MO_CALL));
2123   return It;
2124 }
2125 
2126 // MIR printer helper function to annotate Operands with a comment.
2127 std::string RISCVInstrInfo::createMIROperandComment(
2128     const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
2129     const TargetRegisterInfo *TRI) const {
2130   // Print a generic comment for this operand if there is one.
2131   std::string GenericComment =
2132       TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
2133   if (!GenericComment.empty())
2134     return GenericComment;
2135 
2136   // If not, we must have an immediate operand.
2137   if (!Op.isImm())
2138     return std::string();
2139 
2140   std::string Comment;
2141   raw_string_ostream OS(Comment);
2142 
2143   uint64_t TSFlags = MI.getDesc().TSFlags;
2144 
2145   // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
2146   // operand of vector codegen pseudos.
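       // The resulting MIR comment looks something like "e32, m1, ta, ma" for
       // a full vtype immediate, "e32" for a bare SEW operand, or "ta, ma" for
       // a policy operand (see the branches below).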
2147   if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI ||
2148        MI.getOpcode() == RISCV::PseudoVSETVLI ||
2149        MI.getOpcode() == RISCV::PseudoVSETIVLI ||
2150        MI.getOpcode() == RISCV::PseudoVSETVLIX0) &&
2151       OpIdx == 2) {
2152     unsigned Imm = MI.getOperand(OpIdx).getImm();
2153     RISCVVType::printVType(Imm, OS);
2154   } else if (RISCVII::hasSEWOp(TSFlags) &&
2155              OpIdx == RISCVII::getSEWOpNum(MI.getDesc())) {
2156     unsigned Log2SEW = MI.getOperand(OpIdx).getImm();
2157     unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
2158     assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
2159     OS << "e" << SEW;
2160   } else if (RISCVII::hasVecPolicyOp(TSFlags) &&
2161              OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) {
2162     unsigned Policy = MI.getOperand(OpIdx).getImm();
2163     assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
2164            "Invalid Policy Value");
2165     OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
2166        << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu");
2167   }
2168 
2169   OS.flush();
2170   return Comment;
2171 }
2172 
2173 // clang-format off
2174 #define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL)                                \
2175   RISCV::PseudoV##OP##_##TYPE##_##LMUL
2176 
2177 #define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE)                                    \
2178   CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1):                                       \
2179   case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2):                                  \
2180   case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4):                                  \
2181   case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8)
2182 
2183 #define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE)                                   \
2184   CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2):                                      \
2185   case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE)
2186 
2187 #define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE)                                   \
2188   CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4):                                      \
2189   case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE)
2190 
2191 #define CASE_VFMA_OPCODE_LMULS(OP, TYPE)                                       \
2192   CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8):                                      \
2193   case CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE)
2194 
2195 #define CASE_VFMA_SPLATS(OP)                                                   \
2196   CASE_VFMA_OPCODE_LMULS_MF4(OP, VF16):                                        \
2197   case CASE_VFMA_OPCODE_LMULS_MF2(OP, VF32):                                   \
2198   case CASE_VFMA_OPCODE_LMULS_M1(OP, VF64)
2199 // clang-format on
2200 
2201 bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
2202                                            unsigned &SrcOpIdx1,
2203                                            unsigned &SrcOpIdx2) const {
2204   const MCInstrDesc &Desc = MI.getDesc();
2205   if (!Desc.isCommutable())
2206     return false;
2207 
2208   switch (MI.getOpcode()) {
2209   case RISCV::TH_MVEQZ:
2210   case RISCV::TH_MVNEZ:
2211     // We can't commute the operands if operand 2 (i.e., rs1 in
2212     // mveqz/mvnez rd,rs1,rs2) is the zero register, since X0 is not
2213     // valid as the tied in/out operand 1.
2214     if (MI.getOperand(2).getReg() == RISCV::X0)
2215       return false;
2216     // Operands 1 and 2 are commutable, if we switch the opcode.
2217     return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
2218   case RISCV::TH_MULA:
2219   case RISCV::TH_MULAW:
2220   case RISCV::TH_MULAH:
2221   case RISCV::TH_MULS:
2222   case RISCV::TH_MULSW:
2223   case RISCV::TH_MULSH:
2224     // Operands 2 and 3 are commutable.
2225     return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
2226   case RISCV::PseudoCCMOVGPR:
2227     // Operands 4 and 5 are commutable.
2228     return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
2229   case CASE_VFMA_SPLATS(FMADD):
2230   case CASE_VFMA_SPLATS(FMSUB):
2231   case CASE_VFMA_SPLATS(FMACC):
2232   case CASE_VFMA_SPLATS(FMSAC):
2233   case CASE_VFMA_SPLATS(FNMADD):
2234   case CASE_VFMA_SPLATS(FNMSUB):
2235   case CASE_VFMA_SPLATS(FNMACC):
2236   case CASE_VFMA_SPLATS(FNMSAC):
2237   case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV):
2238   case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV):
2239   case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV):
2240   case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV):
2241   case CASE_VFMA_OPCODE_LMULS(MADD, VX):
2242   case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
2243   case CASE_VFMA_OPCODE_LMULS(MACC, VX):
2244   case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
2245   case CASE_VFMA_OPCODE_LMULS(MACC, VV):
2246   case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
2247     // If the tail policy is undisturbed we can't commute.
2248     assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2249     if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2250       return false;
2251 
2252     // For these instructions we can only swap operand 1 and operand 3 by
2253     // changing the opcode.
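         // For example, swapping the tied accumulator of a vfmacc with operand
         // 3 yields the equivalent vfmadd form; commuteInstructionImpl below
         // performs the matching opcode change.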
2254     unsigned CommutableOpIdx1 = 1;
2255     unsigned CommutableOpIdx2 = 3;
2256     if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2257                               CommutableOpIdx2))
2258       return false;
2259     return true;
2260   }
2261   case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
2262   case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
2263   case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
2264   case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
2265   case CASE_VFMA_OPCODE_LMULS(MADD, VV):
2266   case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
2267     // If the tail policy is undisturbed we can't commute.
2268     assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2269     if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2270       return false;
2271 
2272     // For these instructions we have more freedom. We can commute with the
2273     // other multiplicand or with the addend/subtrahend/minuend.
2274 
2275     // Any fixed operand must be from source 1, 2 or 3.
2276     if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
2277       return false;
2278     if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
2279       return false;
2280 
2281     // If both ops are fixed, one must be the tied source.
2282     if (SrcOpIdx1 != CommuteAnyOperandIndex &&
2283         SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
2284       return false;
2285 
2286     // Look for two different register operands assumed to be commutable
2287     // regardless of the FMA opcode. The FMA opcode is adjusted later if
2288     // needed.
2289     if (SrcOpIdx1 == CommuteAnyOperandIndex ||
2290         SrcOpIdx2 == CommuteAnyOperandIndex) {
2291       // At least one of the operands to be commuted is not specified, so
2292       // this method is free to choose appropriate commutable operands.
2293       unsigned CommutableOpIdx1 = SrcOpIdx1;
2294       if (SrcOpIdx1 == SrcOpIdx2) {
2295         // Neither operand is fixed. Set one of the commutable
2296         // operands to the tied source.
2297         CommutableOpIdx1 = 1;
2298       } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
2299         // Only one of the operands is not fixed.
2300         CommutableOpIdx1 = SrcOpIdx2;
2301       }
2302 
2303       // CommutableOpIdx1 is well defined now. Let's choose another commutable
2304       // operand and assign its index to CommutableOpIdx2.
2305       unsigned CommutableOpIdx2;
2306       if (CommutableOpIdx1 != 1) {
2307         // If we haven't already used the tied source, we must use it now.
2308         CommutableOpIdx2 = 1;
2309       } else {
2310         Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();
2311 
2312         // The commuted operands should have different registers.
2313         // Otherwise, the commute transformation does not change anything and
2314         // is useless. We use this as a hint to make our decision.
2315         if (Op1Reg != MI.getOperand(2).getReg())
2316           CommutableOpIdx2 = 2;
2317         else
2318           CommutableOpIdx2 = 3;
2319       }
2320 
2321       // Assign the found pair of commutable indices to SrcOpIdx1 and
2322       // SrcOpIdx2 to return those values.
2323       if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2324                                 CommutableOpIdx2))
2325         return false;
2326     }
2327 
2328     return true;
2329   }
2330   }
2331 
2332   return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2333 }
2334 
2335 #define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL)               \
2336   case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL:                                \
2337     Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL;                             \
2338     break;
2339 
2340 #define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)                   \
2341   CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1)                       \
2342   CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2)                       \
2343   CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4)                       \
2344   CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
2345 
2346 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)                  \
2347   CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2)                      \
2348   CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)
2349 
2350 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)                  \
2351   CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4)                      \
2352   CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)
2353 
2354 #define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE)                      \
2355   CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8)                      \
2356   CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
2357 
2358 #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP)                           \
2359   CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VF16)                        \
2360   CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VF32)                        \
2361   CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VF64)
2362 
2363 MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
2364                                                      bool NewMI,
2365                                                      unsigned OpIdx1,
2366                                                      unsigned OpIdx2) const {
2367   auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
2368     if (NewMI)
2369       return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
2370     return MI;
2371   };
2372 
2373   switch (MI.getOpcode()) {
2374   case RISCV::TH_MVEQZ:
2375   case RISCV::TH_MVNEZ: {
2376     auto &WorkingMI = cloneIfNew(MI);
2377     WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ
2378                                                             : RISCV::TH_MVEQZ));
2379     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
2380                                                    OpIdx2);
2381   }
2382   case RISCV::PseudoCCMOVGPR: {
2383     // CCMOV can be commuted by inverting the condition.
2384     auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
2385     CC = RISCVCC::getOppositeBranchCondition(CC);
2386     auto &WorkingMI = cloneIfNew(MI);
2387     WorkingMI.getOperand(3).setImm(CC);
2388     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
2389                                                    OpIdx1, OpIdx2);
2390   }
2391   case CASE_VFMA_SPLATS(FMACC):
2392   case CASE_VFMA_SPLATS(FMADD):
2393   case CASE_VFMA_SPLATS(FMSAC):
2394   case CASE_VFMA_SPLATS(FMSUB):
2395   case CASE_VFMA_SPLATS(FNMACC):
2396   case CASE_VFMA_SPLATS(FNMADD):
2397   case CASE_VFMA_SPLATS(FNMSAC):
2398   case CASE_VFMA_SPLATS(FNMSUB):
2399   case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV):
2400   case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV):
2401   case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV):
2402   case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV):
2403   case CASE_VFMA_OPCODE_LMULS(MADD, VX):
2404   case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
2405   case CASE_VFMA_OPCODE_LMULS(MACC, VX):
2406   case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
2407   case CASE_VFMA_OPCODE_LMULS(MACC, VV):
2408   case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
2409     // It only makes sense to toggle these between clobbering the
2410     // addend/subtrahend/minuend and clobbering one of the multiplicands.
2411     assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
2412     assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
2413     unsigned Opc;
2414     switch (MI.getOpcode()) {
2415       default:
2416         llvm_unreachable("Unexpected opcode");
2417       CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
2418       CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
2419       CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
2420       CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
2421       CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
2422       CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
2423       CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
2424       CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
2425       CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMACC, FMADD, VV)
2426       CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSAC, FMSUB, VV)
2427       CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMACC, FNMADD, VV)
2428       CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSAC, FNMSUB, VV)
2429       CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
2430       CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
2431       CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
2432       CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
2433       CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
2434       CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
2435     }
2436 
2437     auto &WorkingMI = cloneIfNew(MI);
2438     WorkingMI.setDesc(get(Opc));
2439     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2440                                                    OpIdx1, OpIdx2);
2441   }
2442   case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
2443   case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
2444   case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
2445   case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
2446   case CASE_VFMA_OPCODE_LMULS(MADD, VV):
2447   case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
2448     assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
2449     // If one of the operands is the addend, we need to change the opcode.
2450     // Otherwise we're just swapping two of the multiplicands.
2451     if (OpIdx1 == 3 || OpIdx2 == 3) {
2452       unsigned Opc;
2453       switch (MI.getOpcode()) {
2454         default:
2455           llvm_unreachable("Unexpected opcode");
2456         CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMADD, FMACC, VV)
2457         CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSUB, FMSAC, VV)
2458         CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMADD, FNMACC, VV)
2459         CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSUB, FNMSAC, VV)
2460         CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
2461         CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
2462       }
2463 
2464       auto &WorkingMI = cloneIfNew(MI);
2465       WorkingMI.setDesc(get(Opc));
2466       return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2467                                                      OpIdx1, OpIdx2);
2468     }
2469     // Let the default code handle it.
2470     break;
2471   }
2472   }
2473 
2474   return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
2475 }
2476 
2477 #undef CASE_VFMA_CHANGE_OPCODE_SPLATS
2478 #undef CASE_VFMA_CHANGE_OPCODE_LMULS
2479 #undef CASE_VFMA_CHANGE_OPCODE_COMMON
2480 #undef CASE_VFMA_SPLATS
2481 #undef CASE_VFMA_OPCODE_LMULS
2482 #undef CASE_VFMA_OPCODE_COMMON
2483 
2484 // clang-format off
2485 #define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL)                                    \
2486   RISCV::PseudoV##OP##_##LMUL##_TIED
2487 
2488 #define CASE_WIDEOP_OPCODE_LMULS_MF4(OP)                                       \
2489   CASE_WIDEOP_OPCODE_COMMON(OP, MF4):                                          \
2490   case CASE_WIDEOP_OPCODE_COMMON(OP, MF2):                                     \
2491   case CASE_WIDEOP_OPCODE_COMMON(OP, M1):                                      \
2492   case CASE_WIDEOP_OPCODE_COMMON(OP, M2):                                      \
2493   case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
2494 
2495 #define CASE_WIDEOP_OPCODE_LMULS(OP)                                           \
2496   CASE_WIDEOP_OPCODE_COMMON(OP, MF8):                                          \
2497   case CASE_WIDEOP_OPCODE_LMULS_MF4(OP)
2498 // clang-format on
2499 
2500 #define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL)                             \
2501   case RISCV::PseudoV##OP##_##LMUL##_TIED:                                     \
2502     NewOpc = RISCV::PseudoV##OP##_##LMUL;                                      \
2503     break;
2504 
2505 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)                                 \
2506   CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4)                                    \
2507   CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2)                                    \
2508   CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1)                                     \
2509   CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2)                                     \
2510   CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
2511 
2512 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP)                                    \
2513   CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8)                                    \
2514   CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
2515 
2516 MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
2517                                                     LiveVariables *LV,
2518                                                     LiveIntervals *LIS) const {
2519   MachineInstrBuilder MIB;
2520   switch (MI.getOpcode()) {
2521   default:
2522     return nullptr;
2523   case CASE_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
2524   case CASE_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
2525     assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
2526            MI.getNumExplicitOperands() == 7 &&
2527            "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
2528     // If the tail policy is undisturbed we can't convert.
2529     if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
2530          1) == 0)
2531       return nullptr;
2532     // clang-format off
2533     unsigned NewOpc;
2534     switch (MI.getOpcode()) {
2535     default:
2536       llvm_unreachable("Unexpected opcode");
2537     CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
2538     CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
2539     }
2540     // clang-format on
2541 
2542     MachineBasicBlock &MBB = *MI.getParent();
2543     MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
2544               .add(MI.getOperand(0))
2545               .addReg(MI.getOperand(0).getReg(), RegState::Undef)
2546               .add(MI.getOperand(1))
2547               .add(MI.getOperand(2))
2548               .add(MI.getOperand(3))
2549               .add(MI.getOperand(4))
2550               .add(MI.getOperand(5))
2551               .add(MI.getOperand(6));
2552     break;
2553   }
2554   case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
2555   case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
2556   case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
2557   case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
2558     // If the tail policy is undisturbed we can't convert.
2559     assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
2560            MI.getNumExplicitOperands() == 6);
2561     if ((MI.getOperand(5).getImm() & 1) == 0)
2562       return nullptr;
2563 
2564     // clang-format off
2565     unsigned NewOpc;
2566     switch (MI.getOpcode()) {
2567     default:
2568       llvm_unreachable("Unexpected opcode");
2569     CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
2570     CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
2571     CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
2572     CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
2573     }
2574     // clang-format on
2575 
2576     MachineBasicBlock &MBB = *MI.getParent();
2577     MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
2578               .add(MI.getOperand(0))
2579               .addReg(MI.getOperand(0).getReg(), RegState::Undef)
2580               .add(MI.getOperand(1))
2581               .add(MI.getOperand(2))
2582               .add(MI.getOperand(3))
2583               .add(MI.getOperand(4))
2584               .add(MI.getOperand(5));
2585   }
2586   }
2587   MIB.copyImplicitOps(MI);
2588 
2589   if (LV) {
2590     unsigned NumOps = MI.getNumOperands();
2591     for (unsigned I = 1; I < NumOps; ++I) {
2592       MachineOperand &Op = MI.getOperand(I);
2593       if (Op.isReg() && Op.isKill())
2594         LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
2595     }
2596   }
2597 
2598   if (LIS) {
2599     SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
2600 
2601     if (MI.getOperand(0).isEarlyClobber()) {
2602       // Use operand 1 was tied to the early-clobber def operand 0, so its
2603       // live interval could have ended at an early-clobber slot. Now that they
2604       // are no longer tied, we need to update it to the normal register slot.
2605       LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
2606       LiveRange::Segment *S = LI.getSegmentContaining(Idx);
2607       if (S->end == Idx.getRegSlot(true))
2608         S->end = Idx.getRegSlot();
2609     }
2610   }
2611 
2612   return MIB;
2613 }
2614 
2615 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
2616 #undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
2617 #undef CASE_WIDEOP_OPCODE_LMULS
2618 #undef CASE_WIDEOP_OPCODE_COMMON
2619 
2620 void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
2621                                            MachineBasicBlock &MBB,
2622                                            MachineBasicBlock::iterator II,
2623                                            const DebugLoc &DL, Register DestReg,
2624                                            int64_t Amount,
2625                                            MachineInstr::MIFlag Flag) const {
2626   assert(Amount > 0 && "There is no need to get VLEN scaled value.");
2627   assert(Amount % 8 == 0 &&
2628          "Reserve the stack by the multiple of one vector size.");
2629 
2630   MachineRegisterInfo &MRI = MF.getRegInfo();
2631   int64_t NumOfVReg = Amount / 8;
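       // DestReg will end up holding vlenb * (Amount / 8); since vlenb is
       // 8 * vscale (with LLVM's 64-bit RVV blocks), that equals
       // Amount * vscale bytes.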
2632 
2633   BuildMI(MBB, II, DL, get(RISCV::PseudoReadVLENB), DestReg).setMIFlag(Flag);
2634   assert(isInt<32>(NumOfVReg) &&
2635          "Expect the number of vector registers within 32-bits.");
2636   if (llvm::has_single_bit<uint32_t>(NumOfVReg)) {
2637     uint32_t ShiftAmount = Log2_32(NumOfVReg);
2638     if (ShiftAmount == 0)
2639       return;
2640     BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
2641         .addReg(DestReg, RegState::Kill)
2642         .addImm(ShiftAmount)
2643         .setMIFlag(Flag);
2644   } else if (STI.hasStdExtZba() &&
2645              ((NumOfVReg % 3 == 0 && isPowerOf2_64(NumOfVReg / 3)) ||
2646               (NumOfVReg % 5 == 0 && isPowerOf2_64(NumOfVReg / 5)) ||
2647               (NumOfVReg % 9 == 0 && isPowerOf2_64(NumOfVReg / 9)))) {
2648     // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
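         // For example, NumOfVReg == 12 == 3 * (1 << 2) lowers to:
         //   slli   DestReg, DestReg, 2           ; DestReg = 4 * vlenb
         //   sh1add DestReg, DestReg, DestReg     ; DestReg = 3 * DestReg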
2649     unsigned Opc;
2650     uint32_t ShiftAmount;
2651     if (NumOfVReg % 9 == 0) {
2652       Opc = RISCV::SH3ADD;
2653       ShiftAmount = Log2_64(NumOfVReg / 9);
2654     } else if (NumOfVReg % 5 == 0) {
2655       Opc = RISCV::SH2ADD;
2656       ShiftAmount = Log2_64(NumOfVReg / 5);
2657     } else if (NumOfVReg % 3 == 0) {
2658       Opc = RISCV::SH1ADD;
2659       ShiftAmount = Log2_64(NumOfVReg / 3);
2660     } else {
2661       llvm_unreachable("Unexpected number of vregs");
2662     }
2663     if (ShiftAmount)
2664       BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
2665           .addReg(DestReg, RegState::Kill)
2666           .addImm(ShiftAmount)
2667           .setMIFlag(Flag);
2668     BuildMI(MBB, II, DL, get(Opc), DestReg)
2669         .addReg(DestReg, RegState::Kill)
2670         .addReg(DestReg)
2671         .setMIFlag(Flag);
2672   } else if (llvm::has_single_bit<uint32_t>(NumOfVReg - 1)) {
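         // NumOfVReg == 2^k + 1: compute (DestReg << k) + DestReg.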
2673     Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
2674     uint32_t ShiftAmount = Log2_32(NumOfVReg - 1);
2675     BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
2676         .addReg(DestReg)
2677         .addImm(ShiftAmount)
2678         .setMIFlag(Flag);
2679     BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
2680         .addReg(ScaledRegister, RegState::Kill)
2681         .addReg(DestReg, RegState::Kill)
2682         .setMIFlag(Flag);
2683   } else if (llvm::has_single_bit<uint32_t>(NumOfVReg + 1)) {
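         // NumOfVReg == 2^k - 1: compute (DestReg << k) - DestReg.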
2684     Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
2685     uint32_t ShiftAmount = Log2_32(NumOfVReg + 1);
2686     BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
2687         .addReg(DestReg)
2688         .addImm(ShiftAmount)
2689         .setMIFlag(Flag);
2690     BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg)
2691         .addReg(ScaledRegister, RegState::Kill)
2692         .addReg(DestReg, RegState::Kill)
2693         .setMIFlag(Flag);
2694   } else {
2695     Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
2696     movImm(MBB, II, DL, N, NumOfVReg, Flag);
2697     if (!STI.hasStdExtM() && !STI.hasStdExtZmmul())
2698       MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
2699           MF.getFunction(),
2700           "M- or Zmmul-extension must be enabled to calculate the vscaled size/"
2701           "offset."});
2702     BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
2703         .addReg(DestReg, RegState::Kill)
2704         .addReg(N, RegState::Kill)
2705         .setMIFlag(Flag);
2706   }
2707 }
2708 
2709 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
2710 RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
2711   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
2712       {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
2713        {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
2714   return ArrayRef(TargetFlags);
2715 }
2716 
2717 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
2718 bool RISCV::isSEXT_W(const MachineInstr &MI) {
2719   return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
2720          MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0;
2721 }
2722 
2723 // Returns true if this is the zext.w pattern, adduw rd, rs1, x0.
2724 bool RISCV::isZEXT_W(const MachineInstr &MI) {
2725   return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() &&
2726          MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0;
2727 }
2728 
2729 // Returns true if this is the zext.b pattern, andi rd, rs1, 255.
2730 bool RISCV::isZEXT_B(const MachineInstr &MI) {
2731   return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() &&
2732          MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
2733 }
2734 
2735 static bool isRVVWholeLoadStore(unsigned Opcode) {
2736   switch (Opcode) {
2737   default:
2738     return false;
2739   case RISCV::VS1R_V:
2740   case RISCV::VS2R_V:
2741   case RISCV::VS4R_V:
2742   case RISCV::VS8R_V:
2743   case RISCV::VL1RE8_V:
2744   case RISCV::VL2RE8_V:
2745   case RISCV::VL4RE8_V:
2746   case RISCV::VL8RE8_V:
2747   case RISCV::VL1RE16_V:
2748   case RISCV::VL2RE16_V:
2749   case RISCV::VL4RE16_V:
2750   case RISCV::VL8RE16_V:
2751   case RISCV::VL1RE32_V:
2752   case RISCV::VL2RE32_V:
2753   case RISCV::VL4RE32_V:
2754   case RISCV::VL8RE32_V:
2755   case RISCV::VL1RE64_V:
2756   case RISCV::VL2RE64_V:
2757   case RISCV::VL4RE64_V:
2758   case RISCV::VL8RE64_V:
2759     return true;
2760   }
2761 }
2762 
2763 bool RISCV::isRVVSpill(const MachineInstr &MI) {
2764   // RVV lacks any support for immediate addressing for stack addresses, so be
2765   // conservative.
2766   unsigned Opcode = MI.getOpcode();
2767   if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
2768       !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
2769     return false;
2770   return true;
2771 }
2772 
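     // For the Zvlsseg spill/reload pseudos below, return the pair (NF, LMUL):
     // the number of fields in the segment and the per-field register-group
     // multiplier encoded in the pseudo's name.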
2773 std::optional<std::pair<unsigned, unsigned>>
2774 RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
2775   switch (Opcode) {
2776   default:
2777     return std::nullopt;
2778   case RISCV::PseudoVSPILL2_M1:
2779   case RISCV::PseudoVRELOAD2_M1:
2780     return std::make_pair(2u, 1u);
2781   case RISCV::PseudoVSPILL2_M2:
2782   case RISCV::PseudoVRELOAD2_M2:
2783     return std::make_pair(2u, 2u);
2784   case RISCV::PseudoVSPILL2_M4:
2785   case RISCV::PseudoVRELOAD2_M4:
2786     return std::make_pair(2u, 4u);
2787   case RISCV::PseudoVSPILL3_M1:
2788   case RISCV::PseudoVRELOAD3_M1:
2789     return std::make_pair(3u, 1u);
2790   case RISCV::PseudoVSPILL3_M2:
2791   case RISCV::PseudoVRELOAD3_M2:
2792     return std::make_pair(3u, 2u);
2793   case RISCV::PseudoVSPILL4_M1:
2794   case RISCV::PseudoVRELOAD4_M1:
2795     return std::make_pair(4u, 1u);
2796   case RISCV::PseudoVSPILL4_M2:
2797   case RISCV::PseudoVRELOAD4_M2:
2798     return std::make_pair(4u, 2u);
2799   case RISCV::PseudoVSPILL5_M1:
2800   case RISCV::PseudoVRELOAD5_M1:
2801     return std::make_pair(5u, 1u);
2802   case RISCV::PseudoVSPILL6_M1:
2803   case RISCV::PseudoVRELOAD6_M1:
2804     return std::make_pair(6u, 1u);
2805   case RISCV::PseudoVSPILL7_M1:
2806   case RISCV::PseudoVRELOAD7_M1:
2807     return std::make_pair(7u, 1u);
2808   case RISCV::PseudoVSPILL8_M1:
2809   case RISCV::PseudoVRELOAD8_M1:
2810     return std::make_pair(8u, 1u);
2811   }
2812 }
2813 
2814 bool RISCV::isFaultFirstLoad(const MachineInstr &MI) {
2815   return MI.getNumExplicitDefs() == 2 && MI.modifiesRegister(RISCV::VL) &&
2816          !MI.isInlineAsm();
2817 }
2818 
2819 bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
2820   int16_t MI1FrmOpIdx =
2821       RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
2822   int16_t MI2FrmOpIdx =
2823       RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm);
2824   if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
2825     return false;
2826   MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx);
2827   MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx);
2828   return FrmOp1.getImm() == FrmOp2.getImm();
2829 }
2830