1 //===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the RISC-V implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "RISCVInstrInfo.h"
14 #include "MCTargetDesc/RISCVMatInt.h"
15 #include "RISCV.h"
16 #include "RISCVMachineFunctionInfo.h"
17 #include "RISCVSubtarget.h"
18 #include "RISCVTargetMachine.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/Analysis/MemoryLocation.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/CodeGen/LiveIntervals.h"
24 #include "llvm/CodeGen/LiveVariables.h"
25 #include "llvm/CodeGen/MachineCombinerPattern.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/MachineTraceMetrics.h"
30 #include "llvm/CodeGen/RegisterScavenging.h"
31 #include "llvm/CodeGen/StackMaps.h"
32 #include "llvm/IR/DebugInfoMetadata.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/MC/MCInstBuilder.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/ErrorHandling.h"
37
38 using namespace llvm;
39
40 #define GEN_CHECK_COMPRESS_INSTR
41 #include "RISCVGenCompressInstEmitter.inc"
42
43 #define GET_INSTRINFO_CTOR_DTOR
44 #define GET_INSTRINFO_NAMED_OPS
45 #include "RISCVGenInstrInfo.inc"
46
47 static cl::opt<bool> PreferWholeRegisterMove(
48 "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
49 cl::desc("Prefer whole register move for vector registers."));
50
51 static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
52 "riscv-force-machine-combiner-strategy", cl::Hidden,
53 cl::desc("Force machine combiner to use a specific strategy for machine "
54 "trace metrics evaluation."),
55 cl::init(MachineTraceStrategy::TS_NumStrategies),
56 cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
57 "Local strategy."),
58 clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
59 "MinInstrCount strategy.")));
60
61 namespace llvm::RISCVVPseudosTable {
62
63 using namespace RISCV;
64
65 #define GET_RISCVVPseudosTable_IMPL
66 #include "RISCVGenSearchableTables.inc"
67
68 } // namespace llvm::RISCVVPseudosTable
69
70 namespace llvm::RISCV {
71
72 #define GET_RISCVMaskedPseudosTable_IMPL
73 #include "RISCVGenSearchableTables.inc"
74
75 } // end namespace llvm::RISCV
76
77 RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
78 : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
79 STI(STI) {}
80
81 MCInst RISCVInstrInfo::getNop() const {
82 if (STI.hasStdExtCOrZca())
83 return MCInstBuilder(RISCV::C_NOP);
84 return MCInstBuilder(RISCV::ADDI)
85 .addReg(RISCV::X0)
86 .addReg(RISCV::X0)
87 .addImm(0);
88 }
89
90 Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
91 int &FrameIndex) const {
92 unsigned Dummy;
93 return isLoadFromStackSlot(MI, FrameIndex, Dummy);
94 }
95
96 Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
97 int &FrameIndex,
98 unsigned &MemBytes) const {
99 switch (MI.getOpcode()) {
100 default:
101 return 0;
102 case RISCV::LB:
103 case RISCV::LBU:
104 MemBytes = 1;
105 break;
106 case RISCV::LH:
107 case RISCV::LHU:
108 case RISCV::FLH:
109 MemBytes = 2;
110 break;
111 case RISCV::LW:
112 case RISCV::FLW:
113 case RISCV::LWU:
114 MemBytes = 4;
115 break;
116 case RISCV::LD:
117 case RISCV::FLD:
118 MemBytes = 8;
119 break;
120 }
121
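// Match only accesses whose base operand is a FrameIndex with a zero
// immediate offset; anything else is not a simple stack-slot access.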
122 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
123 MI.getOperand(2).getImm() == 0) {
124 FrameIndex = MI.getOperand(1).getIndex();
125 return MI.getOperand(0).getReg();
126 }
127
128 return 0;
129 }
130
131 Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
132 int &FrameIndex) const {
133 unsigned Dummy;
134 return isStoreToStackSlot(MI, FrameIndex, Dummy);
135 }
136
137 Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
138 int &FrameIndex,
139 unsigned &MemBytes) const {
140 switch (MI.getOpcode()) {
141 default:
142 return 0;
143 case RISCV::SB:
144 MemBytes = 1;
145 break;
146 case RISCV::SH:
147 case RISCV::FSH:
148 MemBytes = 2;
149 break;
150 case RISCV::SW:
151 case RISCV::FSW:
152 MemBytes = 4;
153 break;
154 case RISCV::SD:
155 case RISCV::FSD:
156 MemBytes = 8;
157 break;
158 }
159
160 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
161 MI.getOperand(2).getImm() == 0) {
162 FrameIndex = MI.getOperand(1).getIndex();
163 return MI.getOperand(0).getReg();
164 }
165
166 return 0;
167 }
168
169 bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
170 const MachineInstr &MI) const {
171 if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VID_V &&
172 MI.getOperand(1).isUndef() &&
173 /* After RISCVInsertVSETVLI most pseudos will have implicit uses of vl and
174 vtype. Make sure we only rematerialize before RISCVInsertVSETVLI,
175 i.e. -riscv-vsetvl-after-rvv-regalloc=true */
176 !MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
177 return true;
178 return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
179 }
180
181 static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
182 unsigned NumRegs) {
183 return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
184 }
185
186 static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
187 const MachineBasicBlock &MBB,
188 MachineBasicBlock::const_iterator MBBI,
189 MachineBasicBlock::const_iterator &DefMBBI,
190 RISCVII::VLMUL LMul) {
191 if (PreferWholeRegisterMove)
192 return false;
193
194 assert(MBBI->getOpcode() == TargetOpcode::COPY &&
195 "Unexpected COPY instruction.");
196 Register SrcReg = MBBI->getOperand(1).getReg();
197 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
198
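// Walk backwards from the COPY, looking for the instruction that defines
// SrcReg and for any vsetvli encountered along the way.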
199 bool FoundDef = false;
200 bool FirstVSetVLI = false;
201 unsigned FirstSEW = 0;
202 while (MBBI != MBB.begin()) {
203 --MBBI;
204 if (MBBI->isMetaInstruction())
205 continue;
206
207 if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
208 MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
209 MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
210 // There is a vsetvli between the COPY and the source-defining instruction.
211 // vy = def_vop ... (producing instruction)
212 // ...
213 // vsetvli
214 // ...
215 // vx = COPY vy
216 if (!FoundDef) {
217 if (!FirstVSetVLI) {
218 FirstVSetVLI = true;
219 unsigned FirstVType = MBBI->getOperand(2).getImm();
220 RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
221 FirstSEW = RISCVVType::getSEW(FirstVType);
222 // The first encountered vsetvli must have the same lmul as the
223 // register class of COPY.
224 if (FirstLMul != LMul)
225 return false;
226 }
227 // Only permit `vsetvli x0, x0, vtype` between the COPY and the
228 // source-defining instruction.
229 if (MBBI->getOperand(0).getReg() != RISCV::X0)
230 return false;
231 if (MBBI->getOperand(1).isImm())
232 return false;
233 if (MBBI->getOperand(1).getReg() != RISCV::X0)
234 return false;
235 continue;
236 }
237
238 // MBBI is the first vsetvli before the producing instruction.
239 unsigned VType = MBBI->getOperand(2).getImm();
240 // If there is a vsetvli between COPY and the producing instruction.
241 if (FirstVSetVLI) {
242 // If SEW is different, return false.
243 if (RISCVVType::getSEW(VType) != FirstSEW)
244 return false;
245 }
246
247 // If the vsetvli is tail undisturbed, keep the whole register move.
248 if (!RISCVVType::isTailAgnostic(VType))
249 return false;
250
251 // The checking is conservative. We only have register classes for
252 // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
253 // for fractional LMUL operations. However, we cannot use the vsetvli
254 // LMUL for widening operations, since the result of a widening operation
255 // is 2 x LMUL.
256 return LMul == RISCVVType::getVLMUL(VType);
257 } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
258 return false;
259 } else if (MBBI->getNumDefs()) {
260 // Check all the instructions which will change VL.
261 // For example, vleff has implicit def VL.
262 if (MBBI->modifiesRegister(RISCV::VL, /*TRI=*/nullptr))
263 return false;
264
265 // Only convert whole register copies to vmv.v.v when the defining
266 // value appears in the explicit operands.
267 for (const MachineOperand &MO : MBBI->explicit_operands()) {
268 if (!MO.isReg() || !MO.isDef())
269 continue;
270 if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
271 // We only permit the source of the COPY to have the same LMUL as the
272 // defined operand.
273 // There are cases where we need to keep the whole register copy if the
274 // LMUL is different.
275 // For example,
276 // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,m
277 // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
278 // # The COPY may be created by vlmul_trunc intrinsic.
279 // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
280 //
281 // After widening, the valid value will be 4 x e32 elements. If we
282 // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
283 // FIXME: The COPY of subregister of Zvlsseg register will not be able
284 // to convert to vmv.v.[v|i] under the constraint.
285 if (MO.getReg() != SrcReg)
286 return false;
287
288 // For widening reduction instructions with an LMUL_1 input vector,
289 // checking only the LMUL is insufficient because the reduction result
290 // is always LMUL_1.
291 // For example,
292 // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
293 // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
294 // $v26 = COPY killed renamable $v8
295 // After widening, the valid value will be 1 x e16 elements. If we
296 // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
297 uint64_t TSFlags = MBBI->getDesc().TSFlags;
298 if (RISCVII::isRVVWideningReduction(TSFlags))
299 return false;
300
301 // If the producing instruction does not depend on vsetvli, do not
302 // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
303 if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
304 return false;
305
306 // Found the definition.
307 FoundDef = true;
308 DefMBBI = MBBI;
309 break;
310 }
311 }
312 }
313 }
314
315 return false;
316 }
317
318 void RISCVInstrInfo::copyPhysRegVector(
319 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
320 const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
321 const TargetRegisterClass *RegClass) const {
322 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
323 RISCVII::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags);
324 unsigned NF = RISCVRI::getNF(RegClass->TSFlags);
325
326 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
327 uint16_t DstEncoding = TRI->getEncodingValue(DstReg);
328 auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(LMul);
329 assert(!Fractional && "It is impossible to have a fractional LMUL here.");
330 unsigned NumRegs = NF * LMulVal;
331 bool ReversedCopy =
332 forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs);
333 if (ReversedCopy) {
334 // If the src and dest overlap when copying a tuple, we need to copy the
335 // registers in reverse.
336 SrcEncoding += NumRegs - 1;
337 DstEncoding += NumRegs - 1;
338 }
339
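// I counts how many of the NumRegs vector registers have been copied so far.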
340 unsigned I = 0;
341 auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
342 -> std::tuple<RISCVII::VLMUL, const TargetRegisterClass &, unsigned,
343 unsigned, unsigned> {
344 if (ReversedCopy) {
345 // For reversed copying, if there are enough aligned registers (8/4/2), we
346 // can do a larger copy (LMUL8/4/2).
347 // Besides, we already know from forwardCopyWillClobberTuple that
348 // DstEncoding is larger than SrcEncoding, so the difference between
349 // DstEncoding and SrcEncoding should be >= the LMUL value we try to use
350 // to avoid clobbering.
351 uint16_t Diff = DstEncoding - SrcEncoding;
352 if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
353 DstEncoding % 8 == 7)
354 return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
355 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
356 if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
357 DstEncoding % 4 == 3)
358 return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
359 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
360 if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
361 DstEncoding % 2 == 1)
362 return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
363 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
364 // Or we should do LMUL1 copying.
365 return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
366 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
367 }
368
369 // For forward copying, if the source and destination register encodings
370 // are aligned to 8/4/2, we can do an LMUL8/4/2 copy.
371 if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
372 return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
373 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
374 if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
375 return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
376 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
377 if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
378 return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
379 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
380 // Or we should do LMUL1 copying.
381 return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
382 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
383 };
384 auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
385 uint16_t Encoding) {
386 MCRegister Reg = RISCV::V0 + Encoding;
387 if (&RegClass == &RISCV::VRRegClass)
388 return Reg;
389 return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
390 };
391 while (I != NumRegs) {
392 // For non-segment copying, we only do this once as the registers are always
393 // aligned.
394 // For segment copying, we may do this several times. If the registers are
395 // aligned to a larger LMUL, we can eliminate some copies.
396 auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
397 GetCopyInfo(SrcEncoding, DstEncoding);
398 auto [NumCopied, _] = RISCVVType::decodeVLMUL(LMulCopied);
399
400 MachineBasicBlock::const_iterator DefMBBI;
401 if (LMul == LMulCopied &&
402 isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
403 Opc = VVOpc;
404 if (DefMBBI->getOpcode() == VIOpc)
405 Opc = VIOpc;
406 }
407
408 // Emit actual copying.
409 // For reversed copying, the encoding should be decreased.
410 MCRegister ActualSrcReg = FindRegWithEncoding(
411 RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
412 MCRegister ActualDstReg = FindRegWithEncoding(
413 RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);
414
415 auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
416 bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;
417 bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;
418 if (UseVMV)
419 MIB.addReg(ActualDstReg, RegState::Undef);
420 if (UseVMV_V_I)
421 MIB = MIB.add(DefMBBI->getOperand(2));
422 else
423 MIB = MIB.addReg(ActualSrcReg, getKillRegState(KillSrc));
424 if (UseVMV) {
425 const MCInstrDesc &Desc = DefMBBI->getDesc();
426 MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
427 MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
428 MIB.addImm(0); // tu, mu
429 MIB.addReg(RISCV::VL, RegState::Implicit);
430 MIB.addReg(RISCV::VTYPE, RegState::Implicit);
431 }
432
433 // If we are copying in reverse, we should decrease the encodings.
434 SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
435 DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
436 I += NumCopied;
437 }
438 }
439
440 void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
441 MachineBasicBlock::iterator MBBI,
442 const DebugLoc &DL, MCRegister DstReg,
443 MCRegister SrcReg, bool KillSrc) const {
444 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
445
446 if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
447 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
448 .addReg(SrcReg, getKillRegState(KillSrc))
449 .addImm(0);
450 return;
451 }
452
453 if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
454 // Emit an ADDI for both parts of GPRPair.
455 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
456 TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
457 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
458 getKillRegState(KillSrc))
459 .addImm(0);
460 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
461 TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
462 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
463 getKillRegState(KillSrc))
464 .addImm(0);
465 return;
466 }
467
468 // Handle copy from csr
469 if (RISCV::VCSRRegClass.contains(SrcReg) &&
470 RISCV::GPRRegClass.contains(DstReg)) {
471 BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
472 .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
473 .addReg(RISCV::X0);
474 return;
475 }
476
477 if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
478 unsigned Opc;
479 if (STI.hasStdExtZfh()) {
480 Opc = RISCV::FSGNJ_H;
481 } else {
482 assert(STI.hasStdExtF() &&
483 (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
484 "Unexpected extensions");
485 // Zfhmin/Zfbfmin don't have FSGNJ_H, so replace FSGNJ_H with FSGNJ_S.
486 DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
487 &RISCV::FPR32RegClass);
488 SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
489 &RISCV::FPR32RegClass);
490 Opc = RISCV::FSGNJ_S;
491 }
492 BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
493 .addReg(SrcReg, getKillRegState(KillSrc))
494 .addReg(SrcReg, getKillRegState(KillSrc));
495 return;
496 }
497
498 if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
499 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
500 .addReg(SrcReg, getKillRegState(KillSrc))
501 .addReg(SrcReg, getKillRegState(KillSrc));
502 return;
503 }
504
505 if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
506 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
507 .addReg(SrcReg, getKillRegState(KillSrc))
508 .addReg(SrcReg, getKillRegState(KillSrc));
509 return;
510 }
511
512 if (RISCV::FPR32RegClass.contains(DstReg) &&
513 RISCV::GPRRegClass.contains(SrcReg)) {
514 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
515 .addReg(SrcReg, getKillRegState(KillSrc));
516 return;
517 }
518
519 if (RISCV::GPRRegClass.contains(DstReg) &&
520 RISCV::FPR32RegClass.contains(SrcReg)) {
521 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
522 .addReg(SrcReg, getKillRegState(KillSrc));
523 return;
524 }
525
526 if (RISCV::FPR64RegClass.contains(DstReg) &&
527 RISCV::GPRRegClass.contains(SrcReg)) {
528 assert(STI.getXLen() == 64 && "Unexpected GPR size");
529 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
530 .addReg(SrcReg, getKillRegState(KillSrc));
531 return;
532 }
533
534 if (RISCV::GPRRegClass.contains(DstReg) &&
535 RISCV::FPR64RegClass.contains(SrcReg)) {
536 assert(STI.getXLen() == 64 && "Unexpected GPR size");
537 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
538 .addReg(SrcReg, getKillRegState(KillSrc));
539 return;
540 }
541
542 // VR->VR copies.
543 static const TargetRegisterClass *RVVRegClasses[] = {
544 &RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
545 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN2M2RegClass,
546 &RISCV::VRN2M4RegClass, &RISCV::VRN3M1RegClass, &RISCV::VRN3M2RegClass,
547 &RISCV::VRN4M1RegClass, &RISCV::VRN4M2RegClass, &RISCV::VRN5M1RegClass,
548 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass};
549 for (const auto &RegClass : RVVRegClasses) {
550 if (RegClass->contains(DstReg, SrcReg)) {
551 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
552 return;
553 }
554 }
555
556 llvm_unreachable("Impossible reg-to-reg copy");
557 }
558
559 void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
560 MachineBasicBlock::iterator I,
561 Register SrcReg, bool IsKill, int FI,
562 const TargetRegisterClass *RC,
563 const TargetRegisterInfo *TRI,
564 Register VReg) const {
565 MachineFunction *MF = MBB.getParent();
566 MachineFrameInfo &MFI = MF->getFrameInfo();
567
568 unsigned Opcode;
569 bool IsScalableVector = true;
570 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
571 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
572 RISCV::SW : RISCV::SD;
573 IsScalableVector = false;
574 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
575 Opcode = RISCV::PseudoRV32ZdinxSD;
576 IsScalableVector = false;
577 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
578 Opcode = RISCV::FSH;
579 IsScalableVector = false;
580 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
581 Opcode = RISCV::FSW;
582 IsScalableVector = false;
583 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
584 Opcode = RISCV::FSD;
585 IsScalableVector = false;
586 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
587 Opcode = RISCV::VS1R_V;
588 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
589 Opcode = RISCV::VS2R_V;
590 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
591 Opcode = RISCV::VS4R_V;
592 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
593 Opcode = RISCV::VS8R_V;
594 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
595 Opcode = RISCV::PseudoVSPILL2_M1;
596 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
597 Opcode = RISCV::PseudoVSPILL2_M2;
598 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
599 Opcode = RISCV::PseudoVSPILL2_M4;
600 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
601 Opcode = RISCV::PseudoVSPILL3_M1;
602 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
603 Opcode = RISCV::PseudoVSPILL3_M2;
604 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
605 Opcode = RISCV::PseudoVSPILL4_M1;
606 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
607 Opcode = RISCV::PseudoVSPILL4_M2;
608 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
609 Opcode = RISCV::PseudoVSPILL5_M1;
610 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
611 Opcode = RISCV::PseudoVSPILL6_M1;
612 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
613 Opcode = RISCV::PseudoVSPILL7_M1;
614 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
615 Opcode = RISCV::PseudoVSPILL8_M1;
616 else
617 llvm_unreachable("Can't store this register to stack slot");
618
619 if (IsScalableVector) {
620 MachineMemOperand *MMO = MF->getMachineMemOperand(
621 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
622 LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));
623
624 MFI.setStackID(FI, TargetStackID::ScalableVector);
625 BuildMI(MBB, I, DebugLoc(), get(Opcode))
626 .addReg(SrcReg, getKillRegState(IsKill))
627 .addFrameIndex(FI)
628 .addMemOperand(MMO);
629 } else {
630 MachineMemOperand *MMO = MF->getMachineMemOperand(
631 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
632 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
633
634 BuildMI(MBB, I, DebugLoc(), get(Opcode))
635 .addReg(SrcReg, getKillRegState(IsKill))
636 .addFrameIndex(FI)
637 .addImm(0)
638 .addMemOperand(MMO);
639 }
640 }
641
642 void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
643 MachineBasicBlock::iterator I,
644 Register DstReg, int FI,
645 const TargetRegisterClass *RC,
646 const TargetRegisterInfo *TRI,
647 Register VReg) const {
648 MachineFunction *MF = MBB.getParent();
649 MachineFrameInfo &MFI = MF->getFrameInfo();
650
651 unsigned Opcode;
652 bool IsScalableVector = true;
653 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
654 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
655 RISCV::LW : RISCV::LD;
656 IsScalableVector = false;
657 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
658 Opcode = RISCV::PseudoRV32ZdinxLD;
659 IsScalableVector = false;
660 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
661 Opcode = RISCV::FLH;
662 IsScalableVector = false;
663 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
664 Opcode = RISCV::FLW;
665 IsScalableVector = false;
666 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
667 Opcode = RISCV::FLD;
668 IsScalableVector = false;
669 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
670 Opcode = RISCV::VL1RE8_V;
671 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
672 Opcode = RISCV::VL2RE8_V;
673 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
674 Opcode = RISCV::VL4RE8_V;
675 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
676 Opcode = RISCV::VL8RE8_V;
677 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
678 Opcode = RISCV::PseudoVRELOAD2_M1;
679 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
680 Opcode = RISCV::PseudoVRELOAD2_M2;
681 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
682 Opcode = RISCV::PseudoVRELOAD2_M4;
683 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
684 Opcode = RISCV::PseudoVRELOAD3_M1;
685 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
686 Opcode = RISCV::PseudoVRELOAD3_M2;
687 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
688 Opcode = RISCV::PseudoVRELOAD4_M1;
689 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
690 Opcode = RISCV::PseudoVRELOAD4_M2;
691 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
692 Opcode = RISCV::PseudoVRELOAD5_M1;
693 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
694 Opcode = RISCV::PseudoVRELOAD6_M1;
695 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
696 Opcode = RISCV::PseudoVRELOAD7_M1;
697 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
698 Opcode = RISCV::PseudoVRELOAD8_M1;
699 else
700 llvm_unreachable("Can't load this register from stack slot");
701
702 if (IsScalableVector) {
703 MachineMemOperand *MMO = MF->getMachineMemOperand(
704 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
705 LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));
706
707 MFI.setStackID(FI, TargetStackID::ScalableVector);
708 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
709 .addFrameIndex(FI)
710 .addMemOperand(MMO);
711 } else {
712 MachineMemOperand *MMO = MF->getMachineMemOperand(
713 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
714 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
715
716 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
717 .addFrameIndex(FI)
718 .addImm(0)
719 .addMemOperand(MMO);
720 }
721 }
722
723 MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
724 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
725 MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
726 VirtRegMap *VRM) const {
727 const MachineFrameInfo &MFI = MF.getFrameInfo();
728
729 // The optimizations below narrow the load, so they are only valid for
730 // little-endian targets.
731 // TODO: Support big endian by adding an offset into the frame object?
732 if (MF.getDataLayout().isBigEndian())
733 return nullptr;
734
735 // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
736 if (Ops.size() != 1 || Ops[0] != 1)
737 return nullptr;
738
739 unsigned LoadOpc;
740 switch (MI.getOpcode()) {
741 default:
742 if (RISCV::isSEXT_W(MI)) {
743 LoadOpc = RISCV::LW;
744 break;
745 }
746 if (RISCV::isZEXT_W(MI)) {
747 LoadOpc = RISCV::LWU;
748 break;
749 }
750 if (RISCV::isZEXT_B(MI)) {
751 LoadOpc = RISCV::LBU;
752 break;
753 }
754 return nullptr;
755 case RISCV::SEXT_H:
756 LoadOpc = RISCV::LH;
757 break;
758 case RISCV::SEXT_B:
759 LoadOpc = RISCV::LB;
760 break;
761 case RISCV::ZEXT_H_RV32:
762 case RISCV::ZEXT_H_RV64:
763 LoadOpc = RISCV::LHU;
764 break;
765 }
766
767 MachineMemOperand *MMO = MF.getMachineMemOperand(
768 MachinePointerInfo::getFixedStack(MF, FrameIndex),
769 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
770 MFI.getObjectAlign(FrameIndex));
771
772 Register DstReg = MI.getOperand(0).getReg();
773 return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
774 DstReg)
775 .addFrameIndex(FrameIndex)
776 .addImm(0)
777 .addMemOperand(MMO);
778 }
779
780 void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
781 MachineBasicBlock::iterator MBBI,
782 const DebugLoc &DL, Register DstReg, uint64_t Val,
783 MachineInstr::MIFlag Flag, bool DstRenamable,
784 bool DstIsDead) const {
785 Register SrcReg = RISCV::X0;
786
787 // For RV32, allow a signed or unsigned 32-bit value.
788 if (!STI.is64Bit() && !isInt<32>(Val)) {
789 // If we have a uimm32, it will still fit in a register, so we can allow it.
790 if (!isUInt<32>(Val))
791 report_fatal_error("Should only materialize 32-bit constants for RV32");
792
793 // Sign extend for generateInstSeq.
794 Val = SignExtend64<32>(Val);
795 }
796
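// Generate the instruction sequence that materializes Val; the loop below
// emits it, threading each partial result through SrcReg.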
797 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
798 assert(!Seq.empty());
799
800 bool SrcRenamable = false;
801 unsigned Num = 0;
802
803 for (const RISCVMatInt::Inst &Inst : Seq) {
804 bool LastItem = ++Num == Seq.size();
805 unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
806 getRenamableRegState(DstRenamable);
807 unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
808 getRenamableRegState(SrcRenamable);
809 switch (Inst.getOpndKind()) {
810 case RISCVMatInt::Imm:
811 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
812 .addReg(DstReg, RegState::Define | DstRegState)
813 .addImm(Inst.getImm())
814 .setMIFlag(Flag);
815 break;
816 case RISCVMatInt::RegX0:
817 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
818 .addReg(DstReg, RegState::Define | DstRegState)
819 .addReg(SrcReg, SrcRegState)
820 .addReg(RISCV::X0)
821 .setMIFlag(Flag);
822 break;
823 case RISCVMatInt::RegReg:
824 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
825 .addReg(DstReg, RegState::Define | DstRegState)
826 .addReg(SrcReg, SrcRegState)
827 .addReg(SrcReg, SrcRegState)
828 .setMIFlag(Flag);
829 break;
830 case RISCVMatInt::RegImm:
831 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
832 .addReg(DstReg, RegState::Define | DstRegState)
833 .addReg(SrcReg, SrcRegState)
834 .addImm(Inst.getImm())
835 .setMIFlag(Flag);
836 break;
837 }
838
839 // Only the first instruction has X0 as its source.
840 SrcReg = DstReg;
841 SrcRenamable = DstRenamable;
842 }
843 }
844
845 static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
846 switch (Opc) {
847 default:
848 return RISCVCC::COND_INVALID;
849 case RISCV::CV_BEQIMM:
850 return RISCVCC::COND_EQ;
851 case RISCV::CV_BNEIMM:
852 return RISCVCC::COND_NE;
853 case RISCV::BEQ:
854 return RISCVCC::COND_EQ;
855 case RISCV::BNE:
856 return RISCVCC::COND_NE;
857 case RISCV::BLT:
858 return RISCVCC::COND_LT;
859 case RISCV::BGE:
860 return RISCVCC::COND_GE;
861 case RISCV::BLTU:
862 return RISCVCC::COND_LTU;
863 case RISCV::BGEU:
864 return RISCVCC::COND_GEU;
865 }
866 }
867
868 // The contents of values added to Cond are not examined outside of
869 // RISCVInstrInfo, giving us flexibility in what to push to it. For RISC-V, we
870 // push the condition code and the two compared operands.
871 static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
872 SmallVectorImpl<MachineOperand> &Cond) {
873 // Block ends with fall-through condbranch.
874 assert(LastInst.getDesc().isConditionalBranch() &&
875 "Unknown conditional branch");
876 Target = LastInst.getOperand(2).getMBB();
877 unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
878 Cond.push_back(MachineOperand::CreateImm(CC));
879 Cond.push_back(LastInst.getOperand(0));
880 Cond.push_back(LastInst.getOperand(1));
881 }
882
883 unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, bool Imm) {
884 switch (CC) {
885 default:
886 llvm_unreachable("Unknown condition code!");
887 case RISCVCC::COND_EQ:
888 return Imm ? RISCV::CV_BEQIMM : RISCV::BEQ;
889 case RISCVCC::COND_NE:
890 return Imm ? RISCV::CV_BNEIMM : RISCV::BNE;
891 case RISCVCC::COND_LT:
892 return RISCV::BLT;
893 case RISCVCC::COND_GE:
894 return RISCV::BGE;
895 case RISCVCC::COND_LTU:
896 return RISCV::BLTU;
897 case RISCVCC::COND_GEU:
898 return RISCV::BGEU;
899 }
900 }
901
902 const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC,
903 bool Imm) const {
904 return get(RISCVCC::getBrCond(CC, Imm));
905 }
906
907 RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
908 switch (CC) {
909 default:
910 llvm_unreachable("Unrecognized conditional branch");
911 case RISCVCC::COND_EQ:
912 return RISCVCC::COND_NE;
913 case RISCVCC::COND_NE:
914 return RISCVCC::COND_EQ;
915 case RISCVCC::COND_LT:
916 return RISCVCC::COND_GE;
917 case RISCVCC::COND_GE:
918 return RISCVCC::COND_LT;
919 case RISCVCC::COND_LTU:
920 return RISCVCC::COND_GEU;
921 case RISCVCC::COND_GEU:
922 return RISCVCC::COND_LTU;
923 }
924 }
925
926 bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
927 MachineBasicBlock *&TBB,
928 MachineBasicBlock *&FBB,
929 SmallVectorImpl<MachineOperand> &Cond,
930 bool AllowModify) const {
931 TBB = FBB = nullptr;
932 Cond.clear();
933
934 // If the block has no terminators, it just falls into the block after it.
935 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
936 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
937 return false;
938
939 // Count the number of terminators and find the first unconditional or
940 // indirect branch.
941 MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
942 int NumTerminators = 0;
943 for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
944 J++) {
945 NumTerminators++;
946 if (J->getDesc().isUnconditionalBranch() ||
947 J->getDesc().isIndirectBranch()) {
948 FirstUncondOrIndirectBr = J.getReverse();
949 }
950 }
951
952 // If AllowModify is true, we can erase any terminators after
953 // FirstUncondOrIndirectBr.
954 if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
955 while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
956 std::next(FirstUncondOrIndirectBr)->eraseFromParent();
957 NumTerminators--;
958 }
959 I = FirstUncondOrIndirectBr;
960 }
961
962 // We can't handle blocks that end in an indirect branch.
963 if (I->getDesc().isIndirectBranch())
964 return true;
965
966 // We can't handle Generic branch opcodes from Global ISel.
967 if (I->isPreISelOpcode())
968 return true;
969
970 // We can't handle blocks with more than 2 terminators.
971 if (NumTerminators > 2)
972 return true;
973
974 // Handle a single unconditional branch.
975 if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
976 TBB = getBranchDestBlock(*I);
977 return false;
978 }
979
980 // Handle a single conditional branch.
981 if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
982 parseCondBranch(*I, TBB, Cond);
983 return false;
984 }
985
986 // Handle a conditional branch followed by an unconditional branch.
987 if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
988 I->getDesc().isUnconditionalBranch()) {
989 parseCondBranch(*std::prev(I), TBB, Cond);
990 FBB = getBranchDestBlock(*I);
991 return false;
992 }
993
994 // Otherwise, we can't handle this.
995 return true;
996 }
997
998 unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
999 int *BytesRemoved) const {
1000 if (BytesRemoved)
1001 *BytesRemoved = 0;
1002 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
1003 if (I == MBB.end())
1004 return 0;
1005
1006 if (!I->getDesc().isUnconditionalBranch() &&
1007 !I->getDesc().isConditionalBranch())
1008 return 0;
1009
1010 // Remove the branch.
1011 if (BytesRemoved)
1012 *BytesRemoved += getInstSizeInBytes(*I);
1013 I->eraseFromParent();
1014
1015 I = MBB.end();
1016
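// If a conditional branch remains as the last instruction, remove it too.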
1017 if (I == MBB.begin())
1018 return 1;
1019 --I;
1020 if (!I->getDesc().isConditionalBranch())
1021 return 1;
1022
1023 // Remove the branch.
1024 if (BytesRemoved)
1025 *BytesRemoved += getInstSizeInBytes(*I);
1026 I->eraseFromParent();
1027 return 2;
1028 }
1029
1030 // Inserts a branch at the end of the specified MachineBasicBlock, returning
1031 // the number of instructions inserted.
1032 unsigned RISCVInstrInfo::insertBranch(
1033 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
1034 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
1035 if (BytesAdded)
1036 *BytesAdded = 0;
1037
1038 // Shouldn't be a fall through.
1039 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1040 assert((Cond.size() == 3 || Cond.size() == 0) &&
1041 "RISC-V branch conditions have two components!");
1042
1043 // Unconditional branch.
1044 if (Cond.empty()) {
1045 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
1046 if (BytesAdded)
1047 *BytesAdded += getInstSizeInBytes(MI);
1048 return 1;
1049 }
1050
1051 // Either a one or two-way conditional branch.
1052 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1053 MachineInstr &CondMI = *BuildMI(&MBB, DL, getBrCond(CC, Cond[2].isImm()))
1054 .add(Cond[1])
1055 .add(Cond[2])
1056 .addMBB(TBB);
1057 if (BytesAdded)
1058 *BytesAdded += getInstSizeInBytes(CondMI);
1059
1060 // One-way conditional branch.
1061 if (!FBB)
1062 return 1;
1063
1064 // Two-way conditional branch.
1065 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
1066 if (BytesAdded)
1067 *BytesAdded += getInstSizeInBytes(MI);
1068 return 2;
1069 }
1070
1071 void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
1072 MachineBasicBlock &DestBB,
1073 MachineBasicBlock &RestoreBB,
1074 const DebugLoc &DL, int64_t BrOffset,
1075 RegScavenger *RS) const {
1076 assert(RS && "RegScavenger required for long branching");
1077 assert(MBB.empty() &&
1078 "new block should be inserted for expanding unconditional branch");
1079 assert(MBB.pred_size() == 1);
1080 assert(RestoreBB.empty() &&
1081 "restore block should be inserted for restoring clobbered registers");
1082
1083 MachineFunction *MF = MBB.getParent();
1084 MachineRegisterInfo &MRI = MF->getRegInfo();
1085 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
1086 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1087
1088 if (!isInt<32>(BrOffset))
1089 report_fatal_error(
1090 "Branch offsets outside of the signed 32-bit range not supported");
1091
1092 // FIXME: A virtual register must be used initially, as the register
1093 // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
1094 // uses the same workaround).
1095 Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRJALRRegClass);
1096 auto II = MBB.end();
1097 // We may also update the jump target to RestoreBB later.
1098 MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
1099 .addReg(ScratchReg, RegState::Define | RegState::Dead)
1100 .addMBB(&DestBB, RISCVII::MO_CALL);
1101
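// Scavenge a GPR to replace the virtual scratch register used by PseudoJump;
// fall back to spilling s11 if nothing is available.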
1102 RS->enterBasicBlockEnd(MBB);
1103 Register TmpGPR =
1104 RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
1105 /*RestoreAfter=*/false, /*SpAdj=*/0,
1106 /*AllowSpill=*/false);
1107 if (TmpGPR != RISCV::NoRegister)
1108 RS->setRegUsed(TmpGPR);
1109 else {
1110 // The case when there is no scavenged register needs special handling.
1111
1112 // Pick s11 because it doesn't make a difference.
1113 TmpGPR = RISCV::X27;
1114
1115 int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
1116 if (FrameIndex == -1)
1117 report_fatal_error("underestimated function size");
1118
1119 storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
1120 &RISCV::GPRRegClass, TRI, Register());
1121 TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
1122 /*SpAdj=*/0, /*FIOperandNum=*/1);
1123
1124 MI.getOperand(1).setMBB(&RestoreBB);
1125
1126 loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
1127 &RISCV::GPRRegClass, TRI, Register());
1128 TRI->eliminateFrameIndex(RestoreBB.back(),
1129 /*SpAdj=*/0, /*FIOperandNum=*/1);
1130 }
1131
1132 MRI.replaceRegWith(ScratchReg, TmpGPR);
1133 MRI.clearVirtRegs();
1134 }
1135
1136 bool RISCVInstrInfo::reverseBranchCondition(
1137 SmallVectorImpl<MachineOperand> &Cond) const {
1138 assert((Cond.size() == 3) && "Invalid branch condition!");
1139 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1140 Cond[0].setImm(getOppositeBranchCondition(CC));
1141 return false;
1142 }
1143
1144 bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
1145 MachineBasicBlock *MBB = MI.getParent();
1146 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1147
1148 MachineBasicBlock *TBB, *FBB;
1149 SmallVector<MachineOperand, 3> Cond;
1150 if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
1151 return false;
1152
1153 RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1154 assert(CC != RISCVCC::COND_INVALID);
1155
1156 if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
1157 return false;
1158
1159 // For two constants C0 and C1 from
1160 // ```
1161 // li Y, C0
1162 // li Z, C1
1163 // ```
1164 // 1. if C1 = C0 + 1
1165 // we can turn:
1166 // (a) blt Y, X -> bge X, Z
1167 // (b) bge Y, X -> blt X, Z
1168 //
1169 // 2. if C1 = C0 - 1
1170 // we can turn:
1171 // (a) blt X, Y -> bge Z, X
1172 // (b) bge X, Y -> blt Z, X
1173 //
1174 // To make sure this optimization is really beneficial, we only
1175 // optimize for cases where Y has only one use (i.e. it is only used by the branch).
1176
1177 // Right now we only care about LI (i.e. ADDI x0, imm)
1178 auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
1179 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1180 MI->getOperand(1).getReg() == RISCV::X0) {
1181 Imm = MI->getOperand(2).getImm();
1182 return true;
1183 }
1184 return false;
1185 };
1186 // Either a load from immediate instruction or X0.
1187 auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
1188 if (!Op.isReg())
1189 return false;
1190 Register Reg = Op.getReg();
1191 return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
1192 };
1193
1194 MachineOperand &LHS = MI.getOperand(0);
1195 MachineOperand &RHS = MI.getOperand(1);
1196 // Try to find the register for constant Z; return
1197 // invalid register otherwise.
1198 auto searchConst = [&](int64_t C1) -> Register {
1199 MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
1200 auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
1201 int64_t Imm;
1202 return isLoadImm(&I, Imm) && Imm == C1 &&
1203 I.getOperand(0).getReg().isVirtual();
1204 });
1205 if (DefC1 != E)
1206 return DefC1->getOperand(0).getReg();
1207
1208 return Register();
1209 };
1210
1211 bool Modify = false;
1212 int64_t C0;
1213 if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
1214 // Might be case 1.
1215 // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
1216 // to worry about unsigned overflow here)
1217 if (C0 < INT64_MAX)
1218 if (Register RegZ = searchConst(C0 + 1)) {
1219 reverseBranchCondition(Cond);
1220 Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
1221 Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
1222 // We might extend the live range of Z, clear its kill flag to
1223 // account for this.
1224 MRI.clearKillFlags(RegZ);
1225 Modify = true;
1226 }
1227 } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
1228 // Might be case 2.
1229 // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
1230 // when C0 is zero.
1231 if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
1232 if (Register RegZ = searchConst(C0 - 1)) {
1233 reverseBranchCondition(Cond);
1234 Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
1235 Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
1236 // We might extend the live range of Z, clear its kill flag to
1237 // account for this.
1238 MRI.clearKillFlags(RegZ);
1239 Modify = true;
1240 }
1241 }
1242
1243 if (!Modify)
1244 return false;
1245
1246 // Build the new branch and remove the old one.
1247 BuildMI(*MBB, MI, MI.getDebugLoc(),
1248 getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
1249 .add(Cond[1])
1250 .add(Cond[2])
1251 .addMBB(TBB);
1252 MI.eraseFromParent();
1253
1254 return true;
1255 }
1256
1257 MachineBasicBlock *
1258 RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
1259 assert(MI.getDesc().isBranch() && "Unexpected opcode!");
1260 // The branch target is always the last operand.
1261 int NumOp = MI.getNumExplicitOperands();
1262 return MI.getOperand(NumOp - 1).getMBB();
1263 }
1264
1265 bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
1266 int64_t BrOffset) const {
1267 unsigned XLen = STI.getXLen();
1268 // Ideally we could determine the supported branch offset from the
1269 // RISCVII::FormMask, but this can't be used for Pseudo instructions like
1270 // PseudoBR.
1271 switch (BranchOp) {
1272 default:
1273 llvm_unreachable("Unexpected opcode!");
1274 case RISCV::BEQ:
1275 case RISCV::BNE:
1276 case RISCV::BLT:
1277 case RISCV::BGE:
1278 case RISCV::BLTU:
1279 case RISCV::BGEU:
1280 case RISCV::CV_BEQIMM:
1281 case RISCV::CV_BNEIMM:
1282 return isIntN(13, BrOffset);
1283 case RISCV::JAL:
1284 case RISCV::PseudoBR:
1285 return isIntN(21, BrOffset);
1286 case RISCV::PseudoJump:
1287 return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
1288 }
1289 }
1290
1291 // If the operation has a predicated pseudo instruction, return the pseudo
1292 // instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
1293 // TODO: Support more operations.
1294 unsigned getPredicatedOpcode(unsigned Opcode) {
1295 switch (Opcode) {
1296 case RISCV::ADD: return RISCV::PseudoCCADD; break;
1297 case RISCV::SUB: return RISCV::PseudoCCSUB; break;
1298 case RISCV::SLL: return RISCV::PseudoCCSLL; break;
1299 case RISCV::SRL: return RISCV::PseudoCCSRL; break;
1300 case RISCV::SRA: return RISCV::PseudoCCSRA; break;
1301 case RISCV::AND: return RISCV::PseudoCCAND; break;
1302 case RISCV::OR: return RISCV::PseudoCCOR; break;
1303 case RISCV::XOR: return RISCV::PseudoCCXOR; break;
1304
1305 case RISCV::ADDI: return RISCV::PseudoCCADDI; break;
1306 case RISCV::SLLI: return RISCV::PseudoCCSLLI; break;
1307 case RISCV::SRLI: return RISCV::PseudoCCSRLI; break;
1308 case RISCV::SRAI: return RISCV::PseudoCCSRAI; break;
1309 case RISCV::ANDI: return RISCV::PseudoCCANDI; break;
1310 case RISCV::ORI: return RISCV::PseudoCCORI; break;
1311 case RISCV::XORI: return RISCV::PseudoCCXORI; break;
1312
1313 case RISCV::ADDW: return RISCV::PseudoCCADDW; break;
1314 case RISCV::SUBW: return RISCV::PseudoCCSUBW; break;
1315 case RISCV::SLLW: return RISCV::PseudoCCSLLW; break;
1316 case RISCV::SRLW: return RISCV::PseudoCCSRLW; break;
1317 case RISCV::SRAW: return RISCV::PseudoCCSRAW; break;
1318
1319 case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
1320 case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
1321 case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
1322 case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;
1323
1324 case RISCV::ANDN: return RISCV::PseudoCCANDN; break;
1325 case RISCV::ORN: return RISCV::PseudoCCORN; break;
1326 case RISCV::XNOR: return RISCV::PseudoCCXNOR; break;
1327 }
1328
1329 return RISCV::INSTRUCTION_LIST_END;
1330 }
1331
1332 /// Identify instructions that can be folded into a CCMOV instruction, and
1333 /// return the defining instruction.
1334 static MachineInstr *canFoldAsPredicatedOp(Register Reg,
1335 const MachineRegisterInfo &MRI,
1336 const TargetInstrInfo *TII) {
1337 if (!Reg.isVirtual())
1338 return nullptr;
1339 if (!MRI.hasOneNonDBGUse(Reg))
1340 return nullptr;
1341 MachineInstr *MI = MRI.getVRegDef(Reg);
1342 if (!MI)
1343 return nullptr;
1344 // Check if MI can be predicated and folded into the CCMOV.
1345 if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
1346 return nullptr;
1347 // Don't predicate li idiom.
1348 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1349 MI->getOperand(1).getReg() == RISCV::X0)
1350 return nullptr;
1351 // Check if MI has any other defs or physreg uses.
1352 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
1353 // Reject frame index operands, PEI can't handle the predicated pseudos.
1354 if (MO.isFI() || MO.isCPI() || MO.isJTI())
1355 return nullptr;
1356 if (!MO.isReg())
1357 continue;
1358 // MI can't have any tied operands, that would conflict with predication.
1359 if (MO.isTied())
1360 return nullptr;
1361 if (MO.isDef())
1362 return nullptr;
1363 // Allow constant physregs.
1364 if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))
1365 return nullptr;
1366 }
1367 bool DontMoveAcrossStores = true;
1368 if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
1369 return nullptr;
1370 return MI;
1371 }
1372
1373 bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
1374 SmallVectorImpl<MachineOperand> &Cond,
1375 unsigned &TrueOp, unsigned &FalseOp,
1376 bool &Optimizable) const {
1377 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1378 "Unknown select instruction");
1379 // CCMOV operands:
1380 // 0: Def.
1381 // 1: LHS of compare.
1382 // 2: RHS of compare.
1383 // 3: Condition code.
1384 // 4: False use.
1385 // 5: True use.
1386 TrueOp = 5;
1387 FalseOp = 4;
1388 Cond.push_back(MI.getOperand(1));
1389 Cond.push_back(MI.getOperand(2));
1390 Cond.push_back(MI.getOperand(3));
1391 // We can only fold when we support short forward branch opt.
1392 Optimizable = STI.hasShortForwardBranchOpt();
1393 return false;
1394 }
1395
1396 MachineInstr *
1397 RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
1398 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
1399 bool PreferFalse) const {
1400 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1401 "Unknown select instruction");
1402 if (!STI.hasShortForwardBranchOpt())
1403 return nullptr;
1404
1405 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1406 MachineInstr *DefMI =
1407 canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
1408 bool Invert = !DefMI;
1409 if (!DefMI)
1410 DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
1411 if (!DefMI)
1412 return nullptr;
1413
1414 // Find new register class to use.
1415 MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
1416 Register DestReg = MI.getOperand(0).getReg();
1417 const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
1418 if (!MRI.constrainRegClass(DestReg, PreviousClass))
1419 return nullptr;
1420
1421 unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
1422 assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");
1423
1424 // Create a new predicated version of DefMI.
1425 MachineInstrBuilder NewMI =
1426 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);
1427
1428 // Copy the condition portion.
1429 NewMI.add(MI.getOperand(1));
1430 NewMI.add(MI.getOperand(2));
1431
1432 // Add condition code, inverting if necessary.
1433 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
1434 if (Invert)
1435 CC = RISCVCC::getOppositeBranchCondition(CC);
1436 NewMI.addImm(CC);
1437
1438 // Copy the false register.
1439 NewMI.add(FalseReg);
1440
1441 // Copy all the DefMI operands.
1442 const MCInstrDesc &DefDesc = DefMI->getDesc();
1443 for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
1444 NewMI.add(DefMI->getOperand(i));
1445
1446 // Update SeenMIs set: register newly created MI and erase removed DefMI.
1447 SeenMIs.insert(NewMI);
1448 SeenMIs.erase(DefMI);
1449
1450 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
1451 // DefMI would be invalid when transferred inside the loop. Checking for a
1452 // loop is expensive, but at least remove kill flags if they are in different
1453 // BBs.
1454 if (DefMI->getParent() != MI.getParent())
1455 NewMI->clearKillInfo();
1456
1457 // The caller will erase MI, but not DefMI.
1458 DefMI->eraseFromParent();
1459 return NewMI;
1460 }
1461
1462 unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
1463 if (MI.isMetaInstruction())
1464 return 0;
1465
1466 unsigned Opcode = MI.getOpcode();
1467
1468 if (Opcode == TargetOpcode::INLINEASM ||
1469 Opcode == TargetOpcode::INLINEASM_BR) {
1470 const MachineFunction &MF = *MI.getParent()->getParent();
1471 return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
1472 *MF.getTarget().getMCAsmInfo());
1473 }
1474
1475 if (!MI.memoperands_empty()) {
1476 MachineMemOperand *MMO = *(MI.memoperands_begin());
1477 if (STI.hasStdExtZihintntl() && MMO->isNonTemporal()) {
1478 if (STI.hasStdExtCOrZca() && STI.enableRVCHintInstrs()) {
1479 if (isCompressibleInst(MI, STI))
1480 return 4; // c.ntl.all + c.load/c.store
1481 return 6; // c.ntl.all + load/store
1482 }
1483 return 8; // ntl.all + load/store
1484 }
1485 }
1486
1487 if (Opcode == TargetOpcode::BUNDLE)
1488 return getInstBundleLength(MI);
1489
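// Instructions that are compressible with the C extension occupy 2 bytes.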
1490 if (MI.getParent() && MI.getParent()->getParent()) {
1491 if (isCompressibleInst(MI, STI))
1492 return 2;
1493 }
1494
1495 switch (Opcode) {
1496 case TargetOpcode::STACKMAP:
1497 // The upper bound for a stackmap intrinsic is the full length of its shadow
1498 return StackMapOpers(&MI).getNumPatchBytes();
1499 case TargetOpcode::PATCHPOINT:
1500 // The size of the patchpoint intrinsic is the number of bytes requested
1501 return PatchPointOpers(&MI).getNumPatchBytes();
1502 case TargetOpcode::STATEPOINT: {
1503 // The size of the statepoint intrinsic is the number of bytes requested
1504 unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();
1505 // No patch bytes means at most a PseudoCall is emitted
1506 return std::max(NumBytes, 8U);
1507 }
1508 default:
1509 return get(Opcode).getSize();
1510 }
1511 }
1512
1513 unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
1514 unsigned Size = 0;
1515 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
1516 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
1517 while (++I != E && I->isInsideBundle()) {
1518 assert(!I->isBundle() && "No nested bundle!");
1519 Size += getInstSizeInBytes(*I);
1520 }
1521 return Size;
1522 }
1523
1524 bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1525 const unsigned Opcode = MI.getOpcode();
1526 switch (Opcode) {
1527 default:
1528 break;
1529 case RISCV::FSGNJ_D:
1530 case RISCV::FSGNJ_S:
1531 case RISCV::FSGNJ_H:
1532 case RISCV::FSGNJ_D_INX:
1533 case RISCV::FSGNJ_D_IN32X:
1534 case RISCV::FSGNJ_S_INX:
1535 case RISCV::FSGNJ_H_INX:
1536 // The canonical floating-point move is fsgnj rd, rs, rs.
1537 return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
1538 MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
1539 case RISCV::ADDI:
1540 case RISCV::ORI:
1541 case RISCV::XORI:
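      // These forms are as cheap as a move because they either copy a register
      // or materialize an immediate, e.g. (illustrative):
      //   addi a0, a1, 0   // register copy
      //   ori  a0, x0, 42  // immediate materialization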
1542 return (MI.getOperand(1).isReg() &&
1543 MI.getOperand(1).getReg() == RISCV::X0) ||
1544 (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
1545 }
1546 return MI.isAsCheapAsAMove();
1547 }
1548
1549 std::optional<DestSourcePair>
1550 RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1551 if (MI.isMoveReg())
1552 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1553 switch (MI.getOpcode()) {
1554 default:
1555 break;
1556 case RISCV::ADDI:
1557 // Operand 1 can be a frameindex but callers expect registers
1558 if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
1559 MI.getOperand(2).getImm() == 0)
1560 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1561 break;
1562 case RISCV::FSGNJ_D:
1563 case RISCV::FSGNJ_S:
1564 case RISCV::FSGNJ_H:
1565 case RISCV::FSGNJ_D_INX:
1566 case RISCV::FSGNJ_D_IN32X:
1567 case RISCV::FSGNJ_S_INX:
1568 case RISCV::FSGNJ_H_INX:
1569 // The canonical floating-point move is fsgnj rd, rs, rs.
1570 if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
1571 MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
1572 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
1573 break;
1574 }
1575 return std::nullopt;
1576 }
1577
1578 MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
1579 if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
1580 // The option is unused. Choose the Local strategy only for in-order cores.
1581 // When the scheduling model is unspecified, use the MinInstrCount strategy
1582 // as the more generic one.
1583 const auto &SchedModel = STI.getSchedModel();
1584 return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
1585 ? MachineTraceStrategy::TS_MinInstrCount
1586 : MachineTraceStrategy::TS_Local;
1587 }
1588 // The strategy was forced by the option.
1589 return ForceMachineCombinerStrategy;
1590 }
1591
1592 void RISCVInstrInfo::finalizeInsInstrs(
1593 MachineInstr &Root, unsigned &Pattern,
1594 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
1595 int16_t FrmOpIdx =
1596 RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
1597 if (FrmOpIdx < 0) {
1598 assert(all_of(InsInstrs,
1599 [](MachineInstr *MI) {
1600 return RISCV::getNamedOperandIdx(MI->getOpcode(),
1601 RISCV::OpName::frm) < 0;
1602 }) &&
1603 "New instructions require FRM whereas the old one does not have it");
1604 return;
1605 }
1606
1607 const MachineOperand &FRM = Root.getOperand(FrmOpIdx);
1608 MachineFunction &MF = *Root.getMF();
1609
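  // Propagate Root's rounding mode to each newly created instruction that does
  // not have it yet. For example (illustrative), if Root carries the dynamic
  // rounding mode ("dyn"), the new instruction receives the same rounding-mode
  // operand plus an implicit use of the FRM CSR.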
1610 for (auto *NewMI : InsInstrs) {
1611 // Skip instructions that already have the FRM operand.
1612 if (static_cast<unsigned>(RISCV::getNamedOperandIdx(
1613 NewMI->getOpcode(), RISCV::OpName::frm)) != NewMI->getNumOperands())
1614 continue;
1615 MachineInstrBuilder MIB(MF, NewMI);
1616 MIB.add(FRM);
1617 if (FRM.getImm() == RISCVFPRndMode::DYN)
1618 MIB.addUse(RISCV::FRM, RegState::Implicit);
1619 }
1620 }
1621
1622 static bool isFADD(unsigned Opc) {
1623 switch (Opc) {
1624 default:
1625 return false;
1626 case RISCV::FADD_H:
1627 case RISCV::FADD_S:
1628 case RISCV::FADD_D:
1629 return true;
1630 }
1631 }
1632
1633 static bool isFSUB(unsigned Opc) {
1634 switch (Opc) {
1635 default:
1636 return false;
1637 case RISCV::FSUB_H:
1638 case RISCV::FSUB_S:
1639 case RISCV::FSUB_D:
1640 return true;
1641 }
1642 }
1643
1644 static bool isFMUL(unsigned Opc) {
1645 switch (Opc) {
1646 default:
1647 return false;
1648 case RISCV::FMUL_H:
1649 case RISCV::FMUL_S:
1650 case RISCV::FMUL_D:
1651 return true;
1652 }
1653 }
1654
1655 bool RISCVInstrInfo::isVectorAssociativeAndCommutative(const MachineInstr &Inst,
1656 bool Invert) const {
1657 #define OPCODE_LMUL_CASE(OPC) \
1658 case RISCV::OPC##_M1: \
1659 case RISCV::OPC##_M2: \
1660 case RISCV::OPC##_M4: \
1661 case RISCV::OPC##_M8: \
1662 case RISCV::OPC##_MF2: \
1663 case RISCV::OPC##_MF4: \
1664 case RISCV::OPC##_MF8
1665
1666 #define OPCODE_LMUL_MASK_CASE(OPC) \
1667 case RISCV::OPC##_M1_MASK: \
1668 case RISCV::OPC##_M2_MASK: \
1669 case RISCV::OPC##_M4_MASK: \
1670 case RISCV::OPC##_M8_MASK: \
1671 case RISCV::OPC##_MF2_MASK: \
1672 case RISCV::OPC##_MF4_MASK: \
1673 case RISCV::OPC##_MF8_MASK
1674
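// For example (illustrative), OPCODE_LMUL_CASE(PseudoVADD_VV) expands to the
// case labels PseudoVADD_VV_M1 through PseudoVADD_VV_MF8, so a single use of
// the macro covers every LMUL variant of the pseudo.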
1675 unsigned Opcode = Inst.getOpcode();
1676 if (Invert) {
1677 if (auto InvOpcode = getInverseOpcode(Opcode))
1678 Opcode = *InvOpcode;
1679 else
1680 return false;
1681 }
1682
1683 // clang-format off
1684 switch (Opcode) {
1685 default:
1686 return false;
1687 OPCODE_LMUL_CASE(PseudoVADD_VV):
1688 OPCODE_LMUL_MASK_CASE(PseudoVADD_VV):
1689 OPCODE_LMUL_CASE(PseudoVMUL_VV):
1690 OPCODE_LMUL_MASK_CASE(PseudoVMUL_VV):
1691 return true;
1692 }
1693 // clang-format on
1694
1695 #undef OPCODE_LMUL_MASK_CASE
1696 #undef OPCODE_LMUL_CASE
1697 }
1698
1699 bool RISCVInstrInfo::areRVVInstsReassociable(const MachineInstr &Root,
1700 const MachineInstr &Prev) const {
1701 if (!areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode()))
1702 return false;
1703
1704 assert(Root.getMF() == Prev.getMF());
1705 const MachineRegisterInfo *MRI = &Root.getMF()->getRegInfo();
1706 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
1707
1708 // Make sure vtype operands are also the same.
1709 const MCInstrDesc &Desc = get(Root.getOpcode());
1710 const uint64_t TSFlags = Desc.TSFlags;
1711
1712 auto checkImmOperand = [&](unsigned OpIdx) {
1713 return Root.getOperand(OpIdx).getImm() == Prev.getOperand(OpIdx).getImm();
1714 };
1715
1716 auto checkRegOperand = [&](unsigned OpIdx) {
1717 return Root.getOperand(OpIdx).getReg() == Prev.getOperand(OpIdx).getReg();
1718 };
1719
1720 // PassThru
1721 // TODO: Potentially we can loosen the condition to consider Root to be
1722 // associable with Prev if Root has NoReg as its passthru, in which case we
1723 // would also need to loosen the condition on the vector policies between them.
1724 if (!checkRegOperand(1))
1725 return false;
1726
1727 // SEW
1728 if (RISCVII::hasSEWOp(TSFlags) &&
1729 !checkImmOperand(RISCVII::getSEWOpNum(Desc)))
1730 return false;
1731
1732 // Mask
1733 if (RISCVII::usesMaskPolicy(TSFlags)) {
1734 const MachineBasicBlock *MBB = Root.getParent();
1735 const MachineBasicBlock::const_reverse_iterator It1(&Root);
1736 const MachineBasicBlock::const_reverse_iterator It2(&Prev);
1737 Register MI1VReg;
1738
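    // Walk backwards from Root and record the virtual register last copied
    // into V0 before Root (MI1VReg). Prev is only considered reassociable with
    // Root if it is governed by the same V0 definition, e.g. (illustrative MIR):
    //   $v0 = COPY %mask
    //   %a  = PseudoVADD_VV_M1_MASK ..., $v0   ; Prev
    //   %b  = PseudoVADD_VV_M1_MASK ..., $v0   ; Root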
1739 bool SeenMI2 = false;
1740 for (auto End = MBB->rend(), It = It1; It != End; ++It) {
1741 if (It == It2) {
1742 SeenMI2 = true;
1743 if (!MI1VReg.isValid())
1744 // There is no V0 def between Root and Prev; they're sharing the
1745 // same V0.
1746 break;
1747 }
1748
1749 if (It->modifiesRegister(RISCV::V0, TRI)) {
1750 Register SrcReg = It->getOperand(1).getReg();
1751 // If it's not a virtual register, it's more difficult to track its
1752 // defs, so bail out here just to be safe.
1753 if (!SrcReg.isVirtual())
1754 return false;
1755
1756 if (!MI1VReg.isValid()) {
1757 // This is the V0 def for Root.
1758 MI1VReg = SrcReg;
1759 continue;
1760 }
1761
1762 // An unrelated intermediate V0 update; keep scanning.
1763 if (!SeenMI2)
1764 continue;
1765
1766 // This is the V0 def for Prev; check if it's the same as that of
1767 // Root.
1768 if (MI1VReg != SrcReg)
1769 return false;
1770 else
1771 break;
1772 }
1773 }
1774
1775 // If we haven't encountered Prev, this function was likely called
1776 // incorrectly (e.g. Root appears before Prev).
1777 assert(SeenMI2 && "Prev is expected to appear before Root");
1778 }
1779
1780 // Tail / Mask policies
1781 if (RISCVII::hasVecPolicyOp(TSFlags) &&
1782 !checkImmOperand(RISCVII::getVecPolicyOpNum(Desc)))
1783 return false;
1784
1785 // VL
1786 if (RISCVII::hasVLOp(TSFlags)) {
1787 unsigned OpIdx = RISCVII::getVLOpNum(Desc);
1788 const MachineOperand &Op1 = Root.getOperand(OpIdx);
1789 const MachineOperand &Op2 = Prev.getOperand(OpIdx);
1790 if (Op1.getType() != Op2.getType())
1791 return false;
1792 switch (Op1.getType()) {
1793 case MachineOperand::MO_Register:
1794 if (Op1.getReg() != Op2.getReg())
1795 return false;
1796 break;
1797 case MachineOperand::MO_Immediate:
1798 if (Op1.getImm() != Op2.getImm())
1799 return false;
1800 break;
1801 default:
1802 llvm_unreachable("Unrecognized VL operand type");
1803 }
1804 }
1805
1806 // Rounding modes
1807 if (RISCVII::hasRoundModeOp(TSFlags) &&
1808 !checkImmOperand(RISCVII::getVLOpNum(Desc) - 1))
1809 return false;
1810
1811 return true;
1812 }
1813
1814 // Most of our RVV pseudos have a passthru operand, so the real source
1815 // operands start at index 2.
1816 bool RISCVInstrInfo::hasReassociableVectorSibling(const MachineInstr &Inst,
1817 bool &Commuted) const {
1818 const MachineBasicBlock *MBB = Inst.getParent();
1819 const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1820 assert(RISCVII::isFirstDefTiedToFirstUse(get(Inst.getOpcode())) &&
1821 "Expect the present of passthrough operand.");
1822 MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
1823 MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(3).getReg());
1824
1825 // If only one operand has the same or inverse opcode and it's the second
1826 // source operand, the operands must be commuted.
1827 Commuted = !areRVVInstsReassociable(Inst, *MI1) &&
1828 areRVVInstsReassociable(Inst, *MI2);
1829 if (Commuted)
1830 std::swap(MI1, MI2);
1831
1832 return areRVVInstsReassociable(Inst, *MI1) &&
1833 (isVectorAssociativeAndCommutative(*MI1) ||
1834 isVectorAssociativeAndCommutative(*MI1, /* Invert */ true)) &&
1835 hasReassociableOperands(*MI1, MBB) &&
1836 MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg());
1837 }
1838
1839 bool RISCVInstrInfo::hasReassociableOperands(
1840 const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
1841 if (!isVectorAssociativeAndCommutative(Inst) &&
1842 !isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
1843 return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
1844
1845 const MachineOperand &Op1 = Inst.getOperand(2);
1846 const MachineOperand &Op2 = Inst.getOperand(3);
1847 const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1848
1849 // We need virtual register definitions for the operands that we will
1850 // reassociate.
1851 MachineInstr *MI1 = nullptr;
1852 MachineInstr *MI2 = nullptr;
1853 if (Op1.isReg() && Op1.getReg().isVirtual())
1854 MI1 = MRI.getUniqueVRegDef(Op1.getReg());
1855 if (Op2.isReg() && Op2.getReg().isVirtual())
1856 MI2 = MRI.getUniqueVRegDef(Op2.getReg());
1857
1858 // And at least one operand must be defined in MBB.
1859 return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB);
1860 }
1861
1862 void RISCVInstrInfo::getReassociateOperandIndices(
1863 const MachineInstr &Root, unsigned Pattern,
1864 std::array<unsigned, 5> &OperandIndices) const {
1865 TargetInstrInfo::getReassociateOperandIndices(Root, Pattern, OperandIndices);
1866 if (RISCV::getRVVMCOpcode(Root.getOpcode())) {
1867 // Skip the passthrough operand, so increment all indices by one.
1868 for (unsigned I = 0; I < 5; ++I)
1869 ++OperandIndices[I];
1870 }
1871 }
1872
1873 bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
1874 bool &Commuted) const {
1875 if (isVectorAssociativeAndCommutative(Inst) ||
1876 isVectorAssociativeAndCommutative(Inst, /*Invert=*/true))
1877 return hasReassociableVectorSibling(Inst, Commuted);
1878
1879 if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
1880 return false;
1881
1882 const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
1883 unsigned OperandIdx = Commuted ? 2 : 1;
1884 const MachineInstr &Sibling =
1885 *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg());
1886
1887 int16_t InstFrmOpIdx =
1888 RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);
1889 int16_t SiblingFrmOpIdx =
1890 RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);
1891
1892 return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
1893 RISCV::hasEqualFRM(Inst, Sibling);
1894 }
1895
1896 bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
1897 bool Invert) const {
1898 if (isVectorAssociativeAndCommutative(Inst, Invert))
1899 return true;
1900
1901 unsigned Opc = Inst.getOpcode();
1902 if (Invert) {
1903 auto InverseOpcode = getInverseOpcode(Opc);
1904 if (!InverseOpcode)
1905 return false;
1906 Opc = *InverseOpcode;
1907 }
1908
1909 if (isFADD(Opc) || isFMUL(Opc))
1910 return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
1911 Inst.getFlag(MachineInstr::MIFlag::FmNsz);
1912
1913 switch (Opc) {
1914 default:
1915 return false;
1916 case RISCV::ADD:
1917 case RISCV::ADDW:
1918 case RISCV::AND:
1919 case RISCV::OR:
1920 case RISCV::XOR:
1921 // From the RISC-V ISA spec: if both the high and low bits of the same product
1922 // are required, then the recommended code sequence is:
1923 //
1924 // MULH[[S]U] rdh, rs1, rs2
1925 // MUL rdl, rs1, rs2
1926 // (source register specifiers must be in same order and rdh cannot be the
1927 // same as rs1 or rs2)
1928 //
1929 // Microarchitectures can then fuse these into a single multiply operation
1930 // instead of performing two separate multiplies.
1931 // MachineCombiner may reassociate MUL operands and lose the fusion
1932 // opportunity.
1933 case RISCV::MUL:
1934 case RISCV::MULW:
1935 case RISCV::MIN:
1936 case RISCV::MINU:
1937 case RISCV::MAX:
1938 case RISCV::MAXU:
1939 case RISCV::FMIN_H:
1940 case RISCV::FMIN_S:
1941 case RISCV::FMIN_D:
1942 case RISCV::FMAX_H:
1943 case RISCV::FMAX_S:
1944 case RISCV::FMAX_D:
1945 return true;
1946 }
1947
1948 return false;
1949 }
1950
1951 std::optional<unsigned>
1952 RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {
1953 #define RVV_OPC_LMUL_CASE(OPC, INV) \
1954 case RISCV::OPC##_M1: \
1955 return RISCV::INV##_M1; \
1956 case RISCV::OPC##_M2: \
1957 return RISCV::INV##_M2; \
1958 case RISCV::OPC##_M4: \
1959 return RISCV::INV##_M4; \
1960 case RISCV::OPC##_M8: \
1961 return RISCV::INV##_M8; \
1962 case RISCV::OPC##_MF2: \
1963 return RISCV::INV##_MF2; \
1964 case RISCV::OPC##_MF4: \
1965 return RISCV::INV##_MF4; \
1966 case RISCV::OPC##_MF8: \
1967 return RISCV::INV##_MF8
1968
1969 #define RVV_OPC_LMUL_MASK_CASE(OPC, INV) \
1970 case RISCV::OPC##_M1_MASK: \
1971 return RISCV::INV##_M1_MASK; \
1972 case RISCV::OPC##_M2_MASK: \
1973 return RISCV::INV##_M2_MASK; \
1974 case RISCV::OPC##_M4_MASK: \
1975 return RISCV::INV##_M4_MASK; \
1976 case RISCV::OPC##_M8_MASK: \
1977 return RISCV::INV##_M8_MASK; \
1978 case RISCV::OPC##_MF2_MASK: \
1979 return RISCV::INV##_MF2_MASK; \
1980 case RISCV::OPC##_MF4_MASK: \
1981 return RISCV::INV##_MF4_MASK; \
1982 case RISCV::OPC##_MF8_MASK: \
1983 return RISCV::INV##_MF8_MASK
1984
1985 switch (Opcode) {
1986 default:
1987 return std::nullopt;
1988 case RISCV::FADD_H:
1989 return RISCV::FSUB_H;
1990 case RISCV::FADD_S:
1991 return RISCV::FSUB_S;
1992 case RISCV::FADD_D:
1993 return RISCV::FSUB_D;
1994 case RISCV::FSUB_H:
1995 return RISCV::FADD_H;
1996 case RISCV::FSUB_S:
1997 return RISCV::FADD_S;
1998 case RISCV::FSUB_D:
1999 return RISCV::FADD_D;
2000 case RISCV::ADD:
2001 return RISCV::SUB;
2002 case RISCV::SUB:
2003 return RISCV::ADD;
2004 case RISCV::ADDW:
2005 return RISCV::SUBW;
2006 case RISCV::SUBW:
2007 return RISCV::ADDW;
2008 // clang-format off
2009 RVV_OPC_LMUL_CASE(PseudoVADD_VV, PseudoVSUB_VV);
2010 RVV_OPC_LMUL_MASK_CASE(PseudoVADD_VV, PseudoVSUB_VV);
2011 RVV_OPC_LMUL_CASE(PseudoVSUB_VV, PseudoVADD_VV);
2012 RVV_OPC_LMUL_MASK_CASE(PseudoVSUB_VV, PseudoVADD_VV);
2013 // clang-format on
2014 }
2015
2016 #undef RVV_OPC_LMUL_MASK_CASE
2017 #undef RVV_OPC_LMUL_CASE
2018 }
2019
2020 static bool canCombineFPFusedMultiply(const MachineInstr &Root,
2021 const MachineOperand &MO,
2022 bool DoRegPressureReduce) {
2023 if (!MO.isReg() || !MO.getReg().isVirtual())
2024 return false;
2025 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
2026 MachineInstr *MI = MRI.getVRegDef(MO.getReg());
2027 if (!MI || !isFMUL(MI->getOpcode()))
2028 return false;
2029
2030 if (!Root.getFlag(MachineInstr::MIFlag::FmContract) ||
2031 !MI->getFlag(MachineInstr::MIFlag::FmContract))
2032 return false;
2033
2034 // Try combining even if the fmul has more than one use, as this eliminates
2035 // the dependency between the fadd (or fsub) and the fmul. However, it can
2036 // extend the live ranges of the fmul operands, so reject the transformation
2037 // in register pressure reduction mode.
2038 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2039 return false;
2040
2041 // Do not combine instructions from different basic blocks.
2042 if (Root.getParent() != MI->getParent())
2043 return false;
2044 return RISCV::hasEqualFRM(Root, *MI);
2045 }
2046
2047 static bool getFPFusedMultiplyPatterns(MachineInstr &Root,
2048 SmallVectorImpl<unsigned> &Patterns,
2049 bool DoRegPressureReduce) {
2050 unsigned Opc = Root.getOpcode();
2051 bool IsFAdd = isFADD(Opc);
2052 if (!IsFAdd && !isFSUB(Opc))
2053 return false;
2054 bool Added = false;
2055 if (canCombineFPFusedMultiply(Root, Root.getOperand(1),
2056 DoRegPressureReduce)) {
2057 Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_AX
2058 : RISCVMachineCombinerPattern::FMSUB);
2059 Added = true;
2060 }
2061 if (canCombineFPFusedMultiply(Root, Root.getOperand(2),
2062 DoRegPressureReduce)) {
2063 Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_XA
2064 : RISCVMachineCombinerPattern::FNMSUB);
2065 Added = true;
2066 }
2067 return Added;
2068 }
2069
2070 static bool getFPPatterns(MachineInstr &Root,
2071 SmallVectorImpl<unsigned> &Patterns,
2072 bool DoRegPressureReduce) {
2073 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
2074 }
2075
2076 /// Utility routine that checks if \p MO is defined by a \p CombineOpc
2077 /// instruction in the basic block \p MBB.
2078 static const MachineInstr *canCombine(const MachineBasicBlock &MBB,
2079 const MachineOperand &MO,
2080 unsigned CombineOpc) {
2081 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2082 const MachineInstr *MI = nullptr;
2083
2084 if (MO.isReg() && MO.getReg().isVirtual())
2085 MI = MRI.getUniqueVRegDef(MO.getReg());
2086 // And it needs to be in the trace (otherwise, it won't have a depth).
2087 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
2088 return nullptr;
2089 // It must only be used by the instruction we combine with.
2090 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2091 return nullptr;
2092
2093 return MI;
2094 }
2095
2096 /// Utility routine that checks if \p MO is defined by a SLLI in \p MBB
2097 /// that can be combined by splitting across two SHXADD instructions. The
2098 /// first SHXADD shift amount is given by \p OuterShiftAmt.
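/// For example (illustrative), with OuterShiftAmt = 3 an inner SLLI by 5 can
/// be split as a SH2ADD feeding the outer SH3ADD, because
///   (Y << 5) + X + (Z << 3) == (((Y << 2) + Z) << 3) + X.
/// Inner shift amounts outside [OuterShiftAmt, OuterShiftAmt + 3] can't be
/// split this way.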
2099 static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB,
2100 const MachineOperand &MO,
2101 unsigned OuterShiftAmt) {
2102 const MachineInstr *ShiftMI = canCombine(MBB, MO, RISCV::SLLI);
2103 if (!ShiftMI)
2104 return false;
2105
2106 unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
2107 if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3)
2108 return false;
2109
2110 return true;
2111 }
2112
2113 // Returns the shift amount from a SHXADD instruction. Returns 0 if the
2114 // instruction is not a SHXADD.
2115 static unsigned getSHXADDShiftAmount(unsigned Opc) {
2116 switch (Opc) {
2117 default:
2118 return 0;
2119 case RISCV::SH1ADD:
2120 return 1;
2121 case RISCV::SH2ADD:
2122 return 2;
2123 case RISCV::SH3ADD:
2124 return 3;
2125 }
2126 }
2127
2128 // Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
2129 // (sh3add (sh2add Y, Z), X).
2130 static bool getSHXADDPatterns(const MachineInstr &Root,
2131 SmallVectorImpl<unsigned> &Patterns) {
2132 unsigned ShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
2133 if (!ShiftAmt)
2134 return false;
2135
2136 const MachineBasicBlock &MBB = *Root.getParent();
2137
2138 const MachineInstr *AddMI = canCombine(MBB, Root.getOperand(2), RISCV::ADD);
2139 if (!AddMI)
2140 return false;
2141
2142 bool Found = false;
2143 if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(1), ShiftAmt)) {
2144 Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1);
2145 Found = true;
2146 }
2147 if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(2), ShiftAmt)) {
2148 Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2);
2149 Found = true;
2150 }
2151
2152 return Found;
2153 }
2154
2155 CombinerObjective RISCVInstrInfo::getCombinerObjective(unsigned Pattern) const {
2156 switch (Pattern) {
2157 case RISCVMachineCombinerPattern::FMADD_AX:
2158 case RISCVMachineCombinerPattern::FMADD_XA:
2159 case RISCVMachineCombinerPattern::FMSUB:
2160 case RISCVMachineCombinerPattern::FNMSUB:
2161 return CombinerObjective::MustReduceDepth;
2162 default:
2163 return TargetInstrInfo::getCombinerObjective(Pattern);
2164 }
2165 }
2166
2167 bool RISCVInstrInfo::getMachineCombinerPatterns(
2168 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
2169 bool DoRegPressureReduce) const {
2170
2171 if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
2172 return true;
2173
2174 if (getSHXADDPatterns(Root, Patterns))
2175 return true;
2176
2177 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
2178 DoRegPressureReduce);
2179 }
2180
2181 static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) {
2182 switch (RootOpc) {
2183 default:
2184 llvm_unreachable("Unexpected opcode");
2185 case RISCV::FADD_H:
2186 return RISCV::FMADD_H;
2187 case RISCV::FADD_S:
2188 return RISCV::FMADD_S;
2189 case RISCV::FADD_D:
2190 return RISCV::FMADD_D;
2191 case RISCV::FSUB_H:
2192 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
2193 : RISCV::FNMSUB_H;
2194 case RISCV::FSUB_S:
2195 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
2196 : RISCV::FNMSUB_S;
2197 case RISCV::FSUB_D:
2198 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
2199 : RISCV::FNMSUB_D;
2200 }
2201 }
2202
2203 static unsigned getAddendOperandIdx(unsigned Pattern) {
2204 switch (Pattern) {
2205 default:
2206 llvm_unreachable("Unexpected pattern");
2207 case RISCVMachineCombinerPattern::FMADD_AX:
2208 case RISCVMachineCombinerPattern::FMSUB:
2209 return 2;
2210 case RISCVMachineCombinerPattern::FMADD_XA:
2211 case RISCVMachineCombinerPattern::FNMSUB:
2212 return 1;
2213 }
2214 }
2215
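// Build the fused multiply-add/sub from a (fmul, fadd/fsub) pair, e.g.
// (illustrative registers):
//   fmul.d ft0, fa0, fa1
//   fadd.d fa2, ft0, fa3
// becomes
//   fmadd.d fa2, fa0, fa1, fa3
// The fmul is only scheduled for deletion when the fused instruction is its
// sole user.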
2216 static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
2217 unsigned Pattern,
2218 SmallVectorImpl<MachineInstr *> &InsInstrs,
2219 SmallVectorImpl<MachineInstr *> &DelInstrs) {
2220 MachineFunction *MF = Root.getMF();
2221 MachineRegisterInfo &MRI = MF->getRegInfo();
2222 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2223
2224 MachineOperand &Mul1 = Prev.getOperand(1);
2225 MachineOperand &Mul2 = Prev.getOperand(2);
2226 MachineOperand &Dst = Root.getOperand(0);
2227 MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern));
2228
2229 Register DstReg = Dst.getReg();
2230 unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern);
2231 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
2232 DebugLoc MergedLoc =
2233 DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc());
2234
2235 bool Mul1IsKill = Mul1.isKill();
2236 bool Mul2IsKill = Mul2.isKill();
2237 bool AddendIsKill = Addend.isKill();
2238
2239 // We need to clear kill flags since we may be extending the live range past
2240 // a kill. If the mul had kill flags, we can preserve those since we know
2241 // where the previous range stopped.
2242 MRI.clearKillFlags(Mul1.getReg());
2243 MRI.clearKillFlags(Mul2.getReg());
2244
2245 MachineInstrBuilder MIB =
2246 BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg)
2247 .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill))
2248 .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill))
2249 .addReg(Addend.getReg(), getKillRegState(AddendIsKill))
2250 .setMIFlags(IntersectedFlags);
2251
2252 InsInstrs.push_back(MIB);
2253 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg()))
2254 DelInstrs.push_back(&Prev);
2255 DelInstrs.push_back(&Root);
2256 }
2257
2258 // Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
2259 // (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
2260 // shXadd instructions. The outer shXadd keeps its original opcode.
2261 static void
2262 genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
2263 SmallVectorImpl<MachineInstr *> &InsInstrs,
2264 SmallVectorImpl<MachineInstr *> &DelInstrs,
2265 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
2266 MachineFunction *MF = Root.getMF();
2267 MachineRegisterInfo &MRI = MF->getRegInfo();
2268 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2269
2270 unsigned OuterShiftAmt = getSHXADDShiftAmount(Root.getOpcode());
2271 assert(OuterShiftAmt != 0 && "Unexpected opcode");
2272
2273 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
2274 MachineInstr *ShiftMI =
2275 MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg());
2276
2277 unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm();
2278 assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");
2279
2280 unsigned InnerOpc;
2281 switch (InnerShiftAmt - OuterShiftAmt) {
2282 default:
2283 llvm_unreachable("Unexpected shift amount");
2284 case 0:
2285 InnerOpc = RISCV::ADD;
2286 break;
2287 case 1:
2288 InnerOpc = RISCV::SH1ADD;
2289 break;
2290 case 2:
2291 InnerOpc = RISCV::SH2ADD;
2292 break;
2293 case 3:
2294 InnerOpc = RISCV::SH3ADD;
2295 break;
2296 }
2297
2298 const MachineOperand &X = AddMI->getOperand(3 - AddOpIdx);
2299 const MachineOperand &Y = ShiftMI->getOperand(1);
2300 const MachineOperand &Z = Root.getOperand(1);
2301
2302 Register NewVR = MRI.createVirtualRegister(&RISCV::GPRRegClass);
2303
2304 auto MIB1 = BuildMI(*MF, MIMetadata(Root), TII->get(InnerOpc), NewVR)
2305 .addReg(Y.getReg(), getKillRegState(Y.isKill()))
2306 .addReg(Z.getReg(), getKillRegState(Z.isKill()));
2307 auto MIB2 = BuildMI(*MF, MIMetadata(Root), TII->get(Root.getOpcode()),
2308 Root.getOperand(0).getReg())
2309 .addReg(NewVR, RegState::Kill)
2310 .addReg(X.getReg(), getKillRegState(X.isKill()));
2311
2312 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
2313 InsInstrs.push_back(MIB1);
2314 InsInstrs.push_back(MIB2);
2315 DelInstrs.push_back(ShiftMI);
2316 DelInstrs.push_back(AddMI);
2317 DelInstrs.push_back(&Root);
2318 }
2319
2320 void RISCVInstrInfo::genAlternativeCodeSequence(
2321 MachineInstr &Root, unsigned Pattern,
2322 SmallVectorImpl<MachineInstr *> &InsInstrs,
2323 SmallVectorImpl<MachineInstr *> &DelInstrs,
2324 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
2325 MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
2326 switch (Pattern) {
2327 default:
2328 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
2329 DelInstrs, InstrIdxForVirtReg);
2330 return;
2331 case RISCVMachineCombinerPattern::FMADD_AX:
2332 case RISCVMachineCombinerPattern::FMSUB: {
2333 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg());
2334 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2335 return;
2336 }
2337 case RISCVMachineCombinerPattern::FMADD_XA:
2338 case RISCVMachineCombinerPattern::FNMSUB: {
2339 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg());
2340 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2341 return;
2342 }
2343 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1:
2344 genShXAddAddShift(Root, 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2345 return;
2346 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2:
2347 genShXAddAddShift(Root, 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2348 return;
2349 }
2350 }
2351
2352 bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
2353 StringRef &ErrInfo) const {
2354 MCInstrDesc const &Desc = MI.getDesc();
2355
2356 for (const auto &[Index, Operand] : enumerate(Desc.operands())) {
2357 unsigned OpType = Operand.OperandType;
2358 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
2359 OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
2360 const MachineOperand &MO = MI.getOperand(Index);
2361 if (MO.isImm()) {
2362 int64_t Imm = MO.getImm();
2363 bool Ok;
2364 switch (OpType) {
2365 default:
2366 llvm_unreachable("Unexpected operand type");
2367
2368 // clang-format off
2369 #define CASE_OPERAND_UIMM(NUM) \
2370 case RISCVOp::OPERAND_UIMM##NUM: \
2371 Ok = isUInt<NUM>(Imm); \
2372 break;
2373 CASE_OPERAND_UIMM(1)
2374 CASE_OPERAND_UIMM(2)
2375 CASE_OPERAND_UIMM(3)
2376 CASE_OPERAND_UIMM(4)
2377 CASE_OPERAND_UIMM(5)
2378 CASE_OPERAND_UIMM(6)
2379 CASE_OPERAND_UIMM(7)
2380 CASE_OPERAND_UIMM(8)
2381 CASE_OPERAND_UIMM(12)
2382 CASE_OPERAND_UIMM(20)
2383 // clang-format on
2384 case RISCVOp::OPERAND_UIMM2_LSB0:
2385 Ok = isShiftedUInt<1, 1>(Imm);
2386 break;
2387 case RISCVOp::OPERAND_UIMM5_LSB0:
2388 Ok = isShiftedUInt<4, 1>(Imm);
2389 break;
2390 case RISCVOp::OPERAND_UIMM6_LSB0:
2391 Ok = isShiftedUInt<5, 1>(Imm);
2392 break;
2393 case RISCVOp::OPERAND_UIMM7_LSB00:
2394 Ok = isShiftedUInt<5, 2>(Imm);
2395 break;
2396 case RISCVOp::OPERAND_UIMM8_LSB00:
2397 Ok = isShiftedUInt<6, 2>(Imm);
2398 break;
2399 case RISCVOp::OPERAND_UIMM8_LSB000:
2400 Ok = isShiftedUInt<5, 3>(Imm);
2401 break;
2402 case RISCVOp::OPERAND_UIMM8_GE32:
2403 Ok = isUInt<8>(Imm) && Imm >= 32;
2404 break;
2405 case RISCVOp::OPERAND_UIMM9_LSB000:
2406 Ok = isShiftedUInt<6, 3>(Imm);
2407 break;
2408 case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
2409 Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);
2410 break;
2411 case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO:
2412 Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0);
2413 break;
2414 case RISCVOp::OPERAND_ZERO:
2415 Ok = Imm == 0;
2416 break;
2417 case RISCVOp::OPERAND_SIMM5:
2418 Ok = isInt<5>(Imm);
2419 break;
2420 case RISCVOp::OPERAND_SIMM5_PLUS1:
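        // simm5 plus one: the accepted range is [-15, 16].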
2421 Ok = (isInt<5>(Imm) && Imm != -16) || Imm == 16;
2422 break;
2423 case RISCVOp::OPERAND_SIMM6:
2424 Ok = isInt<6>(Imm);
2425 break;
2426 case RISCVOp::OPERAND_SIMM6_NONZERO:
2427 Ok = Imm != 0 && isInt<6>(Imm);
2428 break;
2429 case RISCVOp::OPERAND_VTYPEI10:
2430 Ok = isUInt<10>(Imm);
2431 break;
2432 case RISCVOp::OPERAND_VTYPEI11:
2433 Ok = isUInt<11>(Imm);
2434 break;
2435 case RISCVOp::OPERAND_SIMM12:
2436 Ok = isInt<12>(Imm);
2437 break;
2438 case RISCVOp::OPERAND_SIMM12_LSB00000:
2439 Ok = isShiftedInt<7, 5>(Imm);
2440 break;
2441 case RISCVOp::OPERAND_UIMMLOG2XLEN:
2442 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
2443 break;
2444 case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
2445 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
2446 Ok = Ok && Imm != 0;
2447 break;
2448 case RISCVOp::OPERAND_CLUI_IMM:
2449 Ok = (isUInt<5>(Imm) && Imm != 0) ||
2450 (Imm >= 0xfffe0 && Imm <= 0xfffff);
2451 break;
2452 case RISCVOp::OPERAND_RVKRNUM:
2453 Ok = Imm >= 0 && Imm <= 10;
2454 break;
2455 case RISCVOp::OPERAND_RVKRNUM_0_7:
2456 Ok = Imm >= 0 && Imm <= 7;
2457 break;
2458 case RISCVOp::OPERAND_RVKRNUM_1_10:
2459 Ok = Imm >= 1 && Imm <= 10;
2460 break;
2461 case RISCVOp::OPERAND_RVKRNUM_2_14:
2462 Ok = Imm >= 2 && Imm <= 14;
2463 break;
2464 case RISCVOp::OPERAND_SPIMM:
2465 Ok = (Imm & 0xf) == 0;
2466 break;
2467 }
2468 if (!Ok) {
2469 ErrInfo = "Invalid immediate";
2470 return false;
2471 }
2472 }
2473 }
2474 }
2475
2476 const uint64_t TSFlags = Desc.TSFlags;
2477 if (RISCVII::hasVLOp(TSFlags)) {
2478 const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc));
2479 if (!Op.isImm() && !Op.isReg()) {
2480 ErrInfo = "Invalid operand type for VL operand";
2481 return false;
2482 }
2483 if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
2484 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2485 auto *RC = MRI.getRegClass(Op.getReg());
2486 if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
2487 ErrInfo = "Invalid register class for VL operand";
2488 return false;
2489 }
2490 }
2491 if (!RISCVII::hasSEWOp(TSFlags)) {
2492 ErrInfo = "VL operand w/o SEW operand?";
2493 return false;
2494 }
2495 }
2496 if (RISCVII::hasSEWOp(TSFlags)) {
2497 unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
2498 if (!MI.getOperand(OpIdx).isImm()) {
2499 ErrInfo = "SEW value expected to be an immediate";
2500 return false;
2501 }
2502 uint64_t Log2SEW = MI.getOperand(OpIdx).getImm();
2503 if (Log2SEW > 31) {
2504 ErrInfo = "Unexpected SEW value";
2505 return false;
2506 }
2507 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
2508 if (!RISCVVType::isValidSEW(SEW)) {
2509 ErrInfo = "Unexpected SEW value";
2510 return false;
2511 }
2512 }
2513 if (RISCVII::hasVecPolicyOp(TSFlags)) {
2514 unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
2515 if (!MI.getOperand(OpIdx).isImm()) {
2516 ErrInfo = "Policy operand expected to be an immediate";
2517 return false;
2518 }
2519 uint64_t Policy = MI.getOperand(OpIdx).getImm();
2520 if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) {
2521 ErrInfo = "Invalid Policy Value";
2522 return false;
2523 }
2524 if (!RISCVII::hasVLOp(TSFlags)) {
2525 ErrInfo = "policy operand w/o VL operand?";
2526 return false;
2527 }
2528
2529 // VecPolicy operands can only exist on instructions with passthru/merge
2530 // arguments. Note that not all instructions with a passthru operand have a
2531 // vec policy operand; some instructions have implicit policies.
2532 unsigned UseOpIdx;
2533 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
2534 ErrInfo = "policy operand w/o tied operand?";
2535 return false;
2536 }
2537 }
2538
2539 return true;
2540 }
2541
2542 bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
2543 const MachineInstr &AddrI,
2544 ExtAddrMode &AM) const {
2545 switch (MemI.getOpcode()) {
2546 default:
2547 return false;
2548 case RISCV::LB:
2549 case RISCV::LBU:
2550 case RISCV::LH:
2551 case RISCV::LHU:
2552 case RISCV::LW:
2553 case RISCV::LWU:
2554 case RISCV::LD:
2555 case RISCV::FLH:
2556 case RISCV::FLW:
2557 case RISCV::FLD:
2558 case RISCV::SB:
2559 case RISCV::SH:
2560 case RISCV::SW:
2561 case RISCV::SD:
2562 case RISCV::FSH:
2563 case RISCV::FSW:
2564 case RISCV::FSD:
2565 break;
2566 }
2567
2568 if (MemI.getOperand(0).getReg() == Reg)
2569 return false;
2570
2571 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||
2572 !AddrI.getOperand(2).isImm())
2573 return false;
2574
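  // Fold the ADDI's displacement into the memory instruction's immediate, e.g.
  // (illustrative):
  //   addi a1, a0, 16
  //   lw   a2, 4(a1)
  // can be folded into
  //   lw   a2, 20(a0)
  // as long as the combined offset still fits in a signed 12-bit immediate.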
2575 int64_t OldOffset = MemI.getOperand(2).getImm();
2576 int64_t Disp = AddrI.getOperand(2).getImm();
2577 int64_t NewOffset = OldOffset + Disp;
2578 if (!STI.is64Bit())
2579 NewOffset = SignExtend64<32>(NewOffset);
2580
2581 if (!isInt<12>(NewOffset))
2582 return false;
2583
2584 AM.BaseReg = AddrI.getOperand(1).getReg();
2585 AM.ScaledReg = 0;
2586 AM.Scale = 0;
2587 AM.Displacement = NewOffset;
2588 AM.Form = ExtAddrMode::Formula::Basic;
2589 return true;
2590 }
2591
2592 MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
2593 const ExtAddrMode &AM) const {
2594
2595 const DebugLoc &DL = MemI.getDebugLoc();
2596 MachineBasicBlock &MBB = *MemI.getParent();
2597
2598 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
2599 "Addressing mode not supported for folding");
2600
2601 return BuildMI(MBB, MemI, DL, get(MemI.getOpcode()))
2602 .addReg(MemI.getOperand(0).getReg(),
2603 MemI.mayLoad() ? RegState::Define : 0)
2604 .addReg(AM.BaseReg)
2605 .addImm(AM.Displacement)
2606 .setMemRefs(MemI.memoperands())
2607 .setMIFlags(MemI.getFlags());
2608 }
2609
2610 bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
2611 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2612 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2613 const TargetRegisterInfo *TRI) const {
2614 if (!LdSt.mayLoadOrStore())
2615 return false;
2616
2617 // Conservatively, only handle scalar loads/stores for now.
2618 switch (LdSt.getOpcode()) {
2619 case RISCV::LB:
2620 case RISCV::LBU:
2621 case RISCV::SB:
2622 case RISCV::LH:
2623 case RISCV::LHU:
2624 case RISCV::FLH:
2625 case RISCV::SH:
2626 case RISCV::FSH:
2627 case RISCV::LW:
2628 case RISCV::LWU:
2629 case RISCV::FLW:
2630 case RISCV::SW:
2631 case RISCV::FSW:
2632 case RISCV::LD:
2633 case RISCV::FLD:
2634 case RISCV::SD:
2635 case RISCV::FSD:
2636 break;
2637 default:
2638 return false;
2639 }
2640 const MachineOperand *BaseOp;
2641 OffsetIsScalable = false;
2642 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2643 return false;
2644 BaseOps.push_back(BaseOp);
2645 return true;
2646 }
2647
2648 // TODO: This was copied from SIInstrInfo. Could it be lifted to a common
2649 // helper?
2650 static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
2651 ArrayRef<const MachineOperand *> BaseOps1,
2652 const MachineInstr &MI2,
2653 ArrayRef<const MachineOperand *> BaseOps2) {
2654 // Only examine the first "base" operand of each instruction, on the
2655 // assumption that it represents the real base address of the memory access.
2656 // Other operands are typically offsets or indices from this base address.
2657 if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
2658 return true;
2659
2660 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
2661 return false;
2662
2663 auto MO1 = *MI1.memoperands_begin();
2664 auto MO2 = *MI2.memoperands_begin();
2665 if (MO1->getAddrSpace() != MO2->getAddrSpace())
2666 return false;
2667
2668 auto Base1 = MO1->getValue();
2669 auto Base2 = MO2->getValue();
2670 if (!Base1 || !Base2)
2671 return false;
2672 Base1 = getUnderlyingObject(Base1);
2673 Base2 = getUnderlyingObject(Base2);
2674
2675 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
2676 return false;
2677
2678 return Base1 == Base2;
2679 }
2680
2681 bool RISCVInstrInfo::shouldClusterMemOps(
2682 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
2683 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2684 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
2685 unsigned NumBytes) const {
2686 // If the mem ops (to be clustered) do not have the same base ptr, then they
2687 // should not be clustered
2688 if (!BaseOps1.empty() && !BaseOps2.empty()) {
2689 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
2690 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
2691 if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
2692 return false;
2693 } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
2694 // If only one base op is empty, they do not have the same base ptr
2695 return false;
2696 }
2697
2698 unsigned CacheLineSize =
2699 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
2700 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
2701 CacheLineSize = CacheLineSize ? CacheLineSize : 64;
2702 // Cluster if the memory operations are on the same or a neighbouring cache
2703 // line, but limit the maximum ClusterSize to avoid creating too much
2704 // additional register pressure.
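  // For example (illustrative), two loads at offsets 0 and 8 from the same
  // base are clustered, while offsets 0 and 256 are not (with a 64-byte line).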
2705 return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize;
2706 }
2707
2708 // Set BaseReg (the base register operand), Offset (the byte offset being
2709 // accessed) and the access Width of the passed instruction that reads/writes
2710 // memory. Returns false if the instruction does not read/write memory or the
2711 // BaseReg/Offset/Width can't be determined. It is not guaranteed to
2712 // recognise base operands and offsets in all cases.
2713 // TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
2714 // function) and set it as appropriate.
2715 bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
2716 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
2717 LocationSize &Width, const TargetRegisterInfo *TRI) const {
2718 if (!LdSt.mayLoadOrStore())
2719 return false;
2720
2721 // Here we assume the standard RISC-V ISA, which uses a base+offset
2722 // addressing mode. You'll need to relax these conditions to support custom
2723 // load/store instructions.
2724 if (LdSt.getNumExplicitOperands() != 3)
2725 return false;
2726 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
2727 !LdSt.getOperand(2).isImm())
2728 return false;
2729
2730 if (!LdSt.hasOneMemOperand())
2731 return false;
2732
2733 Width = (*LdSt.memoperands_begin())->getSize();
2734 BaseReg = &LdSt.getOperand(1);
2735 Offset = LdSt.getOperand(2).getImm();
2736 return true;
2737 }
2738
2739 bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
2740 const MachineInstr &MIa, const MachineInstr &MIb) const {
2741 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
2742 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
2743
2744 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
2745 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
2746 return false;
2747
2748 // Retrieve the base register, offset from the base register and width. Width
2749 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
2750 // base registers are identical, and the offset of a lower memory access +
2751 // the width doesn't overlap the offset of a higher memory access,
2752 // then the memory accesses are different.
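  // For example (illustrative), a 4-byte load at 0(a0) and another at 8(a0)
  // are trivially disjoint, while loads at 0(a0) and 2(a0) may overlap.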
2753 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
2754 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
2755 int64_t OffsetA = 0, OffsetB = 0;
2756 LocationSize WidthA = 0, WidthB = 0;
2757 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
2758 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
2759 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
2760 int LowOffset = std::min(OffsetA, OffsetB);
2761 int HighOffset = std::max(OffsetA, OffsetB);
2762 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
2763 if (LowWidth.hasValue() &&
2764 LowOffset + (int)LowWidth.getValue() <= HighOffset)
2765 return true;
2766 }
2767 }
2768 return false;
2769 }
2770
2771 std::pair<unsigned, unsigned>
2772 RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
2773 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
2774 return std::make_pair(TF & Mask, TF & ~Mask);
2775 }
2776
2777 ArrayRef<std::pair<unsigned, const char *>>
2778 RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
2779 using namespace RISCVII;
2780 static const std::pair<unsigned, const char *> TargetFlags[] = {
2781 {MO_CALL, "riscv-call"},
2782 {MO_LO, "riscv-lo"},
2783 {MO_HI, "riscv-hi"},
2784 {MO_PCREL_LO, "riscv-pcrel-lo"},
2785 {MO_PCREL_HI, "riscv-pcrel-hi"},
2786 {MO_GOT_HI, "riscv-got-hi"},
2787 {MO_TPREL_LO, "riscv-tprel-lo"},
2788 {MO_TPREL_HI, "riscv-tprel-hi"},
2789 {MO_TPREL_ADD, "riscv-tprel-add"},
2790 {MO_TLS_GOT_HI, "riscv-tls-got-hi"},
2791 {MO_TLS_GD_HI, "riscv-tls-gd-hi"},
2792 {MO_TLSDESC_HI, "riscv-tlsdesc-hi"},
2793 {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"},
2794 {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"},
2795 {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}};
2796 return ArrayRef(TargetFlags);
2797 }
2798 bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
2799 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
2800 const Function &F = MF.getFunction();
2801
2802 // Can F be deduplicated by the linker? If it can, don't outline from it.
2803 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
2804 return false;
2805
2806 // Don't outline from functions with section markings; the program could
2807 // expect that all the code is in the named section.
2808 if (F.hasSection())
2809 return false;
2810
2811 // It's safe to outline from MF.
2812 return true;
2813 }
2814
2815 bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
2816 unsigned &Flags) const {
2817 // More accurate safety checking is done in getOutliningCandidateInfo.
2818 return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
2819 }
2820
2821 // Enum values indicating how an outlined call should be constructed.
2822 enum MachineOutlinerConstructionID {
2823 MachineOutlinerDefault
2824 };
2825
2826 bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
2827 MachineFunction &MF) const {
2828 return MF.getFunction().hasMinSize();
2829 }
2830
2831 std::optional<outliner::OutlinedFunction>
2832 RISCVInstrInfo::getOutliningCandidateInfo(
2833 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
2834
2835 // First we need to filter out candidates where the X5 register (i.e. t0)
2836 // can't be used to set up the function call.
2837 auto CannotInsertCall = [](outliner::Candidate &C) {
2838 const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
2839 return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
2840 };
2841
2842 llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
2843
2844 // If the sequence doesn't have enough candidates left, then we're done.
2845 if (RepeatedSequenceLocs.size() < 2)
2846 return std::nullopt;
2847
2848 unsigned SequenceSize = 0;
2849
2850 for (auto &MI : RepeatedSequenceLocs[0])
2851 SequenceSize += getInstSizeInBytes(MI);
2852
2853 // call t0, function = 8 bytes.
2854 unsigned CallOverhead = 8;
2855 for (auto &C : RepeatedSequenceLocs)
2856 C.setCallInfo(MachineOutlinerDefault, CallOverhead);
2857
2858 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
2859 unsigned FrameOverhead = 4;
2860 if (RepeatedSequenceLocs[0]
2861 .getMF()
2862 ->getSubtarget<RISCVSubtarget>()
2863 .hasStdExtCOrZca())
2864 FrameOverhead = 2;
2865
2866 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
2867 FrameOverhead, MachineOutlinerDefault);
2868 }
2869
2870 outliner::InstrType
2871 RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
2872 unsigned Flags) const {
2873 MachineInstr &MI = *MBBI;
2874 MachineBasicBlock *MBB = MI.getParent();
2875 const TargetRegisterInfo *TRI =
2876 MBB->getParent()->getSubtarget().getRegisterInfo();
2877 const auto &F = MI.getMF()->getFunction();
2878
2879 // We can manually strip out CFI instructions later.
2880 if (MI.isCFIInstruction())
2881 // If the current function has exception handling code, we can't outline
2882 // and strip these CFI instructions, since doing so may break the .eh_frame
2883 // section needed for unwinding.
2884 return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
2885 : outliner::InstrType::Invisible;
2886
2887 // We need support for tail calls to outlined functions before return
2888 // statements can be allowed.
2889 if (MI.isReturn())
2890 return outliner::InstrType::Illegal;
2891
2892 // Don't allow modifying the X5 register, which we use for the return address
2893 // of these outlined functions.
2894 if (MI.modifiesRegister(RISCV::X5, TRI) ||
2895 MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
2896 return outliner::InstrType::Illegal;
2897
2898 // Make sure the operands don't reference something unsafe.
2899 for (const auto &MO : MI.operands()) {
2900
2901 // pcrel-hi and pcrel-lo can't be put in separate sections; filter that out
2902 // if at all possible.
2903 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
2904 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
2905 F.hasSection() || F.getSectionPrefix()))
2906 return outliner::InstrType::Illegal;
2907 }
2908
2909 return outliner::InstrType::Legal;
2910 }
2911
2912 void RISCVInstrInfo::buildOutlinedFrame(
2913 MachineBasicBlock &MBB, MachineFunction &MF,
2914 const outliner::OutlinedFunction &OF) const {
2915
2916 // Strip out any CFI instructions
2917 bool Changed = true;
2918 while (Changed) {
2919 Changed = false;
2920 auto I = MBB.begin();
2921 auto E = MBB.end();
2922 for (; I != E; ++I) {
2923 if (I->isCFIInstruction()) {
2924 I->removeFromParent();
2925 Changed = true;
2926 break;
2927 }
2928 }
2929 }
2930
2931 MBB.addLiveIn(RISCV::X5);
2932
2933 // Add in a return instruction to the end of the outlined frame.
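  // This is "jalr x0, 0(x5)", i.e. the "jr t0" accounted for as the frame
  // overhead in getOutliningCandidateInfo.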
2934 MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
2935 .addReg(RISCV::X0, RegState::Define)
2936 .addReg(RISCV::X5)
2937 .addImm(0));
2938 }
2939
2940 MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
2941 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
2942 MachineFunction &MF, outliner::Candidate &C) const {
2943
2944 // Add in a call instruction to the outlined function at the given location.
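  // This is a "call t0, <outlined function>" pseudo, which later expands to an
  // auipc/jalr pair using X5 (t0) as the link register; see also
  // getOutliningCandidateInfo, which budgets 8 bytes for it.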
2945 It = MBB.insert(It,
2946 BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
2947 .addGlobalAddress(M.getNamedValue(MF.getName()), 0,
2948 RISCVII::MO_CALL));
2949 return It;
2950 }
2951
2952 std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,
2953 Register Reg) const {
2954 // TODO: Handle cases where Reg is a super- or sub-register of the
2955 // destination register.
2956 const MachineOperand &Op0 = MI.getOperand(0);
2957 if (!Op0.isReg() || Reg != Op0.getReg())
2958 return std::nullopt;
2959
2960 // Don't consider ADDIW as a candidate because the caller may not be aware
2961 // of its sign extension behaviour.
2962 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
2963 MI.getOperand(2).isImm())
2964 return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()};
2965
2966 return std::nullopt;
2967 }
2968
2969 // MIR printer helper function to annotate Operands with a comment.
2970 std::string RISCVInstrInfo::createMIROperandComment(
2971 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
2972 const TargetRegisterInfo *TRI) const {
2973 // Print a generic comment for this operand if there is one.
2974 std::string GenericComment =
2975 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
2976 if (!GenericComment.empty())
2977 return GenericComment;
2978
2979 // If not, we must have an immediate operand.
2980 if (!Op.isImm())
2981 return std::string();
2982
2983 std::string Comment;
2984 raw_string_ostream OS(Comment);
2985
2986 uint64_t TSFlags = MI.getDesc().TSFlags;
2987
2988 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
2989 // operand of vector codegen pseudos.
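  // For example (illustrative), a vsetvli's vtype immediate may be rendered as
  // "e32, m1, ta, ma", while a pseudo's SEW operand is rendered as "e32" and
  // its policy operand as "ta, ma".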
2990 if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI ||
2991 MI.getOpcode() == RISCV::PseudoVSETVLI ||
2992 MI.getOpcode() == RISCV::PseudoVSETIVLI ||
2993 MI.getOpcode() == RISCV::PseudoVSETVLIX0) &&
2994 OpIdx == 2) {
2995 unsigned Imm = MI.getOperand(OpIdx).getImm();
2996 RISCVVType::printVType(Imm, OS);
2997 } else if (RISCVII::hasSEWOp(TSFlags) &&
2998 OpIdx == RISCVII::getSEWOpNum(MI.getDesc())) {
2999 unsigned Log2SEW = MI.getOperand(OpIdx).getImm();
3000 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
3001 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
3002 OS << "e" << SEW;
3003 } else if (RISCVII::hasVecPolicyOp(TSFlags) &&
3004 OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) {
3005 unsigned Policy = MI.getOperand(OpIdx).getImm();
3006 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
3007 "Invalid Policy Value");
3008 OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
3009 << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu");
3010 }
3011
3012 OS.flush();
3013 return Comment;
3014 }
3015
3016 // clang-format off
3017 #define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
3018 RISCV::Pseudo##OP##_##LMUL
3019
3020 #define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
3021 RISCV::Pseudo##OP##_##LMUL##_MASK
3022
3023 #define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
3024 CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
3025 case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)
3026
3027 #define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
3028 CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
3029 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
3030 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
3031 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
3032 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
3033 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)
3034
3035 #define CASE_RVV_OPCODE_UNMASK(OP) \
3036 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
3037 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)
3038
3039 #define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
3040 CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
3041 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
3042 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
3043 case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
3044 case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
3045 case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)
3046
3047 #define CASE_RVV_OPCODE_MASK(OP) \
3048 CASE_RVV_OPCODE_MASK_WIDEN(OP): \
3049 case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)
3050
3051 #define CASE_RVV_OPCODE_WIDEN(OP) \
3052 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
3053 case CASE_RVV_OPCODE_MASK_WIDEN(OP)
3054
3055 #define CASE_RVV_OPCODE(OP) \
3056 CASE_RVV_OPCODE_UNMASK(OP): \
3057 case CASE_RVV_OPCODE_MASK(OP)
3058 // clang-format on
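// For illustration, CASE_RVV_OPCODE(VADD_VV) expands to the case labels for
// RISCV::PseudoVADD_VV_MF8 through RISCV::PseudoVADD_VV_M8 and their _MASK
// counterparts, so a single entry in the switch below covers every LMUL and
// masking variant of a pseudo.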
3059
3060 // clang-format off
3061 #define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
3062 RISCV::PseudoV##OP##_##TYPE##_##LMUL
3063
3064 #define CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) \
3065 CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \
3066 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \
3067 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \
3068 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8)
3069
3070 #define CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) \
3071 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \
3072 case CASE_VMA_OPCODE_LMULS_M1(OP, TYPE)
3073
3074 #define CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) \
3075 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \
3076 case CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE)
3077
3078 #define CASE_VMA_OPCODE_LMULS(OP, TYPE) \
3079 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \
3080 case CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE)
3081
3082 // VFMA instructions are SEW specific.
3083 #define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \
3084 RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW
3085
3086 #define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \
3087 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \
3088 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \
3089 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \
3090 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW)
3091
3092 #define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \
3093 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \
3094 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW)
3095
3096 #define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \
3097 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \
3098 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW)
3099
3100 #define CASE_VFMA_OPCODE_VV(OP) \
3101 CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
3102 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
3103 case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
3104
3105 #define CASE_VFMA_SPLATS(OP) \
3106 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
3107 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
3108 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
3109 // clang-format on
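// Likewise, CASE_VFMA_SPLATS(FMADD) covers the scalar-splat FMA pseudos
// (e.g. RISCV::PseudoVFMADD_VFPR32_M1_E32) across FP register types, SEWs and
// LMULs, and CASE_VMA_OPCODE_LMULS(MACC, VV) covers PseudoVMACC_VV_MF8
// through PseudoVMACC_VV_M8.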
3110
3111 bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
3112 unsigned &SrcOpIdx1,
3113 unsigned &SrcOpIdx2) const {
3114 const MCInstrDesc &Desc = MI.getDesc();
3115 if (!Desc.isCommutable())
3116 return false;
3117
3118 switch (MI.getOpcode()) {
3119 case RISCV::TH_MVEQZ:
3120 case RISCV::TH_MVNEZ:
3121 // We can't commute the operands if operand 2 (i.e., rs1 in
3122 // mveqz/mvnez rd,rs1,rs2) is the zero register, since X0 is
3123 // not valid as the in/out operand 1.
3124 if (MI.getOperand(2).getReg() == RISCV::X0)
3125 return false;
3126 // Operands 1 and 2 are commutable if we also switch the opcode.
3127 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
3128 case RISCV::TH_MULA:
3129 case RISCV::TH_MULAW:
3130 case RISCV::TH_MULAH:
3131 case RISCV::TH_MULS:
3132 case RISCV::TH_MULSW:
3133 case RISCV::TH_MULSH:
3134 // Operands 2 and 3 are commutable.
3135 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
3136 case RISCV::PseudoCCMOVGPRNoX0:
3137 case RISCV::PseudoCCMOVGPR:
3138 // Operands 4 and 5 are commutable.
3139 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
3140 case CASE_RVV_OPCODE(VADD_VV):
3141 case CASE_RVV_OPCODE(VAND_VV):
3142 case CASE_RVV_OPCODE(VOR_VV):
3143 case CASE_RVV_OPCODE(VXOR_VV):
3144 case CASE_RVV_OPCODE_MASK(VMSEQ_VV):
3145 case CASE_RVV_OPCODE_MASK(VMSNE_VV):
3146 case CASE_RVV_OPCODE(VMIN_VV):
3147 case CASE_RVV_OPCODE(VMINU_VV):
3148 case CASE_RVV_OPCODE(VMAX_VV):
3149 case CASE_RVV_OPCODE(VMAXU_VV):
3150 case CASE_RVV_OPCODE(VMUL_VV):
3151 case CASE_RVV_OPCODE(VMULH_VV):
3152 case CASE_RVV_OPCODE(VMULHU_VV):
3153 case CASE_RVV_OPCODE_WIDEN(VWADD_VV):
3154 case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):
3155 case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):
3156 case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):
3157 case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):
3158 case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):
3159 case CASE_RVV_OPCODE_UNMASK(VADC_VVM):
3160 case CASE_RVV_OPCODE(VSADD_VV):
3161 case CASE_RVV_OPCODE(VSADDU_VV):
3162 case CASE_RVV_OPCODE(VAADD_VV):
3163 case CASE_RVV_OPCODE(VAADDU_VV):
3164 case CASE_RVV_OPCODE(VSMUL_VV):
3165 // Operands 2 and 3 are commutable.
3166 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
3167 case CASE_VFMA_SPLATS(FMADD):
3168 case CASE_VFMA_SPLATS(FMSUB):
3169 case CASE_VFMA_SPLATS(FMACC):
3170 case CASE_VFMA_SPLATS(FMSAC):
3171 case CASE_VFMA_SPLATS(FNMADD):
3172 case CASE_VFMA_SPLATS(FNMSUB):
3173 case CASE_VFMA_SPLATS(FNMACC):
3174 case CASE_VFMA_SPLATS(FNMSAC):
3175 case CASE_VFMA_OPCODE_VV(FMACC):
3176 case CASE_VFMA_OPCODE_VV(FMSAC):
3177 case CASE_VFMA_OPCODE_VV(FNMACC):
3178 case CASE_VFMA_OPCODE_VV(FNMSAC):
3179 case CASE_VMA_OPCODE_LMULS(MADD, VX):
3180 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
3181 case CASE_VMA_OPCODE_LMULS(MACC, VX):
3182 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
3183 case CASE_VMA_OPCODE_LMULS(MACC, VV):
3184 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
3185 // If the tail policy is undisturbed we can't commute.
3186 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
3187 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
3188 return false;
3189
3190 // For these instructions we can only swap operand 1 and operand 3 by
3191 // changing the opcode.
3192 unsigned CommutableOpIdx1 = 1;
3193 unsigned CommutableOpIdx2 = 3;
3194 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
3195 CommutableOpIdx2))
3196 return false;
3197 return true;
3198 }
3199 case CASE_VFMA_OPCODE_VV(FMADD):
3200 case CASE_VFMA_OPCODE_VV(FMSUB):
3201 case CASE_VFMA_OPCODE_VV(FNMADD):
3202 case CASE_VFMA_OPCODE_VV(FNMSUB):
3203 case CASE_VMA_OPCODE_LMULS(MADD, VV):
3204 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
3205 // If the tail policy is undisturbed we can't commute.
3206 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
3207 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
3208 return false;
3209
3210 // For these instructions we have more freedom. We can commute with the
3211 // other multiplicand or with the addend/subtrahend/minuend.
3212
3213 // Any fixed operand must be from source 1, 2 or 3.
3214 if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
3215 return false;
3216 if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
3217 return false;
3218
3219 // If both ops are fixed, one must be the tied source.
3220 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
3221 SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
3222 return false;
3223
3224 // Look for two different register operands assumed to be commutable
3225 // regardless of the FMA opcode. The FMA opcode is adjusted later if
3226 // needed.
3227 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
3228 SrcOpIdx2 == CommuteAnyOperandIndex) {
3229 // At least one of the operands to be commuted is not specified, so
3230 // this method is free to choose appropriate commutable operands.
3231 unsigned CommutableOpIdx1 = SrcOpIdx1;
3232 if (SrcOpIdx1 == SrcOpIdx2) {
3233 // Neither operand is fixed. Set one of the commutable
3234 // operands to the tied source.
3235 CommutableOpIdx1 = 1;
3236 } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
3237 // Only one of the operands is not fixed.
3238 CommutableOpIdx1 = SrcOpIdx2;
3239 }
3240
3241 // CommutableOpIdx1 is well defined now. Let's choose another commutable
3242 // operand and assign its index to CommutableOpIdx2.
3243 unsigned CommutableOpIdx2;
3244 if (CommutableOpIdx1 != 1) {
3245 // If we haven't already used the tied source, we must use it now.
3246 CommutableOpIdx2 = 1;
3247 } else {
3248 Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();
3249
3250 // The commuted operands should have different registers.
3251 // Otherwise, the commute transformation does not change anything and
3252 // is useless. We use this as a hint to make our decision.
3253 if (Op1Reg != MI.getOperand(2).getReg())
3254 CommutableOpIdx2 = 2;
3255 else
3256 CommutableOpIdx2 = 3;
3257 }
3258
3259 // Assign the found pair of commutable indices to SrcOpIdx1 and
3260 // SrcOpIdx2 to return those values.
3261 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
3262 CommutableOpIdx2))
3263 return false;
3264 }
3265
3266 return true;
3267 }
3268 }
3269
3270 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
3271 }
3272
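// A sketch of why commuting can force an opcode change (using the vector
// spec's operand order vd, vs1, vs2):
//   vfmacc.vv vd, vs1, vs2   computes vd = vs1 * vs2 + vd
//   vfmadd.vv vd, vs1, vs2   computes vd = vs1 * vd + vs2
// Swapping the tied accumulator (operand 1) with operand 3 changes which
// source plays the addend role, turning an FMACC pseudo into the matching
// FMADD pseudo and vice versa; the CASE_*_CHANGE_OPCODE_* tables below encode
// those mappings.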
3273 // clang-format off
3274 #define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
3275 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
3276 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
3277 break;
3278
3279 #define CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \
3280 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
3281 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
3282 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
3283 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
3284
3285 #define CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \
3286 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
3287 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)
3288
3289 #define CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \
3290 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
3291 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)
3292
3293 #define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
3294 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
3295 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
3296
3297 #define CASE_VMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
3298 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \
3299 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \
3300 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)
3301
3302 // VFMA depends on SEW.
3303 #define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \
3304 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \
3305 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \
3306 break;
3307
3308 #define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \
3309 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \
3310 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \
3311 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \
3312 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW)
3313
3314 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \
3315 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \
3316 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
3317
3318 #define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
3319 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
3320 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
3321 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
3322
3323 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \
3324 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \
3325 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW)
3326
3327 #define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE, SEW) \
3328 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8, SEW) \
3329 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW)
3330
3331 #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
3332 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
3333 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
3334 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
3335
3336 MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
3337 bool NewMI,
3338 unsigned OpIdx1,
3339 unsigned OpIdx2) const {
3340 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
3341 if (NewMI)
3342 return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
3343 return MI;
3344 };
3345
3346 switch (MI.getOpcode()) {
3347 case RISCV::TH_MVEQZ:
3348 case RISCV::TH_MVNEZ: {
3349 auto &WorkingMI = cloneIfNew(MI);
3350 WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ
3351 : RISCV::TH_MVEQZ));
3352 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
3353 OpIdx2);
3354 }
3355 case RISCV::PseudoCCMOVGPRNoX0:
3356 case RISCV::PseudoCCMOVGPR: {
3357 // CCMOV can be commuted by inverting the condition.
3358 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
3359 CC = RISCVCC::getOppositeBranchCondition(CC);
3360 auto &WorkingMI = cloneIfNew(MI);
3361 WorkingMI.getOperand(3).setImm(CC);
3362 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
3363 OpIdx1, OpIdx2);
3364 }
3365 case CASE_VFMA_SPLATS(FMACC):
3366 case CASE_VFMA_SPLATS(FMADD):
3367 case CASE_VFMA_SPLATS(FMSAC):
3368 case CASE_VFMA_SPLATS(FMSUB):
3369 case CASE_VFMA_SPLATS(FNMACC):
3370 case CASE_VFMA_SPLATS(FNMADD):
3371 case CASE_VFMA_SPLATS(FNMSAC):
3372 case CASE_VFMA_SPLATS(FNMSUB):
3373 case CASE_VFMA_OPCODE_VV(FMACC):
3374 case CASE_VFMA_OPCODE_VV(FMSAC):
3375 case CASE_VFMA_OPCODE_VV(FNMACC):
3376 case CASE_VFMA_OPCODE_VV(FNMSAC):
3377 case CASE_VMA_OPCODE_LMULS(MADD, VX):
3378 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
3379 case CASE_VMA_OPCODE_LMULS(MACC, VX):
3380 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
3381 case CASE_VMA_OPCODE_LMULS(MACC, VV):
3382 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
3383 // It only makes sense to toggle these between clobbering the
3384 // addend/subtrahend/minuend and clobbering one of the multiplicands.
3385 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
3386 assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
3387 unsigned Opc;
3388 switch (MI.getOpcode()) {
3389 default:
3390 llvm_unreachable("Unexpected opcode");
3391 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
3392 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
3393 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
3394 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
3395 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
3396 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
3397 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
3398 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
3399 CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD)
3400 CASE_VFMA_CHANGE_OPCODE_VV(FMSAC, FMSUB)
3401 CASE_VFMA_CHANGE_OPCODE_VV(FNMACC, FNMADD)
3402 CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC, FNMSUB)
3403 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
3404 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
3405 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
3406 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
3407 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
3408 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
3409 }
3410
3411 auto &WorkingMI = cloneIfNew(MI);
3412 WorkingMI.setDesc(get(Opc));
3413 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
3414 OpIdx1, OpIdx2);
3415 }
3416 case CASE_VFMA_OPCODE_VV(FMADD):
3417 case CASE_VFMA_OPCODE_VV(FMSUB):
3418 case CASE_VFMA_OPCODE_VV(FNMADD):
3419 case CASE_VFMA_OPCODE_VV(FNMSUB):
3420 case CASE_VMA_OPCODE_LMULS(MADD, VV):
3421 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
3422 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
3423 // If one of the operands is the addend, we need to change the opcode.
3424 // Otherwise we're just swapping two of the multiplicands.
3425 if (OpIdx1 == 3 || OpIdx2 == 3) {
3426 unsigned Opc;
3427 switch (MI.getOpcode()) {
3428 default:
3429 llvm_unreachable("Unexpected opcode");
3430 CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC)
3431 CASE_VFMA_CHANGE_OPCODE_VV(FMSUB, FMSAC)
3432 CASE_VFMA_CHANGE_OPCODE_VV(FNMADD, FNMACC)
3433 CASE_VFMA_CHANGE_OPCODE_VV(FNMSUB, FNMSAC)
3434 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
3435 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
3436 }
3437
3438 auto &WorkingMI = cloneIfNew(MI);
3439 WorkingMI.setDesc(get(Opc));
3440 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
3441 OpIdx1, OpIdx2);
3442 }
3443 // Let the default code handle it.
3444 break;
3445 }
3446 }
3447
3448 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
3449 }
3450
3451 #undef CASE_RVV_OPCODE_UNMASK_LMUL
3452 #undef CASE_RVV_OPCODE_MASK_LMUL
3453 #undef CASE_RVV_OPCODE_LMUL
3454 #undef CASE_RVV_OPCODE_UNMASK_WIDEN
3455 #undef CASE_RVV_OPCODE_UNMASK
3456 #undef CASE_RVV_OPCODE_MASK_WIDEN
3457 #undef CASE_RVV_OPCODE_MASK
3458 #undef CASE_RVV_OPCODE_WIDEN
3459 #undef CASE_RVV_OPCODE
3460
3461 #undef CASE_VMA_OPCODE_COMMON
3462 #undef CASE_VMA_OPCODE_LMULS_M1
3463 #undef CASE_VMA_OPCODE_LMULS_MF2
3464 #undef CASE_VMA_OPCODE_LMULS_MF4
3465 #undef CASE_VMA_OPCODE_LMULS
3466 #undef CASE_VFMA_OPCODE_COMMON
3467 #undef CASE_VFMA_OPCODE_LMULS_M1
3468 #undef CASE_VFMA_OPCODE_LMULS_MF2
3469 #undef CASE_VFMA_OPCODE_LMULS_MF4
3470 #undef CASE_VFMA_OPCODE_VV
3471 #undef CASE_VFMA_SPLATS
3472
3473 // clang-format off
3474 #define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
3475 RISCV::PseudoV##OP##_##LMUL##_TIED
3476
3477 #define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \
3478 CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
3479 case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
3480 case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
3481 case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
3482 case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
3483
3484 #define CASE_WIDEOP_OPCODE_LMULS(OP) \
3485 CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
3486 case CASE_WIDEOP_OPCODE_LMULS_MF4(OP)
3487
3488 #define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
3489 case RISCV::PseudoV##OP##_##LMUL##_TIED: \
3490 NewOpc = RISCV::PseudoV##OP##_##LMUL; \
3491 break;
3492
3493 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
3494 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
3495 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
3496 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
3497 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
3498 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
3499
3500 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
3501 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
3502 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
3503
3504 // FP widening ops may be SEW-aware. Create SEW-aware cases for them.
3505 #define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) \
3506 RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED
3507
3508 #define CASE_FP_WIDEOP_OPCODE_LMULS_MF4(OP) \
3509 CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
3510 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
3511 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32): \
3512 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
3513 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32): \
3514 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
3515 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32): \
3516 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16): \
3517 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32) \
3518
3519 #define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW) \
3520 case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED: \
3521 NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW; \
3522 break;
3523
3524 #define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
3525 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
3526 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
3527 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32) \
3528 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
3529 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32) \
3530 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
3531 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
3532 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
3533 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32) \
3534
3535 #define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
3536 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
3537 // clang-format on
3538
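// A rough sketch of the conversion below: a _TIED widening pseudo such as
// PseudoVWADD_WV_M2_TIED has its wide source tied to the destination;
// convertToThreeAddress rewrites it to the untied PseudoVWADD_WV_M2 with an
// undef passthru operand, removing the two-address constraint for the
// register allocator.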
3539 MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
3540 LiveVariables *LV,
3541 LiveIntervals *LIS) const {
3542 MachineInstrBuilder MIB;
3543 switch (MI.getOpcode()) {
3544 default:
3545 return nullptr;
3546 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
3547 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
3548 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
3549 MI.getNumExplicitOperands() == 7 &&
3550 "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
3551 // If the tail policy is undisturbed we can't convert.
3552 if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
3553 1) == 0)
3554 return nullptr;
3555 // clang-format off
3556 unsigned NewOpc;
3557 switch (MI.getOpcode()) {
3558 default:
3559 llvm_unreachable("Unexpected opcode");
3560 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
3561 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
3562 }
3563 // clang-format on
3564
3565 MachineBasicBlock &MBB = *MI.getParent();
3566 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
3567 .add(MI.getOperand(0))
3568 .addReg(MI.getOperand(0).getReg(), RegState::Undef)
3569 .add(MI.getOperand(1))
3570 .add(MI.getOperand(2))
3571 .add(MI.getOperand(3))
3572 .add(MI.getOperand(4))
3573 .add(MI.getOperand(5))
3574 .add(MI.getOperand(6));
3575 break;
3576 }
3577 case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
3578 case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
3579 case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
3580 case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
3581 // If the tail policy is undisturbed we can't convert.
3582 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
3583 MI.getNumExplicitOperands() == 6);
3584 if ((MI.getOperand(5).getImm() & 1) == 0)
3585 return nullptr;
3586
3587 // clang-format off
3588 unsigned NewOpc;
3589 switch (MI.getOpcode()) {
3590 default:
3591 llvm_unreachable("Unexpected opcode");
3592 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
3593 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
3594 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
3595 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
3596 }
3597 // clang-format on
3598
3599 MachineBasicBlock &MBB = *MI.getParent();
3600 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
3601 .add(MI.getOperand(0))
3602 .addReg(MI.getOperand(0).getReg(), RegState::Undef)
3603 .add(MI.getOperand(1))
3604 .add(MI.getOperand(2))
3605 .add(MI.getOperand(3))
3606 .add(MI.getOperand(4))
3607 .add(MI.getOperand(5));
3608 break;
3609 }
3610 }
3611 MIB.copyImplicitOps(MI);
3612
3613 if (LV) {
3614 unsigned NumOps = MI.getNumOperands();
3615 for (unsigned I = 1; I < NumOps; ++I) {
3616 MachineOperand &Op = MI.getOperand(I);
3617 if (Op.isReg() && Op.isKill())
3618 LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
3619 }
3620 }
3621
3622 if (LIS) {
3623 SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
3624
3625 if (MI.getOperand(0).isEarlyClobber()) {
3626 // The use of operand 1 was tied to the early-clobber def of operand 0, so
3627 // its live interval could have ended at an early-clobber slot. Now that
3628 // they are no longer tied, we need to update it to the normal register slot.
3629 LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
3630 LiveRange::Segment *S = LI.getSegmentContaining(Idx);
3631 if (S->end == Idx.getRegSlot(true))
3632 S->end = Idx.getRegSlot();
3633 }
3634 }
3635
3636 return MIB;
3637 }
3638
3639 #undef CASE_WIDEOP_OPCODE_COMMON
3640 #undef CASE_WIDEOP_OPCODE_LMULS_MF4
3641 #undef CASE_WIDEOP_OPCODE_LMULS
3642 #undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
3643 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4
3644 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
3645 #undef CASE_FP_WIDEOP_OPCODE_COMMON
3646 #undef CASE_FP_WIDEOP_OPCODE_LMULS_MF4
3647 #undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON
3648 #undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4
3649 #undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS
3650
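// A few illustrative expansions of the in-place multiply below (register
// names are placeholders):
//   Amount = 8             -> slli rd, rd, 3
//   Amount = 6 (with Zba)  -> slli rd, rd, 1 ; sh1add rd, rd, rd
//   Amount = 9 (no Zba)    -> slli tmp, rd, 3 ; add rd, tmp, rd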
3651 void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
3652 MachineBasicBlock::iterator II, const DebugLoc &DL,
3653 Register DestReg, uint32_t Amount,
3654 MachineInstr::MIFlag Flag) const {
3655 MachineRegisterInfo &MRI = MF.getRegInfo();
3656 if (llvm::has_single_bit<uint32_t>(Amount)) {
3657 uint32_t ShiftAmount = Log2_32(Amount);
3658 if (ShiftAmount == 0)
3659 return;
3660 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3661 .addReg(DestReg, RegState::Kill)
3662 .addImm(ShiftAmount)
3663 .setMIFlag(Flag);
3664 } else if (STI.hasStdExtZba() &&
3665 ((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) ||
3666 (Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) ||
3667 (Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) {
3668 // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
3669 unsigned Opc;
3670 uint32_t ShiftAmount;
3671 if (Amount % 9 == 0) {
3672 Opc = RISCV::SH3ADD;
3673 ShiftAmount = Log2_64(Amount / 9);
3674 } else if (Amount % 5 == 0) {
3675 Opc = RISCV::SH2ADD;
3676 ShiftAmount = Log2_64(Amount / 5);
3677 } else if (Amount % 3 == 0) {
3678 Opc = RISCV::SH1ADD;
3679 ShiftAmount = Log2_64(Amount / 3);
3680 } else {
3681 llvm_unreachable("implied by if-clause");
3682 }
3683 if (ShiftAmount)
3684 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3685 .addReg(DestReg, RegState::Kill)
3686 .addImm(ShiftAmount)
3687 .setMIFlag(Flag);
3688 BuildMI(MBB, II, DL, get(Opc), DestReg)
3689 .addReg(DestReg, RegState::Kill)
3690 .addReg(DestReg)
3691 .setMIFlag(Flag);
3692 } else if (llvm::has_single_bit<uint32_t>(Amount - 1)) {
3693 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3694 uint32_t ShiftAmount = Log2_32(Amount - 1);
3695 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
3696 .addReg(DestReg)
3697 .addImm(ShiftAmount)
3698 .setMIFlag(Flag);
3699 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
3700 .addReg(ScaledRegister, RegState::Kill)
3701 .addReg(DestReg, RegState::Kill)
3702 .setMIFlag(Flag);
3703 } else if (llvm::has_single_bit<uint32_t>(Amount + 1)) {
3704 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3705 uint32_t ShiftAmount = Log2_32(Amount + 1);
3706 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
3707 .addReg(DestReg)
3708 .addImm(ShiftAmount)
3709 .setMIFlag(Flag);
3710 BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg)
3711 .addReg(ScaledRegister, RegState::Kill)
3712 .addReg(DestReg, RegState::Kill)
3713 .setMIFlag(Flag);
3714 } else if (STI.hasStdExtZmmul()) {
3715 Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3716 movImm(MBB, II, DL, N, Amount, Flag);
3717 BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
3718 .addReg(DestReg, RegState::Kill)
3719 .addReg(N, RegState::Kill)
3720 .setMIFlag(Flag);
3721 } else {
3722 Register Acc;
3723 uint32_t PrevShiftAmount = 0;
3724 for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) {
3725 if (Amount & (1U << ShiftAmount)) {
3726 if (ShiftAmount)
3727 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3728 .addReg(DestReg, RegState::Kill)
3729 .addImm(ShiftAmount - PrevShiftAmount)
3730 .setMIFlag(Flag);
3731 if (Amount >> (ShiftAmount + 1)) {
3732 // If we don't have an accumulator yet, create it and copy DestReg.
3733 if (!Acc) {
3734 Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3735 BuildMI(MBB, II, DL, get(TargetOpcode::COPY), Acc)
3736 .addReg(DestReg)
3737 .setMIFlag(Flag);
3738 } else {
3739 BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
3740 .addReg(Acc, RegState::Kill)
3741 .addReg(DestReg)
3742 .setMIFlag(Flag);
3743 }
3744 }
3745 PrevShiftAmount = ShiftAmount;
3746 }
3747 }
3748 assert(Acc && "Expected valid accumulator");
3749 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
3750 .addReg(DestReg, RegState::Kill)
3751 .addReg(Acc, RegState::Kill)
3752 .setMIFlag(Flag);
3753 }
3754 }
3755
3756 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
3757 RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
3758 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
3759 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
3760 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
3761 return ArrayRef(TargetFlags);
3762 }
3763
3764 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
3765 bool RISCV::isSEXT_W(const MachineInstr &MI) {
3766 return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
3767 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0;
3768 }
3769
3770 // Returns true if this is the zext.w pattern, add.uw rd, rs1, x0.
3771 bool RISCV::isZEXT_W(const MachineInstr &MI) {
3772 return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() &&
3773 MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0;
3774 }
3775
3776 // Returns true if this is the zext.b pattern, andi rd, rs1, 255.
3777 bool RISCV::isZEXT_B(const MachineInstr &MI) {
3778 return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() &&
3779 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
3780 }
3781
3782 static bool isRVVWholeLoadStore(unsigned Opcode) {
3783 switch (Opcode) {
3784 default:
3785 return false;
3786 case RISCV::VS1R_V:
3787 case RISCV::VS2R_V:
3788 case RISCV::VS4R_V:
3789 case RISCV::VS8R_V:
3790 case RISCV::VL1RE8_V:
3791 case RISCV::VL2RE8_V:
3792 case RISCV::VL4RE8_V:
3793 case RISCV::VL8RE8_V:
3794 case RISCV::VL1RE16_V:
3795 case RISCV::VL2RE16_V:
3796 case RISCV::VL4RE16_V:
3797 case RISCV::VL8RE16_V:
3798 case RISCV::VL1RE32_V:
3799 case RISCV::VL2RE32_V:
3800 case RISCV::VL4RE32_V:
3801 case RISCV::VL8RE32_V:
3802 case RISCV::VL1RE64_V:
3803 case RISCV::VL2RE64_V:
3804 case RISCV::VL4RE64_V:
3805 case RISCV::VL8RE64_V:
3806 return true;
3807 }
3808 }
3809
3810 bool RISCV::isRVVSpill(const MachineInstr &MI) {
3811 // RVV lacks any support for immediate addressing for stack addresses, so be
3812 // conservative.
3813 unsigned Opcode = MI.getOpcode();
3814 if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
3815 !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
3816 return false;
3817 return true;
3818 }
3819
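// For the segment spill/reload pseudos below, the returned pair is
// (number of fields NF, LMUL); e.g. PseudoVSPILL3_M2 maps to {3, 2}.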
3820 std::optional<std::pair<unsigned, unsigned>>
3821 RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
3822 switch (Opcode) {
3823 default:
3824 return std::nullopt;
3825 case RISCV::PseudoVSPILL2_M1:
3826 case RISCV::PseudoVRELOAD2_M1:
3827 return std::make_pair(2u, 1u);
3828 case RISCV::PseudoVSPILL2_M2:
3829 case RISCV::PseudoVRELOAD2_M2:
3830 return std::make_pair(2u, 2u);
3831 case RISCV::PseudoVSPILL2_M4:
3832 case RISCV::PseudoVRELOAD2_M4:
3833 return std::make_pair(2u, 4u);
3834 case RISCV::PseudoVSPILL3_M1:
3835 case RISCV::PseudoVRELOAD3_M1:
3836 return std::make_pair(3u, 1u);
3837 case RISCV::PseudoVSPILL3_M2:
3838 case RISCV::PseudoVRELOAD3_M2:
3839 return std::make_pair(3u, 2u);
3840 case RISCV::PseudoVSPILL4_M1:
3841 case RISCV::PseudoVRELOAD4_M1:
3842 return std::make_pair(4u, 1u);
3843 case RISCV::PseudoVSPILL4_M2:
3844 case RISCV::PseudoVRELOAD4_M2:
3845 return std::make_pair(4u, 2u);
3846 case RISCV::PseudoVSPILL5_M1:
3847 case RISCV::PseudoVRELOAD5_M1:
3848 return std::make_pair(5u, 1u);
3849 case RISCV::PseudoVSPILL6_M1:
3850 case RISCV::PseudoVRELOAD6_M1:
3851 return std::make_pair(6u, 1u);
3852 case RISCV::PseudoVSPILL7_M1:
3853 case RISCV::PseudoVRELOAD7_M1:
3854 return std::make_pair(7u, 1u);
3855 case RISCV::PseudoVSPILL8_M1:
3856 case RISCV::PseudoVRELOAD8_M1:
3857 return std::make_pair(8u, 1u);
3858 }
3859 }
3860
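// Heuristic sketch: fault-only-first load pseudos (vle<EEW>ff.v) define both
// the data register and the trimmed VL, hence the check for two explicit defs
// plus a VL write, while inline asm that happens to clobber VL is excluded.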
3861 bool RISCV::isFaultFirstLoad(const MachineInstr &MI) {
3862 return MI.getNumExplicitDefs() == 2 &&
3863 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) && !MI.isInlineAsm();
3864 }
3865
3866 bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
3867 int16_t MI1FrmOpIdx =
3868 RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
3869 int16_t MI2FrmOpIdx =
3870 RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm);
3871 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
3872 return false;
3873 MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx);
3874 MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx);
3875 return FrmOp1.getImm() == FrmOp2.getImm();
3876 }
3877
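// A quick example of the contract below: for VSLL_VX with Log2SEW == 5
// (SEW=32) only the low 5 bits of the scalar shift amount are demanded,
// whereas for VADD_VX all 32 bits of the scalar are.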
3878 std::optional<unsigned>
3879 RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) {
3880 // TODO: Handle Zvbb instructions
3881 switch (Opcode) {
3882 default:
3883 return std::nullopt;
3884
3885 // 11.6. Vector Single-Width Shift Instructions
3886 case RISCV::VSLL_VX:
3887 case RISCV::VSRL_VX:
3888 case RISCV::VSRA_VX:
3889 // 12.4. Vector Single-Width Scaling Shift Instructions
3890 case RISCV::VSSRL_VX:
3891 case RISCV::VSSRA_VX:
3892 // Only the low lg2(SEW) bits of the shift-amount value are used.
3893 return Log2SEW;
3894
3895 // 11.7 Vector Narrowing Integer Right Shift Instructions
3896 case RISCV::VNSRL_WX:
3897 case RISCV::VNSRA_WX:
3898 // 12.5. Vector Narrowing Fixed-Point Clip Instructions
3899 case RISCV::VNCLIPU_WX:
3900 case RISCV::VNCLIP_WX:
3901 // Only the low lg2(2*SEW) bits of the shift-amount value are used.
3902 return Log2SEW + 1;
3903
3904 // 11.1. Vector Single-Width Integer Add and Subtract
3905 case RISCV::VADD_VX:
3906 case RISCV::VSUB_VX:
3907 case RISCV::VRSUB_VX:
3908 // 11.2. Vector Widening Integer Add/Subtract
3909 case RISCV::VWADDU_VX:
3910 case RISCV::VWSUBU_VX:
3911 case RISCV::VWADD_VX:
3912 case RISCV::VWSUB_VX:
3913 case RISCV::VWADDU_WX:
3914 case RISCV::VWSUBU_WX:
3915 case RISCV::VWADD_WX:
3916 case RISCV::VWSUB_WX:
3917 // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
3918 case RISCV::VADC_VXM:
3919 case RISCV::VADC_VIM:
3920 case RISCV::VMADC_VXM:
3921 case RISCV::VMADC_VIM:
3922 case RISCV::VMADC_VX:
3923 case RISCV::VSBC_VXM:
3924 case RISCV::VMSBC_VXM:
3925 case RISCV::VMSBC_VX:
3926 // 11.5 Vector Bitwise Logical Instructions
3927 case RISCV::VAND_VX:
3928 case RISCV::VOR_VX:
3929 case RISCV::VXOR_VX:
3930 // 11.8. Vector Integer Compare Instructions
3931 case RISCV::VMSEQ_VX:
3932 case RISCV::VMSNE_VX:
3933 case RISCV::VMSLTU_VX:
3934 case RISCV::VMSLT_VX:
3935 case RISCV::VMSLEU_VX:
3936 case RISCV::VMSLE_VX:
3937 case RISCV::VMSGTU_VX:
3938 case RISCV::VMSGT_VX:
3939 // 11.9. Vector Integer Min/Max Instructions
3940 case RISCV::VMINU_VX:
3941 case RISCV::VMIN_VX:
3942 case RISCV::VMAXU_VX:
3943 case RISCV::VMAX_VX:
3944 // 11.10. Vector Single-Width Integer Multiply Instructions
3945 case RISCV::VMUL_VX:
3946 case RISCV::VMULH_VX:
3947 case RISCV::VMULHU_VX:
3948 case RISCV::VMULHSU_VX:
3949 // 11.11. Vector Integer Divide Instructions
3950 case RISCV::VDIVU_VX:
3951 case RISCV::VDIV_VX:
3952 case RISCV::VREMU_VX:
3953 case RISCV::VREM_VX:
3954 // 11.12. Vector Widening Integer Multiply Instructions
3955 case RISCV::VWMUL_VX:
3956 case RISCV::VWMULU_VX:
3957 case RISCV::VWMULSU_VX:
3958 // 11.13. Vector Single-Width Integer Multiply-Add Instructions
3959 case RISCV::VMACC_VX:
3960 case RISCV::VNMSAC_VX:
3961 case RISCV::VMADD_VX:
3962 case RISCV::VNMSUB_VX:
3963 // 11.14. Vector Widening Integer Multiply-Add Instructions
3964 case RISCV::VWMACCU_VX:
3965 case RISCV::VWMACC_VX:
3966 case RISCV::VWMACCSU_VX:
3967 case RISCV::VWMACCUS_VX:
3968 // 11.15. Vector Integer Merge Instructions
3969 case RISCV::VMERGE_VXM:
3970 // 11.16. Vector Integer Move Instructions
3971 case RISCV::VMV_V_X:
3972 // 12.1. Vector Single-Width Saturating Add and Subtract
3973 case RISCV::VSADDU_VX:
3974 case RISCV::VSADD_VX:
3975 case RISCV::VSSUBU_VX:
3976 case RISCV::VSSUB_VX:
3977 // 12.2. Vector Single-Width Averaging Add and Subtract
3978 case RISCV::VAADDU_VX:
3979 case RISCV::VAADD_VX:
3980 case RISCV::VASUBU_VX:
3981 case RISCV::VASUB_VX:
3982 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
3983 case RISCV::VSMUL_VX:
3984 // 16.1. Integer Scalar Move Instructions
3985 case RISCV::VMV_S_X:
3986 return 1U << Log2SEW;
3987 }
3988 }
3989
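// Maps a vector pseudo back to its MC-level opcode, e.g. PseudoVADD_VV_M1 to
// VADD_VV; returns 0 when the opcode is not an RVV pseudo in the table.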
3990 unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
3991 const RISCVVPseudosTable::PseudoInfo *RVV =
3992 RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
3993 if (!RVV)
3994 return 0;
3995 return RVV->BaseInstr;
3996 }
3997