//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the RISC-V implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "RISCVInstrInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

#define GEN_CHECK_COMPRESS_INSTR
#include "RISCVGenCompressInstEmitter.inc"

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#include "RISCVGenInstrInfo.inc"

static cl::opt<bool> PreferWholeRegisterMove(
    "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
    cl::desc("Prefer whole register move for vector registers."));

static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
    "riscv-force-machine-combiner-strategy", cl::Hidden,
    cl::desc("Force machine combiner to use a specific strategy for machine "
             "trace metrics evaluation."),
    cl::init(MachineTraceStrategy::TS_NumStrategies),
    cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
                          "Local strategy."),
               clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
                          "MinInstrCount strategy.")));

namespace llvm::RISCVVPseudosTable {

using namespace RISCV;

#define GET_RISCVVPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVPseudosTable

namespace llvm::RISCV {

#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // end namespace llvm::RISCV

RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
    : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
      STI(STI) {}

MCInst RISCVInstrInfo::getNop() const {
  if (STI.hasStdExtCOrZca())
    return MCInstBuilder(RISCV::C_NOP);
  return MCInstBuilder(RISCV::ADDI)
      .addReg(RISCV::X0)
      .addReg(RISCV::X0)
      .addImm(0);
}

Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex) const {
  unsigned Dummy;
  return isLoadFromStackSlot(MI, FrameIndex, Dummy);
}

Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex,
                                             unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;
  case RISCV::LB:
  case RISCV::LBU:
    MemBytes = 1;
    break;
  case RISCV::LH:
  case RISCV::LHU:
  case RISCV::FLH:
    MemBytes = 2;
    break;
  case RISCV::LW:
  case RISCV::FLW:
  case RISCV::LWU:
    MemBytes = 4;
    break;
  case RISCV::LD:
  case RISCV::FLD:
    MemBytes = 8;
    break;
  }

  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
      MI.getOperand(2).getImm() == 0) {
    FrameIndex = MI.getOperand(1).getIndex();
    return MI.getOperand(0).getReg();
  }

  return 0;
}

Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex) const {
  unsigned Dummy;
  return isStoreToStackSlot(MI, FrameIndex, Dummy);
}

Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex,
                                            unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;
  case RISCV::SB:
    MemBytes = 1;
    break;
  case RISCV::SH:
  case RISCV::FSH:
    MemBytes = 2;
    break;
  case RISCV::SW:
  case RISCV::FSW:
    MemBytes = 4;
    break;
  case RISCV::SD:
  case RISCV::FSD:
    MemBytes = 8;
    break;
  }

  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
      MI.getOperand(2).getImm() == 0) {
    FrameIndex = MI.getOperand(1).getIndex();
    return MI.getOperand(0).getReg();
  }

  return 0;
}

bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI) const {
  if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VID_V &&
      MI.getOperand(1).isUndef() &&
      /* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl
         and vtype. Make sure we only rematerialize before RISCVInsertVSETVLI,
         i.e. -riscv-vsetvl-after-rvv-regalloc=true. */
      !MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
    return true;
  return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}

static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
}

static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
                                   const MachineBasicBlock &MBB,
                                   MachineBasicBlock::const_iterator MBBI,
                                   MachineBasicBlock::const_iterator &DefMBBI,
                                   RISCVII::VLMUL LMul) {
  if (PreferWholeRegisterMove)
    return false;

  assert(MBBI->getOpcode() == TargetOpcode::COPY &&
         "Unexpected COPY instruction.");
  Register SrcReg = MBBI->getOperand(1).getReg();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  bool FoundDef = false;
  bool FirstVSetVLI = false;
  unsigned FirstSEW = 0;
  while (MBBI != MBB.begin()) {
    --MBBI;
    if (MBBI->isMetaInstruction())
      continue;

    if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
        MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
        MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
      // There is a vsetvli between the COPY and the source defining
      // instruction:
      // vy = def_vop ...  (producing instruction)
      // ...
      // vsetvli
      // ...
      // vx = COPY vy
      if (!FoundDef) {
        if (!FirstVSetVLI) {
          FirstVSetVLI = true;
          unsigned FirstVType = MBBI->getOperand(2).getImm();
          RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
          FirstSEW = RISCVVType::getSEW(FirstVType);
          // The first encountered vsetvli must have the same LMUL as the
          // register class of the COPY.
          if (FirstLMul != LMul)
            return false;
        }
        // Only permit `vsetvli x0, x0, vtype` between the COPY and the source
        // defining instruction.
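        // (`vsetvli x0, x0, vtype` keeps the current VL and only updates
        // vtype, so the COPY still observes the VL set by the producing
        // instruction.)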
        if (MBBI->getOperand(0).getReg() != RISCV::X0)
          return false;
        if (MBBI->getOperand(1).isImm())
          return false;
        if (MBBI->getOperand(1).getReg() != RISCV::X0)
          return false;
        continue;
      }

      // MBBI is the first vsetvli before the producing instruction.
      unsigned VType = MBBI->getOperand(2).getImm();
      // If there is a vsetvli between the COPY and the producing instruction,
      // the SEWs must match.
      if (FirstVSetVLI) {
        // If SEW is different, return false.
        if (RISCVVType::getSEW(VType) != FirstSEW)
          return false;
      }

      // If the vsetvli is tail undisturbed, keep the whole register move.
      if (!RISCVVType::isTailAgnostic(VType))
        return false;

      // The checking is conservative. We only have register classes for
      // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
      // for fractional LMUL operations. However, we cannot use the vsetvli's
      // LMUL for widening operations, because the result of a widening
      // operation is 2 x LMUL.
      return LMul == RISCVVType::getVLMUL(VType);
    } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
      return false;
    } else if (MBBI->getNumDefs()) {
      // Check all the instructions which will change VL.
      // For example, vleff has an implicit def of VL.
      if (MBBI->modifiesRegister(RISCV::VL, /*TRI=*/nullptr))
        return false;

      // Only convert whole register copies to vmv.v.v when the defining
      // value appears in the explicit operands.
      for (const MachineOperand &MO : MBBI->explicit_operands()) {
        if (!MO.isReg() || !MO.isDef())
          continue;
        if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
          // We only permit the source of the COPY to have the same LMUL as
          // the defined operand.
          // There are cases where we need to keep the whole register copy if
          // the LMUL is different.
          // For example,
          // $x0 = PseudoVSETIVLI 4, 73   // vsetivli zero, 4, e16,m2,ta,mu
          // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
          // # The COPY may be created by the vlmul_trunc intrinsic.
          // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
          //
          // After widening, the valid value will be 4 x e32 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
          // FIXME: The COPY of a subregister of a Zvlsseg register will not
          // be convertible to vmv.v.[v|i] under this constraint.
          if (MO.getReg() != SrcReg)
            return false;

          // For widening reduction instructions with an LMUL_1 input vector,
          // checking only the LMUL is insufficient because the reduction
          // result is always LMUL_1.
          // For example,
          // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
          // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
          // $v26 = COPY killed renamable $v8
          // After widening, the valid value will be 1 x e16 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
          uint64_t TSFlags = MBBI->getDesc().TSFlags;
          if (RISCVII::isRVVWideningReduction(TSFlags))
            return false;

          // If the producing instruction does not depend on vsetvli, do not
          // convert the COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
          if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
            return false;

          // Found the definition.
          FoundDef = true;
          DefMBBI = MBBI;
          break;
        }
      }
    }
  }

  return false;
}

void RISCVInstrInfo::copyPhysRegVector(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
    const TargetRegisterClass *RegClass) const {
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  RISCVII::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags);
  unsigned NF = RISCVRI::getNF(RegClass->TSFlags);

  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  uint16_t DstEncoding = TRI->getEncodingValue(DstReg);
  auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(LMul);
  assert(!Fractional && "It is impossible to be fractional lmul here.");
  unsigned NumRegs = NF * LMulVal;
  bool ReversedCopy =
      forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs);
  if (ReversedCopy) {
    // If the src and dest overlap when copying a tuple, we need to copy the
    // registers in reverse.
    SrcEncoding += NumRegs - 1;
    DstEncoding += NumRegs - 1;
  }

  unsigned I = 0;
  auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
      -> std::tuple<RISCVII::VLMUL, const TargetRegisterClass &, unsigned,
                    unsigned, unsigned> {
    if (ReversedCopy) {
      // For reversed copying, if there are enough aligned registers (8/4/2),
      // we can do a larger copy (LMUL8/4/2).
      // Besides, we already know from forwardCopyWillClobberTuple that
      // DstEncoding is larger than SrcEncoding, so the difference between
      // DstEncoding and SrcEncoding should be >= the LMUL value we try to
      // use, to avoid clobbering.
      uint16_t Diff = DstEncoding - SrcEncoding;
      if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
          DstEncoding % 8 == 7)
        return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
                RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
      if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
          DstEncoding % 4 == 3)
        return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
                RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
      if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
          DstEncoding % 2 == 1)
        return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
                RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
      // Otherwise we should do LMUL1 copying.
      return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
              RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
    }

    // For forward copying, if the source register encoding and destination
    // register encoding are aligned to 8/4/2, we can do a LMUL8/4/2 copy.
    if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
      return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
              RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
    if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
      return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
              RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
    if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
      return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
              RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
    // Otherwise we should do LMUL1 copying.
    return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
            RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
  };
  auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
                                   uint16_t Encoding) {
    MCRegister Reg = RISCV::V0 + Encoding;
    if (&RegClass == &RISCV::VRRegClass)
      return Reg;
    return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
  };
  while (I != NumRegs) {
    // For non-segment copying, we only do this once as the registers are
    // always aligned.
    // For segment copying, we may do this several times. If the registers
    // are aligned to a larger LMUL, we can eliminate some copies.
    auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
        GetCopyInfo(SrcEncoding, DstEncoding);
    auto [NumCopied, _] = RISCVVType::decodeVLMUL(LMulCopied);

    MachineBasicBlock::const_iterator DefMBBI;
    if (LMul == LMulCopied &&
        isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
      Opc = VVOpc;
      if (DefMBBI->getOpcode() == VIOpc)
        Opc = VIOpc;
    }

    // Emit the actual copy.
    // For reversed copying, the encoding should be decreased.
    MCRegister ActualSrcReg = FindRegWithEncoding(
        RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
    MCRegister ActualDstReg = FindRegWithEncoding(
        RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);

    auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
    bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;
    bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;
    if (UseVMV)
      MIB.addReg(ActualDstReg, RegState::Undef);
    if (UseVMV_V_I)
      MIB = MIB.add(DefMBBI->getOperand(2));
    else
      MIB = MIB.addReg(ActualSrcReg, getKillRegState(KillSrc));
    if (UseVMV) {
      const MCInstrDesc &Desc = DefMBBI->getDesc();
      MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
      MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
      MIB.addImm(0);                                            // tu, mu
      MIB.addReg(RISCV::VL, RegState::Implicit);
      MIB.addReg(RISCV::VTYPE, RegState::Implicit);
    }

    // If we are copying in reverse, decrease the encodings.
    SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
    DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
    I += NumCopied;
  }
}

void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 const DebugLoc &DL, MCRegister DstReg,
                                 MCRegister SrcReg, bool KillSrc) const {
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addImm(0);
    return;
  }

  if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
    // Emit an ADDI for both parts of the GPRPair.
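    // For example, a pair copy (such as one holding an RV32 Zdinx double)
    // expands to an `addi rd, rs, 0` of the even halves followed by an
    // `addi` of the odd halves.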
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
            TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
        .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
                getKillRegState(KillSrc))
        .addImm(0);
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
            TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
        .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
                getKillRegState(KillSrc))
        .addImm(0);
    return;
  }

  // Handle copies from a CSR.
  if (RISCV::VCSRRegClass.contains(SrcReg) &&
      RISCV::GPRRegClass.contains(DstReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
        .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
        .addReg(RISCV::X0);
    return;
  }

  if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
    unsigned Opc;
    if (STI.hasStdExtZfh()) {
      Opc = RISCV::FSGNJ_H;
    } else {
      assert(STI.hasStdExtF() &&
             (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
             "Unexpected extensions");
      // Zfhmin/Zfbfmin don't have FSGNJ_H, so replace FSGNJ_H with FSGNJ_S.
      DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
                                        &RISCV::FPR32RegClass);
      SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
                                        &RISCV::FPR32RegClass);
      Opc = RISCV::FSGNJ_S;
    }
    BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR32RegClass.contains(DstReg) &&
      RISCV::GPRRegClass.contains(SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::GPRRegClass.contains(DstReg) &&
      RISCV::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR64RegClass.contains(DstReg) &&
      RISCV::GPRRegClass.contains(SrcReg)) {
    assert(STI.getXLen() == 64 && "Unexpected GPR size");
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::GPRRegClass.contains(DstReg) &&
      RISCV::FPR64RegClass.contains(SrcReg)) {
    assert(STI.getXLen() == 64 && "Unexpected GPR size");
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // VR->VR copies.
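  // The table below covers the plain vector register classes (LMUL 1/2/4/8)
  // as well as the segment (tuple) register classes used by Zvlsseg pseudos.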
  static const TargetRegisterClass *RVVRegClasses[] = {
      &RISCV::VRRegClass,     &RISCV::VRM2RegClass,   &RISCV::VRM4RegClass,
      &RISCV::VRM8RegClass,   &RISCV::VRN2M1RegClass, &RISCV::VRN2M2RegClass,
      &RISCV::VRN2M4RegClass, &RISCV::VRN3M1RegClass, &RISCV::VRN3M2RegClass,
      &RISCV::VRN4M1RegClass, &RISCV::VRN4M2RegClass, &RISCV::VRN5M1RegClass,
      &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass};
  for (const auto &RegClass : RVVRegClasses) {
    if (RegClass->contains(DstReg, SrcReg)) {
      copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
      return;
    }
  }

  llvm_unreachable("Impossible reg-to-reg copy");
}

void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I,
                                         Register SrcReg, bool IsKill, int FI,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI,
                                         Register VReg) const {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  unsigned Opcode;
  bool IsScalableVector = true;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::SW
                                                             : RISCV::SD;
    IsScalableVector = false;
  } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::PseudoRV32ZdinxSD;
    IsScalableVector = false;
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSH;
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSW;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSD;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS1R_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS2R_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS4R_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS8R_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL8_M1;
  else
    llvm_unreachable("Can't store this register to stack slot");

  if (IsScalableVector) {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
        LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));

    MFI.setStackID(FI, TargetStackID::ScalableVector);
    BuildMI(MBB, I, DebugLoc(), get(Opcode))
        .addReg(SrcReg, getKillRegState(IsKill))
        .addFrameIndex(FI)
        .addMemOperand(MMO);
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    BuildMI(MBB, I, DebugLoc(), get(Opcode))
        .addReg(SrcReg, getKillRegState(IsKill))
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO);
  }
}

void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          Register DstReg, int FI,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI,
                                          Register VReg) const {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  unsigned Opcode;
  bool IsScalableVector = true;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::LW
                                                             : RISCV::LD;
    IsScalableVector = false;
  } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::PseudoRV32ZdinxLD;
    IsScalableVector = false;
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLH;
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLW;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLD;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL1RE8_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL2RE8_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL4RE8_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL8RE8_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD8_M1;
  else
    llvm_unreachable("Can't load this register from stack slot");

  if (IsScalableVector) {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
        LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));

    MFI.setStackID(FI, TargetStackID::ScalableVector);
    BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
        .addFrameIndex(FI)
        .addMemOperand(MMO);
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO);
  }
}

MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
    VirtRegMap *VRM) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // The below optimizations narrow the load so they are only valid for little
  // endian.
  // TODO: Support big endian by adding an offset into the frame object?
  if (MF.getDataLayout().isBigEndian())
    return nullptr;

  // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
  if (Ops.size() != 1 || Ops[0] != 1)
    return nullptr;

  unsigned LoadOpc;
  switch (MI.getOpcode()) {
  default:
    if (RISCV::isSEXT_W(MI)) {
      LoadOpc = RISCV::LW;
      break;
    }
    if (RISCV::isZEXT_W(MI)) {
      LoadOpc = RISCV::LWU;
      break;
    }
    if (RISCV::isZEXT_B(MI)) {
      LoadOpc = RISCV::LBU;
      break;
    }
    return nullptr;
  case RISCV::SEXT_H:
    LoadOpc = RISCV::LH;
    break;
  case RISCV::SEXT_B:
    LoadOpc = RISCV::LB;
    break;
  case RISCV::ZEXT_H_RV32:
  case RISCV::ZEXT_H_RV64:
    LoadOpc = RISCV::LHU;
    break;
  }

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIndex),
      MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
      MFI.getObjectAlign(FrameIndex));

  Register DstReg = MI.getOperand(0).getReg();
  return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
                 DstReg)
      .addFrameIndex(FrameIndex)
      .addImm(0)
      .addMemOperand(MMO);
}

void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            const DebugLoc &DL, Register DstReg, uint64_t Val,
                            MachineInstr::MIFlag Flag, bool DstRenamable,
                            bool DstIsDead) const {
  Register SrcReg = RISCV::X0;

  // For RV32, allow a signed or unsigned 32-bit value.
  if (!STI.is64Bit() && !isInt<32>(Val)) {
    // If we have a uimm32, it will still fit in a register, so we can allow it.
    if (!isUInt<32>(Val))
      report_fatal_error("Should only materialize 32-bit constants for RV32");

    // Sign extend for generateInstSeq.
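    // (Only the low 32 bits are materialized on RV32, so sign-extending here
    // does not change the value that ends up in the register.)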
    Val = SignExtend64<32>(Val);
  }

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
  assert(!Seq.empty());

  bool SrcRenamable = false;
  unsigned Num = 0;

  for (const RISCVMatInt::Inst &Inst : Seq) {
    bool LastItem = ++Num == Seq.size();
    unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
                           getRenamableRegState(DstRenamable);
    unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
                           getRenamableRegState(SrcRenamable);
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addImm(Inst.getImm())
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegX0:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addReg(RISCV::X0)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegReg:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addReg(SrcReg, SrcRegState)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegImm:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addImm(Inst.getImm())
          .setMIFlag(Flag);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = DstReg;
    SrcRenamable = DstRenamable;
  }
}

static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
  switch (Opc) {
  default:
    return RISCVCC::COND_INVALID;
  case RISCV::CV_BEQIMM:
    return RISCVCC::COND_EQ;
  case RISCV::CV_BNEIMM:
    return RISCVCC::COND_NE;
  case RISCV::BEQ:
    return RISCVCC::COND_EQ;
  case RISCV::BNE:
    return RISCVCC::COND_NE;
  case RISCV::BLT:
    return RISCVCC::COND_LT;
  case RISCV::BGE:
    return RISCVCC::COND_GE;
  case RISCV::BLTU:
    return RISCVCC::COND_LTU;
  case RISCV::BGEU:
    return RISCVCC::COND_GEU;
  }
}

// The contents of values added to Cond are not examined outside of
// RISCVInstrInfo, giving us flexibility in what to push to it. For RISC-V, we
// push the condition code as an immediate followed by the two register
// operands: CC, Reg1, Reg2.
static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  assert(LastInst.getDesc().isConditionalBranch() &&
         "Unknown conditional branch");
  Target = LastInst.getOperand(2).getMBB();
  unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
  Cond.push_back(MachineOperand::CreateImm(CC));
  Cond.push_back(LastInst.getOperand(0));
  Cond.push_back(LastInst.getOperand(1));
}

unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, bool Imm) {
  switch (CC) {
  default:
    llvm_unreachable("Unknown condition code!");
  case RISCVCC::COND_EQ:
    return Imm ? RISCV::CV_BEQIMM : RISCV::BEQ;
  case RISCVCC::COND_NE:
    return Imm ? RISCV::CV_BNEIMM : RISCV::BNE;
  case RISCVCC::COND_LT:
    return RISCV::BLT;
  case RISCVCC::COND_GE:
    return RISCV::BGE;
  case RISCVCC::COND_LTU:
    return RISCV::BLTU;
  case RISCVCC::COND_GEU:
    return RISCV::BGEU;
  }
}

const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC,
                                             bool Imm) const {
  return get(RISCVCC::getBrCond(CC, Imm));
}

RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unrecognized conditional branch");
  case RISCVCC::COND_EQ:
    return RISCVCC::COND_NE;
  case RISCVCC::COND_NE:
    return RISCVCC::COND_EQ;
  case RISCVCC::COND_LT:
    return RISCVCC::COND_GE;
  case RISCVCC::COND_GE:
    return RISCVCC::COND_LT;
  case RISCVCC::COND_LTU:
    return RISCVCC::COND_GEU;
  case RISCVCC::COND_GEU:
    return RISCVCC::COND_LTU;
  }
}

bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  TBB = FBB = nullptr;
  Cond.clear();

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end() || !isUnpredicatedTerminator(*I))
    return false;

  // Count the number of terminators and find the first unconditional or
  // indirect branch.
  MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
  int NumTerminators = 0;
  for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
       J++) {
    NumTerminators++;
    if (J->getDesc().isUnconditionalBranch() ||
        J->getDesc().isIndirectBranch()) {
      FirstUncondOrIndirectBr = J.getReverse();
    }
  }

  // If AllowModify is true, we can erase any terminators after
  // FirstUncondOrIndirectBr.
  if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
    while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
      std::next(FirstUncondOrIndirectBr)->eraseFromParent();
      NumTerminators--;
    }
    I = FirstUncondOrIndirectBr;
  }

  // We can't handle blocks that end in an indirect branch.
  if (I->getDesc().isIndirectBranch())
    return true;

  // We can't handle Generic branch opcodes from Global ISel.
  if (I->isPreISelOpcode())
    return true;

  // We can't handle blocks with more than 2 terminators.
  if (NumTerminators > 2)
    return true;

  // Handle a single unconditional branch.
  if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
    TBB = getBranchDestBlock(*I);
    return false;
  }

  // Handle a single conditional branch.
  if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
    parseCondBranch(*I, TBB, Cond);
    return false;
  }

  // Handle a conditional branch followed by an unconditional branch.
  if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
      I->getDesc().isUnconditionalBranch()) {
    parseCondBranch(*std::prev(I), TBB, Cond);
    FBB = getBranchDestBlock(*I);
    return false;
  }

  // Otherwise, we can't handle this.
  return true;
}

unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                      int *BytesRemoved) const {
  if (BytesRemoved)
    *BytesRemoved = 0;
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!I->getDesc().isUnconditionalBranch() &&
      !I->getDesc().isConditionalBranch())
    return 0;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(*I);
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!I->getDesc().isConditionalBranch())
    return 1;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(*I);
  I->eraseFromParent();
  return 2;
}

// Inserts a branch into the end of the specified MachineBasicBlock, returning
// the number of instructions inserted.
unsigned RISCVInstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  if (BytesAdded)
    *BytesAdded = 0;

  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 3 || Cond.size() == 0) &&
         "RISC-V branch conditions have three components!");

  // Unconditional branch.
  if (Cond.empty()) {
    MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
    if (BytesAdded)
      *BytesAdded += getInstSizeInBytes(MI);
    return 1;
  }

  // Either a one or two-way conditional branch.
  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  MachineInstr &CondMI = *BuildMI(&MBB, DL, getBrCond(CC, Cond[2].isImm()))
                              .add(Cond[1])
                              .add(Cond[2])
                              .addMBB(TBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(CondMI);

  // One-way conditional branch.
  if (!FBB)
    return 1;

  // Two-way conditional branch.
  MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(MI);
  return 2;
}

void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
                                          MachineBasicBlock &DestBB,
                                          MachineBasicBlock &RestoreBB,
                                          const DebugLoc &DL, int64_t BrOffset,
                                          RegScavenger *RS) const {
  assert(RS && "RegScavenger required for long branching");
  assert(MBB.empty() &&
         "new block should be inserted for expanding unconditional branch");
  assert(MBB.pred_size() == 1);
  assert(RestoreBB.empty() &&
         "restore block should be inserted for restoring clobbered registers");

  MachineFunction *MF = MBB.getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();

  if (!isInt<32>(BrOffset))
    report_fatal_error(
        "Branch offsets outside of the signed 32-bit range not supported");

  // FIXME: A virtual register must be used initially, as the register
  // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
  // uses the same workaround).
  Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRJALRRegClass);
  auto II = MBB.end();
  // We may also update the jump target to RestoreBB later.
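  // PseudoJump expands to an auipc+jalr pair, so it can reach any offset
  // within the signed 32-bit range checked above.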
  MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
                          .addReg(ScratchReg, RegState::Define | RegState::Dead)
                          .addMBB(&DestBB, RISCVII::MO_CALL);

  RS->enterBasicBlockEnd(MBB);
  Register TmpGPR =
      RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
                                    /*RestoreAfter=*/false, /*SpAdj=*/0,
                                    /*AllowSpill=*/false);
  if (TmpGPR != RISCV::NoRegister)
    RS->setRegUsed(TmpGPR);
  else {
    // The case when there is no scavenged register needs special handling.

    // Pick s11 because it doesn't make a difference.
    TmpGPR = RISCV::X27;

    int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
    if (FrameIndex == -1)
      report_fatal_error("underestimated function size");

    storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
                        &RISCV::GPRRegClass, TRI, Register());
    TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
                             /*SpAdj=*/0, /*FIOperandNum=*/1);

    MI.getOperand(1).setMBB(&RestoreBB);

    loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
                         &RISCV::GPRRegClass, TRI, Register());
    TRI->eliminateFrameIndex(RestoreBB.back(),
                             /*SpAdj=*/0, /*FIOperandNum=*/1);
  }

  MRI.replaceRegWith(ScratchReg, TmpGPR);
  MRI.clearVirtRegs();
}

bool RISCVInstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  assert((Cond.size() == 3) && "Invalid branch condition!");
  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  Cond[0].setImm(getOppositeBranchCondition(CC));
  return false;
}

bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  MachineBasicBlock *MBB = MI.getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  MachineBasicBlock *TBB, *FBB;
  SmallVector<MachineOperand, 3> Cond;
  if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
    return false;

  RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  assert(CC != RISCVCC::COND_INVALID);

  if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
    return false;

  // For two constants C0 and C1 from
  // ```
  // li Y, C0
  // li Z, C1
  // ```
  // 1. if C1 = C0 + 1
  //    we can turn:
  //    (a) blt Y, X -> bge X, Z
  //    (b) bge Y, X -> blt X, Z
  //
  // 2. if C1 = C0 - 1
  //    we can turn:
  //    (a) blt X, Y -> bge Z, X
  //    (b) bge X, Y -> blt Z, X
  //
  // To make sure this optimization is really beneficial, we only
  // optimize for cases where Y has only one use (i.e. it is only used by the
  // branch).

  // Right now we only care about LI (i.e. ADDI x0, imm).
  auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
    if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
        MI->getOperand(1).getReg() == RISCV::X0) {
      Imm = MI->getOperand(2).getImm();
      return true;
    }
    return false;
  };
  // Either a load-immediate instruction or X0.
  auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
    if (!Op.isReg())
      return false;
    Register Reg = Op.getReg();
    return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
  };

  MachineOperand &LHS = MI.getOperand(0);
  MachineOperand &RHS = MI.getOperand(1);
  // Try to find the register holding constant Z; return an invalid register
  // otherwise.
  auto searchConst = [&](int64_t C1) -> Register {
    MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
    auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
      int64_t Imm;
      return isLoadImm(&I, Imm) && Imm == C1 &&
             I.getOperand(0).getReg().isVirtual();
    });
    if (DefC1 != E)
      return DefC1->getOperand(0).getReg();

    return Register();
  };

  bool Modify = false;
  int64_t C0;
  if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
    // Might be case 1.
    // Signed integer overflow is UB, so guard the C0 + 1 computation.
    // (The unsigned range is larger, so we don't need to worry about
    // unsigned overflow here.)
    if (C0 < INT64_MAX)
      if (Register RegZ = searchConst(C0 + 1)) {
        reverseBranchCondition(Cond);
        Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
        Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
        // We might extend the live range of Z, clear its kill flag to
        // account for this.
        MRI.clearKillFlags(RegZ);
        Modify = true;
      }
  } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
    // Might be case 2.
    // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
    // when C0 is zero.
    if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
      if (Register RegZ = searchConst(C0 - 1)) {
        reverseBranchCondition(Cond);
        Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
        Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
        // We might extend the live range of Z, clear its kill flag to
        // account for this.
        MRI.clearKillFlags(RegZ);
        Modify = true;
      }
  }

  if (!Modify)
    return false;

  // Build the new branch and remove the old one.
  BuildMI(*MBB, MI, MI.getDebugLoc(),
          getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
      .add(Cond[1])
      .add(Cond[2])
      .addMBB(TBB);
  MI.eraseFromParent();

  return true;
}

MachineBasicBlock *
RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  assert(MI.getDesc().isBranch() && "Unexpected opcode!");
  // The branch target is always the last operand.
  int NumOp = MI.getNumExplicitOperands();
  return MI.getOperand(NumOp - 1).getMBB();
}

bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                           int64_t BrOffset) const {
  unsigned XLen = STI.getXLen();
  // Ideally we could determine the supported branch offset from the
  // RISCVII::FormMask, but this can't be used for Pseudo instructions like
  // PseudoBR.
  switch (BranchOp) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case RISCV::BEQ:
  case RISCV::BNE:
  case RISCV::BLT:
  case RISCV::BGE:
  case RISCV::BLTU:
  case RISCV::BGEU:
  case RISCV::CV_BEQIMM:
  case RISCV::CV_BNEIMM:
    return isIntN(13, BrOffset);
  case RISCV::JAL:
  case RISCV::PseudoBR:
    return isIntN(21, BrOffset);
  case RISCV::PseudoJump:
    return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
  }
}

// If the operation has a predicated pseudo instruction, return the pseudo
// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
// TODO: Support more operations.
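// As an illustration (register names are made up), optimizeSelect below can
// fold a one-use ALU op feeding the true operand of a PseudoCCMOVGPR:
//   %c = ADD %a, %b
//   %r = PseudoCCMOVGPR %x, %y, cc, %false, %c
// into a single predicated pseudo:
//   %r = PseudoCCADD %x, %y, cc, %false, %a, %b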
unsigned getPredicatedOpcode(unsigned Opcode) {
  switch (Opcode) {
  case RISCV::ADD:   return RISCV::PseudoCCADD;
  case RISCV::SUB:   return RISCV::PseudoCCSUB;
  case RISCV::SLL:   return RISCV::PseudoCCSLL;
  case RISCV::SRL:   return RISCV::PseudoCCSRL;
  case RISCV::SRA:   return RISCV::PseudoCCSRA;
  case RISCV::AND:   return RISCV::PseudoCCAND;
  case RISCV::OR:    return RISCV::PseudoCCOR;
  case RISCV::XOR:   return RISCV::PseudoCCXOR;

  case RISCV::ADDI:  return RISCV::PseudoCCADDI;
  case RISCV::SLLI:  return RISCV::PseudoCCSLLI;
  case RISCV::SRLI:  return RISCV::PseudoCCSRLI;
  case RISCV::SRAI:  return RISCV::PseudoCCSRAI;
  case RISCV::ANDI:  return RISCV::PseudoCCANDI;
  case RISCV::ORI:   return RISCV::PseudoCCORI;
  case RISCV::XORI:  return RISCV::PseudoCCXORI;

  case RISCV::ADDW:  return RISCV::PseudoCCADDW;
  case RISCV::SUBW:  return RISCV::PseudoCCSUBW;
  case RISCV::SLLW:  return RISCV::PseudoCCSLLW;
  case RISCV::SRLW:  return RISCV::PseudoCCSRLW;
  case RISCV::SRAW:  return RISCV::PseudoCCSRAW;

  case RISCV::ADDIW: return RISCV::PseudoCCADDIW;
  case RISCV::SLLIW: return RISCV::PseudoCCSLLIW;
  case RISCV::SRLIW: return RISCV::PseudoCCSRLIW;
  case RISCV::SRAIW: return RISCV::PseudoCCSRAIW;

  case RISCV::ANDN:  return RISCV::PseudoCCANDN;
  case RISCV::ORN:   return RISCV::PseudoCCORN;
  case RISCV::XNOR:  return RISCV::PseudoCCXNOR;
  }

  return RISCV::INSTRUCTION_LIST_END;
}

/// Identify instructions that can be folded into a CCMOV instruction, and
/// return the defining instruction.
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
                                           const MachineRegisterInfo &MRI,
                                           const TargetInstrInfo *TII) {
  if (!Reg.isVirtual())
    return nullptr;
  if (!MRI.hasOneNonDBGUse(Reg))
    return nullptr;
  MachineInstr *MI = MRI.getVRegDef(Reg);
  if (!MI)
    return nullptr;
  // Check if MI can be predicated and folded into the CCMOV.
  if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
    return nullptr;
  // Don't predicate the li idiom.
  if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
      MI->getOperand(1).getReg() == RISCV::X0)
    return nullptr;
  // Check if MI has any other defs or physreg uses.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
    // Reject frame index operands; PEI can't handle the predicated pseudos.
    if (MO.isFI() || MO.isCPI() || MO.isJTI())
      return nullptr;
    if (!MO.isReg())
      continue;
    // MI can't have any tied operands; that would conflict with predication.
    if (MO.isTied())
      return nullptr;
    if (MO.isDef())
      return nullptr;
    // Allow constant physregs.
    if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))
      return nullptr;
  }
  bool DontMoveAcrossStores = true;
  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
    return nullptr;
  return MI;
}

bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   unsigned &TrueOp, unsigned &FalseOp,
                                   bool &Optimizable) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  // CCMOV operands:
  // 0: Def.
  // 1: LHS of compare.
  // 2: RHS of compare.
  // 3: Condition code.
  // 4: False use.
  // 5: True use.
  TrueOp = 5;
  FalseOp = 4;
  Cond.push_back(MI.getOperand(1));
  Cond.push_back(MI.getOperand(2));
  Cond.push_back(MI.getOperand(3));
  // We can only fold when we support short forward branch opt.
  Optimizable = STI.hasShortForwardBranchOpt();
  return false;
}

MachineInstr *
RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
                               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                               bool PreferFalse) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  if (!STI.hasShortForwardBranchOpt())
    return nullptr;

  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  MachineInstr *DefMI =
      canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
  if (!DefMI)
    return nullptr;

  // Find the new register class to use.
  MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
  Register DestReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
  if (!MRI.constrainRegClass(DestReg, PreviousClass))
    return nullptr;

  unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
  assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");

  // Create a new predicated version of DefMI.
  MachineInstrBuilder NewMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);

  // Copy the condition portion.
  NewMI.add(MI.getOperand(1));
  NewMI.add(MI.getOperand(2));

  // Add the condition code, inverting if necessary.
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
  if (Invert)
    CC = RISCVCC::getOppositeBranchCondition(CC);
  NewMI.addImm(CC);

  // Copy the false register.
  NewMI.add(FalseReg);

  // Copy all the DefMI operands.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
    NewMI.add(DefMI->getOperand(i));

  // Update the SeenMIs set: register the newly created MI and erase the
  // removed DefMI.
  SeenMIs.insert(NewMI);
  SeenMIs.erase(DefMI);

  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
  // DefMI would be invalid when transferred inside the loop. Checking for a
  // loop is expensive, but at least remove kill flags if they are in different
  // BBs.
  if (DefMI->getParent() != MI.getParent())
    NewMI->clearKillInfo();

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}

unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  if (MI.isMetaInstruction())
    return 0;

  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::INLINEASM ||
      Opcode == TargetOpcode::INLINEASM_BR) {
    const MachineFunction &MF = *MI.getParent()->getParent();
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
                              *MF.getTarget().getMCAsmInfo());
  }

  if (!MI.memoperands_empty()) {
    MachineMemOperand *MMO = *(MI.memoperands_begin());
    if (STI.hasStdExtZihintntl() && MMO->isNonTemporal()) {
      if (STI.hasStdExtCOrZca() && STI.enableRVCHintInstrs()) {
        if (isCompressibleInst(MI, STI))
          return 4; // c.ntl.all + c.load/c.store
        return 6;   // c.ntl.all + load/store
      }
      return 8; // ntl.all + load/store
    }
  }

  if (Opcode == TargetOpcode::BUNDLE)
    return getInstBundleLength(MI);

  if (MI.getParent() && MI.getParent()->getParent()) {
    if (isCompressibleInst(MI, STI))
      return 2;
  }

  switch (Opcode) {
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    return StackMapOpers(&MI).getNumPatchBytes();
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    return PatchPointOpers(&MI).getNumPatchBytes();
  case TargetOpcode::STATEPOINT: {
    // The size of the statepoint intrinsic is the number of bytes requested.
    unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();
    // No patch bytes means at most a PseudoCall is emitted.
    return std::max(NumBytes, 8U);
  }
  default:
    return get(Opcode).getSize();
  }
}

unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
           MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
  case RISCV::ADDI:
  case RISCV::ORI:
  case RISCV::XORI:
    return (MI.getOperand(1).isReg() &&
            MI.getOperand(1).getReg() == RISCV::X0) ||
           (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
  }
  return MI.isAsCheapAsAMove();
}

std::optional<DestSourcePair>
RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  if (MI.isMoveReg())
    return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
  switch (MI.getOpcode()) {
  default:
    break;
  case RISCV::ADDI:
    // Operand 1 can be a frame index, but callers expect registers.
    if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0)
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
        MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
    break;
  }
  return std::nullopt;
}

MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
  if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
    // The option is unused. Choose the Local strategy only for in-order cores.
    // When the scheduling model is unspecified, use the MinInstrCount strategy
    // as the more generic one.
    const auto &SchedModel = STI.getSchedModel();
    return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
               ? MachineTraceStrategy::TS_MinInstrCount
               : MachineTraceStrategy::TS_Local;
  }
  // The strategy was forced by the option.
  return ForceMachineCombinerStrategy;
}

void RISCVInstrInfo::finalizeInsInstrs(
    MachineInstr &Root, unsigned &Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  int16_t FrmOpIdx =
      RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
  if (FrmOpIdx < 0) {
    assert(all_of(InsInstrs,
                  [](MachineInstr *MI) {
                    return RISCV::getNamedOperandIdx(MI->getOpcode(),
                                                     RISCV::OpName::frm) < 0;
                  }) &&
           "New instructions require FRM whereas the old one does not have it");
    return;
  }

  const MachineOperand &FRM = Root.getOperand(FrmOpIdx);
  MachineFunction &MF = *Root.getMF();

  for (auto *NewMI : InsInstrs) {
    // We've already added the FRM operand; skip it.
1612 if (static_cast<unsigned>(RISCV::getNamedOperandIdx( 1613 NewMI->getOpcode(), RISCV::OpName::frm)) != NewMI->getNumOperands()) 1614 continue; 1615 MachineInstrBuilder MIB(MF, NewMI); 1616 MIB.add(FRM); 1617 if (FRM.getImm() == RISCVFPRndMode::DYN) 1618 MIB.addUse(RISCV::FRM, RegState::Implicit); 1619 } 1620 } 1621 1622 static bool isFADD(unsigned Opc) { 1623 switch (Opc) { 1624 default: 1625 return false; 1626 case RISCV::FADD_H: 1627 case RISCV::FADD_S: 1628 case RISCV::FADD_D: 1629 return true; 1630 } 1631 } 1632 1633 static bool isFSUB(unsigned Opc) { 1634 switch (Opc) { 1635 default: 1636 return false; 1637 case RISCV::FSUB_H: 1638 case RISCV::FSUB_S: 1639 case RISCV::FSUB_D: 1640 return true; 1641 } 1642 } 1643 1644 static bool isFMUL(unsigned Opc) { 1645 switch (Opc) { 1646 default: 1647 return false; 1648 case RISCV::FMUL_H: 1649 case RISCV::FMUL_S: 1650 case RISCV::FMUL_D: 1651 return true; 1652 } 1653 } 1654 1655 bool RISCVInstrInfo::isVectorAssociativeAndCommutative(const MachineInstr &Inst, 1656 bool Invert) const { 1657 #define OPCODE_LMUL_CASE(OPC) \ 1658 case RISCV::OPC##_M1: \ 1659 case RISCV::OPC##_M2: \ 1660 case RISCV::OPC##_M4: \ 1661 case RISCV::OPC##_M8: \ 1662 case RISCV::OPC##_MF2: \ 1663 case RISCV::OPC##_MF4: \ 1664 case RISCV::OPC##_MF8 1665 1666 #define OPCODE_LMUL_MASK_CASE(OPC) \ 1667 case RISCV::OPC##_M1_MASK: \ 1668 case RISCV::OPC##_M2_MASK: \ 1669 case RISCV::OPC##_M4_MASK: \ 1670 case RISCV::OPC##_M8_MASK: \ 1671 case RISCV::OPC##_MF2_MASK: \ 1672 case RISCV::OPC##_MF4_MASK: \ 1673 case RISCV::OPC##_MF8_MASK 1674 1675 unsigned Opcode = Inst.getOpcode(); 1676 if (Invert) { 1677 if (auto InvOpcode = getInverseOpcode(Opcode)) 1678 Opcode = *InvOpcode; 1679 else 1680 return false; 1681 } 1682 1683 // clang-format off 1684 switch (Opcode) { 1685 default: 1686 return false; 1687 OPCODE_LMUL_CASE(PseudoVADD_VV): 1688 OPCODE_LMUL_MASK_CASE(PseudoVADD_VV): 1689 OPCODE_LMUL_CASE(PseudoVMUL_VV): 1690 OPCODE_LMUL_MASK_CASE(PseudoVMUL_VV): 1691 return true; 1692 } 1693 // clang-format on 1694 1695 #undef OPCODE_LMUL_MASK_CASE 1696 #undef OPCODE_LMUL_CASE 1697 } 1698 1699 bool RISCVInstrInfo::areRVVInstsReassociable(const MachineInstr &Root, 1700 const MachineInstr &Prev) const { 1701 if (!areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode())) 1702 return false; 1703 1704 assert(Root.getMF() == Prev.getMF()); 1705 const MachineRegisterInfo *MRI = &Root.getMF()->getRegInfo(); 1706 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); 1707 1708 // Make sure vtype operands are also the same. 1709 const MCInstrDesc &Desc = get(Root.getOpcode()); 1710 const uint64_t TSFlags = Desc.TSFlags; 1711 1712 auto checkImmOperand = [&](unsigned OpIdx) { 1713 return Root.getOperand(OpIdx).getImm() == Prev.getOperand(OpIdx).getImm(); 1714 }; 1715 1716 auto checkRegOperand = [&](unsigned OpIdx) { 1717 return Root.getOperand(OpIdx).getReg() == Prev.getOperand(OpIdx).getReg(); 1718 }; 1719 1720 // PassThru 1721 // TODO: Potentially we can loosen the condition to consider Root to be 1722 // associable with Prev if Root has NoReg as passthru. In which case we 1723 // also need to loosen the condition on vector policies between these. 
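// A sketch of the operand layout assumed by the checks here and below: for a
// pseudo such as PseudoVADD_VV_M1, operand 0 is the def, operand 1 the
// passthru, and operands 2/3 the two sources (see the note ahead of
// hasReassociableVectorSibling), so comparing operand 1 compares the passthru
// registers of Root and Prev.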
1724 if (!checkRegOperand(1))
1725 return false;
1726
1727 // SEW
1728 if (RISCVII::hasSEWOp(TSFlags) &&
1729 !checkImmOperand(RISCVII::getSEWOpNum(Desc)))
1730 return false;
1731
1732 // Mask
1733 if (RISCVII::usesMaskPolicy(TSFlags)) {
1734 const MachineBasicBlock *MBB = Root.getParent();
1735 const MachineBasicBlock::const_reverse_iterator It1(&Root);
1736 const MachineBasicBlock::const_reverse_iterator It2(&Prev);
1737 Register MI1VReg;
1738
1739 bool SeenMI2 = false;
1740 for (auto End = MBB->rend(), It = It1; It != End; ++It) {
1741 if (It == It2) {
1742 SeenMI2 = true;
1743 if (!MI1VReg.isValid())
1744 // There is no V0 def between Root and Prev; they're sharing the
1745 // same V0.
1746 break;
1747 }
1748
1749 if (It->modifiesRegister(RISCV::V0, TRI)) {
1750 Register SrcReg = It->getOperand(1).getReg();
1751 // If it's not a virtual register it'll be more difficult to track its
1752 // defs, so bail out here just to be safe.
1753 if (!SrcReg.isVirtual())
1754 return false;
1755
1756 if (!MI1VReg.isValid()) {
1757 // This is the V0 def for Root.
1758 MI1VReg = SrcReg;
1759 continue;
1760 }
1761
1762 // Some unrelated mask update; keep scanning.
1763 if (!SeenMI2)
1764 continue;
1765
1766 // This is the V0 def for Prev; check if it's the same as that of
1767 // Root.
1768 if (MI1VReg != SrcReg)
1769 return false;
1770 else
1771 break;
1772 }
1773 }
1774
1775 // If we haven't encountered Prev, it's likely that this function was
1776 // called incorrectly (e.g. Root is before Prev).
1777 assert(SeenMI2 && "Prev is expected to appear before Root");
1778 }
1779
1780 // Tail / Mask policies
1781 if (RISCVII::hasVecPolicyOp(TSFlags) &&
1782 !checkImmOperand(RISCVII::getVecPolicyOpNum(Desc)))
1783 return false;
1784
1785 // VL
1786 if (RISCVII::hasVLOp(TSFlags)) {
1787 unsigned OpIdx = RISCVII::getVLOpNum(Desc);
1788 const MachineOperand &Op1 = Root.getOperand(OpIdx);
1789 const MachineOperand &Op2 = Prev.getOperand(OpIdx);
1790 if (Op1.getType() != Op2.getType())
1791 return false;
1792 switch (Op1.getType()) {
1793 case MachineOperand::MO_Register:
1794 if (Op1.getReg() != Op2.getReg())
1795 return false;
1796 break;
1797 case MachineOperand::MO_Immediate:
1798 if (Op1.getImm() != Op2.getImm())
1799 return false;
1800 break;
1801 default:
1802 llvm_unreachable("Unrecognized VL operand type");
1803 }
1804 }
1805
1806 // Rounding modes
1807 if (RISCVII::hasRoundModeOp(TSFlags) &&
1808 !checkImmOperand(RISCVII::getVLOpNum(Desc) - 1))
1809 return false;
1810
1811 return true;
1812 }
1813
1814 // Most of our RVV pseudos have a passthru operand, so the real operands
1815 // start from index = 2.
1816 bool RISCVInstrInfo::hasReassociableVectorSibling(const MachineInstr &Inst,
1817 bool &Commuted) const {
1818 const MachineBasicBlock *MBB = Inst.getParent();
1819 const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1820 assert(RISCVII::isFirstDefTiedToFirstUse(get(Inst.getOpcode())) &&
1821 "Expect the presence of the passthru operand.");
1822 MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
1823 MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(3).getReg());
1824
1825 // If only one operand has the same or inverse opcode and it's the second
1826 // source operand, the operands must be commuted.
1827 Commuted = !areRVVInstsReassociable(Inst, *MI1) && 1828 areRVVInstsReassociable(Inst, *MI2); 1829 if (Commuted) 1830 std::swap(MI1, MI2); 1831 1832 return areRVVInstsReassociable(Inst, *MI1) && 1833 (isVectorAssociativeAndCommutative(*MI1) || 1834 isVectorAssociativeAndCommutative(*MI1, /* Invert */ true)) && 1835 hasReassociableOperands(*MI1, MBB) && 1836 MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()); 1837 } 1838 1839 bool RISCVInstrInfo::hasReassociableOperands( 1840 const MachineInstr &Inst, const MachineBasicBlock *MBB) const { 1841 if (!isVectorAssociativeAndCommutative(Inst) && 1842 !isVectorAssociativeAndCommutative(Inst, /*Invert=*/true)) 1843 return TargetInstrInfo::hasReassociableOperands(Inst, MBB); 1844 1845 const MachineOperand &Op1 = Inst.getOperand(2); 1846 const MachineOperand &Op2 = Inst.getOperand(3); 1847 const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1848 1849 // We need virtual register definitions for the operands that we will 1850 // reassociate. 1851 MachineInstr *MI1 = nullptr; 1852 MachineInstr *MI2 = nullptr; 1853 if (Op1.isReg() && Op1.getReg().isVirtual()) 1854 MI1 = MRI.getUniqueVRegDef(Op1.getReg()); 1855 if (Op2.isReg() && Op2.getReg().isVirtual()) 1856 MI2 = MRI.getUniqueVRegDef(Op2.getReg()); 1857 1858 // And at least one operand must be defined in MBB. 1859 return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB); 1860 } 1861 1862 void RISCVInstrInfo::getReassociateOperandIndices( 1863 const MachineInstr &Root, unsigned Pattern, 1864 std::array<unsigned, 5> &OperandIndices) const { 1865 TargetInstrInfo::getReassociateOperandIndices(Root, Pattern, OperandIndices); 1866 if (RISCV::getRVVMCOpcode(Root.getOpcode())) { 1867 // Skip the passthrough operand, so increment all indices by one. 1868 for (unsigned I = 0; I < 5; ++I) 1869 ++OperandIndices[I]; 1870 } 1871 } 1872 1873 bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst, 1874 bool &Commuted) const { 1875 if (isVectorAssociativeAndCommutative(Inst) || 1876 isVectorAssociativeAndCommutative(Inst, /*Invert=*/true)) 1877 return hasReassociableVectorSibling(Inst, Commuted); 1878 1879 if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted)) 1880 return false; 1881 1882 const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo(); 1883 unsigned OperandIdx = Commuted ? 
2 : 1; 1884 const MachineInstr &Sibling = 1885 *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg()); 1886 1887 int16_t InstFrmOpIdx = 1888 RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm); 1889 int16_t SiblingFrmOpIdx = 1890 RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm); 1891 1892 return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) || 1893 RISCV::hasEqualFRM(Inst, Sibling); 1894 } 1895 1896 bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, 1897 bool Invert) const { 1898 if (isVectorAssociativeAndCommutative(Inst, Invert)) 1899 return true; 1900 1901 unsigned Opc = Inst.getOpcode(); 1902 if (Invert) { 1903 auto InverseOpcode = getInverseOpcode(Opc); 1904 if (!InverseOpcode) 1905 return false; 1906 Opc = *InverseOpcode; 1907 } 1908 1909 if (isFADD(Opc) || isFMUL(Opc)) 1910 return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) && 1911 Inst.getFlag(MachineInstr::MIFlag::FmNsz); 1912 1913 switch (Opc) { 1914 default: 1915 return false; 1916 case RISCV::ADD: 1917 case RISCV::ADDW: 1918 case RISCV::AND: 1919 case RISCV::OR: 1920 case RISCV::XOR: 1921 // From RISC-V ISA spec, if both the high and low bits of the same product 1922 // are required, then the recommended code sequence is: 1923 // 1924 // MULH[[S]U] rdh, rs1, rs2 1925 // MUL rdl, rs1, rs2 1926 // (source register specifiers must be in same order and rdh cannot be the 1927 // same as rs1 or rs2) 1928 // 1929 // Microarchitectures can then fuse these into a single multiply operation 1930 // instead of performing two separate multiplies. 1931 // MachineCombiner may reassociate MUL operands and lose the fusion 1932 // opportunity. 1933 case RISCV::MUL: 1934 case RISCV::MULW: 1935 case RISCV::MIN: 1936 case RISCV::MINU: 1937 case RISCV::MAX: 1938 case RISCV::MAXU: 1939 case RISCV::FMIN_H: 1940 case RISCV::FMIN_S: 1941 case RISCV::FMIN_D: 1942 case RISCV::FMAX_H: 1943 case RISCV::FMAX_S: 1944 case RISCV::FMAX_D: 1945 return true; 1946 } 1947 1948 return false; 1949 } 1950 1951 std::optional<unsigned> 1952 RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const { 1953 #define RVV_OPC_LMUL_CASE(OPC, INV) \ 1954 case RISCV::OPC##_M1: \ 1955 return RISCV::INV##_M1; \ 1956 case RISCV::OPC##_M2: \ 1957 return RISCV::INV##_M2; \ 1958 case RISCV::OPC##_M4: \ 1959 return RISCV::INV##_M4; \ 1960 case RISCV::OPC##_M8: \ 1961 return RISCV::INV##_M8; \ 1962 case RISCV::OPC##_MF2: \ 1963 return RISCV::INV##_MF2; \ 1964 case RISCV::OPC##_MF4: \ 1965 return RISCV::INV##_MF4; \ 1966 case RISCV::OPC##_MF8: \ 1967 return RISCV::INV##_MF8 1968 1969 #define RVV_OPC_LMUL_MASK_CASE(OPC, INV) \ 1970 case RISCV::OPC##_M1_MASK: \ 1971 return RISCV::INV##_M1_MASK; \ 1972 case RISCV::OPC##_M2_MASK: \ 1973 return RISCV::INV##_M2_MASK; \ 1974 case RISCV::OPC##_M4_MASK: \ 1975 return RISCV::INV##_M4_MASK; \ 1976 case RISCV::OPC##_M8_MASK: \ 1977 return RISCV::INV##_M8_MASK; \ 1978 case RISCV::OPC##_MF2_MASK: \ 1979 return RISCV::INV##_MF2_MASK; \ 1980 case RISCV::OPC##_MF4_MASK: \ 1981 return RISCV::INV##_MF4_MASK; \ 1982 case RISCV::OPC##_MF8_MASK: \ 1983 return RISCV::INV##_MF8_MASK 1984 1985 switch (Opcode) { 1986 default: 1987 return std::nullopt; 1988 case RISCV::FADD_H: 1989 return RISCV::FSUB_H; 1990 case RISCV::FADD_S: 1991 return RISCV::FSUB_S; 1992 case RISCV::FADD_D: 1993 return RISCV::FSUB_D; 1994 case RISCV::FSUB_H: 1995 return RISCV::FADD_H; 1996 case RISCV::FSUB_S: 1997 return RISCV::FADD_S; 1998 case RISCV::FSUB_D: 1999 return RISCV::FADD_D; 2000 case RISCV::ADD: 2001 return RISCV::SUB; 2002 case 
RISCV::SUB:
2003 return RISCV::ADD;
2004 case RISCV::ADDW:
2005 return RISCV::SUBW;
2006 case RISCV::SUBW:
2007 return RISCV::ADDW;
2008 // clang-format off
2009 RVV_OPC_LMUL_CASE(PseudoVADD_VV, PseudoVSUB_VV);
2010 RVV_OPC_LMUL_MASK_CASE(PseudoVADD_VV, PseudoVSUB_VV);
2011 RVV_OPC_LMUL_CASE(PseudoVSUB_VV, PseudoVADD_VV);
2012 RVV_OPC_LMUL_MASK_CASE(PseudoVSUB_VV, PseudoVADD_VV);
2013 // clang-format on
2014 }
2015
2016 #undef RVV_OPC_LMUL_MASK_CASE
2017 #undef RVV_OPC_LMUL_CASE
2018 }
2019
2020 static bool canCombineFPFusedMultiply(const MachineInstr &Root,
2021 const MachineOperand &MO,
2022 bool DoRegPressureReduce) {
2023 if (!MO.isReg() || !MO.getReg().isVirtual())
2024 return false;
2025 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
2026 MachineInstr *MI = MRI.getVRegDef(MO.getReg());
2027 if (!MI || !isFMUL(MI->getOpcode()))
2028 return false;
2029
2030 if (!Root.getFlag(MachineInstr::MIFlag::FmContract) ||
2031 !MI->getFlag(MachineInstr::MIFlag::FmContract))
2032 return false;
2033
2034 // Try combining even if the fmul has more than one use, as it eliminates the
2035 // dependency between the fadd (or fsub) and the fmul. However, it can extend
2036 // live ranges for the fmul operands, so reject the transformation in register
2037 // pressure reduction mode.
2038 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2039 return false;
2040
2041 // Do not combine instructions from different basic blocks.
2042 if (Root.getParent() != MI->getParent())
2043 return false;
2044 return RISCV::hasEqualFRM(Root, *MI);
2045 }
2046
2047 static bool getFPFusedMultiplyPatterns(MachineInstr &Root,
2048 SmallVectorImpl<unsigned> &Patterns,
2049 bool DoRegPressureReduce) {
2050 unsigned Opc = Root.getOpcode();
2051 bool IsFAdd = isFADD(Opc);
2052 if (!IsFAdd && !isFSUB(Opc))
2053 return false;
2054 bool Added = false;
2055 if (canCombineFPFusedMultiply(Root, Root.getOperand(1),
2056 DoRegPressureReduce)) {
2057 Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_AX
2058 : RISCVMachineCombinerPattern::FMSUB);
2059 Added = true;
2060 }
2061 if (canCombineFPFusedMultiply(Root, Root.getOperand(2),
2062 DoRegPressureReduce)) {
2063 Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_XA
2064 : RISCVMachineCombinerPattern::FNMSUB);
2065 Added = true;
2066 }
2067 return Added;
2068 }
2069
2070 static bool getFPPatterns(MachineInstr &Root,
2071 SmallVectorImpl<unsigned> &Patterns,
2072 bool DoRegPressureReduce) {
2073 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
2074 }
2075
2076 /// Utility routine that checks if \param MO is defined by an
2077 /// \param CombineOpc instruction in the basic block \param MBB
2078 static const MachineInstr *canCombine(const MachineBasicBlock &MBB,
2079 const MachineOperand &MO,
2080 unsigned CombineOpc) {
2081 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2082 const MachineInstr *MI = nullptr;
2083
2084 if (MO.isReg() && MO.getReg().isVirtual())
2085 MI = MRI.getUniqueVRegDef(MO.getReg());
2086 // And it needs to be in the trace (otherwise, it won't have a depth).
2087 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
2088 return nullptr;
2089 // It must only be used by the user we combine with.
2090 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
2091 return nullptr;
2092
2093 return MI;
2094 }
2095
2096 /// Utility routine that checks if \param MO is defined by a SLLI in \param
2097 /// MBB that can be combined by splitting across 2 SHXADD instructions.
The 2098 /// first SHXADD shift amount is given by \param OuterShiftAmt. 2099 static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB, 2100 const MachineOperand &MO, 2101 unsigned OuterShiftAmt) { 2102 const MachineInstr *ShiftMI = canCombine(MBB, MO, RISCV::SLLI); 2103 if (!ShiftMI) 2104 return false; 2105 2106 unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm(); 2107 if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3) 2108 return false; 2109 2110 return true; 2111 } 2112 2113 // Returns the shift amount from a SHXADD instruction. Returns 0 if the 2114 // instruction is not a SHXADD. 2115 static unsigned getSHXADDShiftAmount(unsigned Opc) { 2116 switch (Opc) { 2117 default: 2118 return 0; 2119 case RISCV::SH1ADD: 2120 return 1; 2121 case RISCV::SH2ADD: 2122 return 2; 2123 case RISCV::SH3ADD: 2124 return 3; 2125 } 2126 } 2127 2128 // Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into 2129 // (sh3add (sh2add Y, Z), X). 2130 static bool getSHXADDPatterns(const MachineInstr &Root, 2131 SmallVectorImpl<unsigned> &Patterns) { 2132 unsigned ShiftAmt = getSHXADDShiftAmount(Root.getOpcode()); 2133 if (!ShiftAmt) 2134 return false; 2135 2136 const MachineBasicBlock &MBB = *Root.getParent(); 2137 2138 const MachineInstr *AddMI = canCombine(MBB, Root.getOperand(2), RISCV::ADD); 2139 if (!AddMI) 2140 return false; 2141 2142 bool Found = false; 2143 if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(1), ShiftAmt)) { 2144 Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1); 2145 Found = true; 2146 } 2147 if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(2), ShiftAmt)) { 2148 Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2); 2149 Found = true; 2150 } 2151 2152 return Found; 2153 } 2154 2155 CombinerObjective RISCVInstrInfo::getCombinerObjective(unsigned Pattern) const { 2156 switch (Pattern) { 2157 case RISCVMachineCombinerPattern::FMADD_AX: 2158 case RISCVMachineCombinerPattern::FMADD_XA: 2159 case RISCVMachineCombinerPattern::FMSUB: 2160 case RISCVMachineCombinerPattern::FNMSUB: 2161 return CombinerObjective::MustReduceDepth; 2162 default: 2163 return TargetInstrInfo::getCombinerObjective(Pattern); 2164 } 2165 } 2166 2167 bool RISCVInstrInfo::getMachineCombinerPatterns( 2168 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns, 2169 bool DoRegPressureReduce) const { 2170 2171 if (getFPPatterns(Root, Patterns, DoRegPressureReduce)) 2172 return true; 2173 2174 if (getSHXADDPatterns(Root, Patterns)) 2175 return true; 2176 2177 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, 2178 DoRegPressureReduce); 2179 } 2180 2181 static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) { 2182 switch (RootOpc) { 2183 default: 2184 llvm_unreachable("Unexpected opcode"); 2185 case RISCV::FADD_H: 2186 return RISCV::FMADD_H; 2187 case RISCV::FADD_S: 2188 return RISCV::FMADD_S; 2189 case RISCV::FADD_D: 2190 return RISCV::FMADD_D; 2191 case RISCV::FSUB_H: 2192 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H 2193 : RISCV::FNMSUB_H; 2194 case RISCV::FSUB_S: 2195 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S 2196 : RISCV::FNMSUB_S; 2197 case RISCV::FSUB_D: 2198 return Pattern == RISCVMachineCombinerPattern::FMSUB ? 
RISCV::FMSUB_D 2199 : RISCV::FNMSUB_D; 2200 } 2201 } 2202 2203 static unsigned getAddendOperandIdx(unsigned Pattern) { 2204 switch (Pattern) { 2205 default: 2206 llvm_unreachable("Unexpected pattern"); 2207 case RISCVMachineCombinerPattern::FMADD_AX: 2208 case RISCVMachineCombinerPattern::FMSUB: 2209 return 2; 2210 case RISCVMachineCombinerPattern::FMADD_XA: 2211 case RISCVMachineCombinerPattern::FNMSUB: 2212 return 1; 2213 } 2214 } 2215 2216 static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev, 2217 unsigned Pattern, 2218 SmallVectorImpl<MachineInstr *> &InsInstrs, 2219 SmallVectorImpl<MachineInstr *> &DelInstrs) { 2220 MachineFunction *MF = Root.getMF(); 2221 MachineRegisterInfo &MRI = MF->getRegInfo(); 2222 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 2223 2224 MachineOperand &Mul1 = Prev.getOperand(1); 2225 MachineOperand &Mul2 = Prev.getOperand(2); 2226 MachineOperand &Dst = Root.getOperand(0); 2227 MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern)); 2228 2229 Register DstReg = Dst.getReg(); 2230 unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern); 2231 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags(); 2232 DebugLoc MergedLoc = 2233 DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc()); 2234 2235 bool Mul1IsKill = Mul1.isKill(); 2236 bool Mul2IsKill = Mul2.isKill(); 2237 bool AddendIsKill = Addend.isKill(); 2238 2239 // We need to clear kill flags since we may be extending the live range past 2240 // a kill. If the mul had kill flags, we can preserve those since we know 2241 // where the previous range stopped. 2242 MRI.clearKillFlags(Mul1.getReg()); 2243 MRI.clearKillFlags(Mul2.getReg()); 2244 2245 MachineInstrBuilder MIB = 2246 BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg) 2247 .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill)) 2248 .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill)) 2249 .addReg(Addend.getReg(), getKillRegState(AddendIsKill)) 2250 .setMIFlags(IntersectedFlags); 2251 2252 InsInstrs.push_back(MIB); 2253 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) 2254 DelInstrs.push_back(&Prev); 2255 DelInstrs.push_back(&Root); 2256 } 2257 2258 // Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to 2259 // (sh3add (sh2add Y, Z), X) if the shift amount can be split across two 2260 // shXadd instructions. The outer shXadd keeps its original opcode. 
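// A minimal sketch of the algebra, assuming shNadd a, b computes (a << N) + b
// as in the Zba extension:
//   sh3add Z, (add X, (slli Y, 5))  ==  (Z << 3) + X + (Y << 5)
//   sh3add (sh2add Y, Z), X         ==  (((Y << 2) + Z) << 3) + X
//                                   ==  (Y << 5) + (Z << 3) + X
// Both forms compute the same value; only the association changes, which is
// why the inner opcode is chosen from InnerShiftAmt - OuterShiftAmt below.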
2261 static void 2262 genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx, 2263 SmallVectorImpl<MachineInstr *> &InsInstrs, 2264 SmallVectorImpl<MachineInstr *> &DelInstrs, 2265 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) { 2266 MachineFunction *MF = Root.getMF(); 2267 MachineRegisterInfo &MRI = MF->getRegInfo(); 2268 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 2269 2270 unsigned OuterShiftAmt = getSHXADDShiftAmount(Root.getOpcode()); 2271 assert(OuterShiftAmt != 0 && "Unexpected opcode"); 2272 2273 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); 2274 MachineInstr *ShiftMI = 2275 MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg()); 2276 2277 unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm(); 2278 assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount"); 2279 2280 unsigned InnerOpc; 2281 switch (InnerShiftAmt - OuterShiftAmt) { 2282 default: 2283 llvm_unreachable("Unexpected shift amount"); 2284 case 0: 2285 InnerOpc = RISCV::ADD; 2286 break; 2287 case 1: 2288 InnerOpc = RISCV::SH1ADD; 2289 break; 2290 case 2: 2291 InnerOpc = RISCV::SH2ADD; 2292 break; 2293 case 3: 2294 InnerOpc = RISCV::SH3ADD; 2295 break; 2296 } 2297 2298 const MachineOperand &X = AddMI->getOperand(3 - AddOpIdx); 2299 const MachineOperand &Y = ShiftMI->getOperand(1); 2300 const MachineOperand &Z = Root.getOperand(1); 2301 2302 Register NewVR = MRI.createVirtualRegister(&RISCV::GPRRegClass); 2303 2304 auto MIB1 = BuildMI(*MF, MIMetadata(Root), TII->get(InnerOpc), NewVR) 2305 .addReg(Y.getReg(), getKillRegState(Y.isKill())) 2306 .addReg(Z.getReg(), getKillRegState(Z.isKill())); 2307 auto MIB2 = BuildMI(*MF, MIMetadata(Root), TII->get(Root.getOpcode()), 2308 Root.getOperand(0).getReg()) 2309 .addReg(NewVR, RegState::Kill) 2310 .addReg(X.getReg(), getKillRegState(X.isKill())); 2311 2312 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 2313 InsInstrs.push_back(MIB1); 2314 InsInstrs.push_back(MIB2); 2315 DelInstrs.push_back(ShiftMI); 2316 DelInstrs.push_back(AddMI); 2317 DelInstrs.push_back(&Root); 2318 } 2319 2320 void RISCVInstrInfo::genAlternativeCodeSequence( 2321 MachineInstr &Root, unsigned Pattern, 2322 SmallVectorImpl<MachineInstr *> &InsInstrs, 2323 SmallVectorImpl<MachineInstr *> &DelInstrs, 2324 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { 2325 MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); 2326 switch (Pattern) { 2327 default: 2328 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 2329 DelInstrs, InstrIdxForVirtReg); 2330 return; 2331 case RISCVMachineCombinerPattern::FMADD_AX: 2332 case RISCVMachineCombinerPattern::FMSUB: { 2333 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg()); 2334 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); 2335 return; 2336 } 2337 case RISCVMachineCombinerPattern::FMADD_XA: 2338 case RISCVMachineCombinerPattern::FNMSUB: { 2339 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg()); 2340 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); 2341 return; 2342 } 2343 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1: 2344 genShXAddAddShift(Root, 1, InsInstrs, DelInstrs, InstrIdxForVirtReg); 2345 return; 2346 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2: 2347 genShXAddAddShift(Root, 2, InsInstrs, DelInstrs, InstrIdxForVirtReg); 2348 return; 2349 } 2350 } 2351 2352 bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, 2353 StringRef &ErrInfo) const { 2354 MCInstrDesc const &Desc = 
MI.getDesc(); 2355 2356 for (const auto &[Index, Operand] : enumerate(Desc.operands())) { 2357 unsigned OpType = Operand.OperandType; 2358 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM && 2359 OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) { 2360 const MachineOperand &MO = MI.getOperand(Index); 2361 if (MO.isImm()) { 2362 int64_t Imm = MO.getImm(); 2363 bool Ok; 2364 switch (OpType) { 2365 default: 2366 llvm_unreachable("Unexpected operand type"); 2367 2368 // clang-format off 2369 #define CASE_OPERAND_UIMM(NUM) \ 2370 case RISCVOp::OPERAND_UIMM##NUM: \ 2371 Ok = isUInt<NUM>(Imm); \ 2372 break; 2373 CASE_OPERAND_UIMM(1) 2374 CASE_OPERAND_UIMM(2) 2375 CASE_OPERAND_UIMM(3) 2376 CASE_OPERAND_UIMM(4) 2377 CASE_OPERAND_UIMM(5) 2378 CASE_OPERAND_UIMM(6) 2379 CASE_OPERAND_UIMM(7) 2380 CASE_OPERAND_UIMM(8) 2381 CASE_OPERAND_UIMM(12) 2382 CASE_OPERAND_UIMM(20) 2383 // clang-format on 2384 case RISCVOp::OPERAND_UIMM2_LSB0: 2385 Ok = isShiftedUInt<1, 1>(Imm); 2386 break; 2387 case RISCVOp::OPERAND_UIMM5_LSB0: 2388 Ok = isShiftedUInt<4, 1>(Imm); 2389 break; 2390 case RISCVOp::OPERAND_UIMM6_LSB0: 2391 Ok = isShiftedUInt<5, 1>(Imm); 2392 break; 2393 case RISCVOp::OPERAND_UIMM7_LSB00: 2394 Ok = isShiftedUInt<5, 2>(Imm); 2395 break; 2396 case RISCVOp::OPERAND_UIMM8_LSB00: 2397 Ok = isShiftedUInt<6, 2>(Imm); 2398 break; 2399 case RISCVOp::OPERAND_UIMM8_LSB000: 2400 Ok = isShiftedUInt<5, 3>(Imm); 2401 break; 2402 case RISCVOp::OPERAND_UIMM8_GE32: 2403 Ok = isUInt<8>(Imm) && Imm >= 32; 2404 break; 2405 case RISCVOp::OPERAND_UIMM9_LSB000: 2406 Ok = isShiftedUInt<6, 3>(Imm); 2407 break; 2408 case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO: 2409 Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0); 2410 break; 2411 case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO: 2412 Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0); 2413 break; 2414 case RISCVOp::OPERAND_ZERO: 2415 Ok = Imm == 0; 2416 break; 2417 case RISCVOp::OPERAND_SIMM5: 2418 Ok = isInt<5>(Imm); 2419 break; 2420 case RISCVOp::OPERAND_SIMM5_PLUS1: 2421 Ok = (isInt<5>(Imm) && Imm != -16) || Imm == 16; 2422 break; 2423 case RISCVOp::OPERAND_SIMM6: 2424 Ok = isInt<6>(Imm); 2425 break; 2426 case RISCVOp::OPERAND_SIMM6_NONZERO: 2427 Ok = Imm != 0 && isInt<6>(Imm); 2428 break; 2429 case RISCVOp::OPERAND_VTYPEI10: 2430 Ok = isUInt<10>(Imm); 2431 break; 2432 case RISCVOp::OPERAND_VTYPEI11: 2433 Ok = isUInt<11>(Imm); 2434 break; 2435 case RISCVOp::OPERAND_SIMM12: 2436 Ok = isInt<12>(Imm); 2437 break; 2438 case RISCVOp::OPERAND_SIMM12_LSB00000: 2439 Ok = isShiftedInt<7, 5>(Imm); 2440 break; 2441 case RISCVOp::OPERAND_UIMMLOG2XLEN: 2442 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm); 2443 break; 2444 case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO: 2445 Ok = STI.is64Bit() ? 
isUInt<6>(Imm) : isUInt<5>(Imm); 2446 Ok = Ok && Imm != 0; 2447 break; 2448 case RISCVOp::OPERAND_CLUI_IMM: 2449 Ok = (isUInt<5>(Imm) && Imm != 0) || 2450 (Imm >= 0xfffe0 && Imm <= 0xfffff); 2451 break; 2452 case RISCVOp::OPERAND_RVKRNUM: 2453 Ok = Imm >= 0 && Imm <= 10; 2454 break; 2455 case RISCVOp::OPERAND_RVKRNUM_0_7: 2456 Ok = Imm >= 0 && Imm <= 7; 2457 break; 2458 case RISCVOp::OPERAND_RVKRNUM_1_10: 2459 Ok = Imm >= 1 && Imm <= 10; 2460 break; 2461 case RISCVOp::OPERAND_RVKRNUM_2_14: 2462 Ok = Imm >= 2 && Imm <= 14; 2463 break; 2464 case RISCVOp::OPERAND_SPIMM: 2465 Ok = (Imm & 0xf) == 0; 2466 break; 2467 } 2468 if (!Ok) { 2469 ErrInfo = "Invalid immediate"; 2470 return false; 2471 } 2472 } 2473 } 2474 } 2475 2476 const uint64_t TSFlags = Desc.TSFlags; 2477 if (RISCVII::hasVLOp(TSFlags)) { 2478 const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc)); 2479 if (!Op.isImm() && !Op.isReg()) { 2480 ErrInfo = "Invalid operand type for VL operand"; 2481 return false; 2482 } 2483 if (Op.isReg() && Op.getReg() != RISCV::NoRegister) { 2484 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 2485 auto *RC = MRI.getRegClass(Op.getReg()); 2486 if (!RISCV::GPRRegClass.hasSubClassEq(RC)) { 2487 ErrInfo = "Invalid register class for VL operand"; 2488 return false; 2489 } 2490 } 2491 if (!RISCVII::hasSEWOp(TSFlags)) { 2492 ErrInfo = "VL operand w/o SEW operand?"; 2493 return false; 2494 } 2495 } 2496 if (RISCVII::hasSEWOp(TSFlags)) { 2497 unsigned OpIdx = RISCVII::getSEWOpNum(Desc); 2498 if (!MI.getOperand(OpIdx).isImm()) { 2499 ErrInfo = "SEW value expected to be an immediate"; 2500 return false; 2501 } 2502 uint64_t Log2SEW = MI.getOperand(OpIdx).getImm(); 2503 if (Log2SEW > 31) { 2504 ErrInfo = "Unexpected SEW value"; 2505 return false; 2506 } 2507 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; 2508 if (!RISCVVType::isValidSEW(SEW)) { 2509 ErrInfo = "Unexpected SEW value"; 2510 return false; 2511 } 2512 } 2513 if (RISCVII::hasVecPolicyOp(TSFlags)) { 2514 unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc); 2515 if (!MI.getOperand(OpIdx).isImm()) { 2516 ErrInfo = "Policy operand expected to be an immediate"; 2517 return false; 2518 } 2519 uint64_t Policy = MI.getOperand(OpIdx).getImm(); 2520 if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) { 2521 ErrInfo = "Invalid Policy Value"; 2522 return false; 2523 } 2524 if (!RISCVII::hasVLOp(TSFlags)) { 2525 ErrInfo = "policy operand w/o VL operand?"; 2526 return false; 2527 } 2528 2529 // VecPolicy operands can only exist on instructions with passthru/merge 2530 // arguments. Note that not all arguments with passthru have vec policy 2531 // operands- some instructions have implicit policies. 
2532 unsigned UseOpIdx; 2533 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 2534 ErrInfo = "policy operand w/o tied operand?"; 2535 return false; 2536 } 2537 } 2538 2539 return true; 2540 } 2541 2542 bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, 2543 const MachineInstr &AddrI, 2544 ExtAddrMode &AM) const { 2545 switch (MemI.getOpcode()) { 2546 default: 2547 return false; 2548 case RISCV::LB: 2549 case RISCV::LBU: 2550 case RISCV::LH: 2551 case RISCV::LHU: 2552 case RISCV::LW: 2553 case RISCV::LWU: 2554 case RISCV::LD: 2555 case RISCV::FLH: 2556 case RISCV::FLW: 2557 case RISCV::FLD: 2558 case RISCV::SB: 2559 case RISCV::SH: 2560 case RISCV::SW: 2561 case RISCV::SD: 2562 case RISCV::FSH: 2563 case RISCV::FSW: 2564 case RISCV::FSD: 2565 break; 2566 } 2567 2568 if (MemI.getOperand(0).getReg() == Reg) 2569 return false; 2570 2571 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() || 2572 !AddrI.getOperand(2).isImm()) 2573 return false; 2574 2575 int64_t OldOffset = MemI.getOperand(2).getImm(); 2576 int64_t Disp = AddrI.getOperand(2).getImm(); 2577 int64_t NewOffset = OldOffset + Disp; 2578 if (!STI.is64Bit()) 2579 NewOffset = SignExtend64<32>(NewOffset); 2580 2581 if (!isInt<12>(NewOffset)) 2582 return false; 2583 2584 AM.BaseReg = AddrI.getOperand(1).getReg(); 2585 AM.ScaledReg = 0; 2586 AM.Scale = 0; 2587 AM.Displacement = NewOffset; 2588 AM.Form = ExtAddrMode::Formula::Basic; 2589 return true; 2590 } 2591 2592 MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI, 2593 const ExtAddrMode &AM) const { 2594 2595 const DebugLoc &DL = MemI.getDebugLoc(); 2596 MachineBasicBlock &MBB = *MemI.getParent(); 2597 2598 assert(AM.ScaledReg == 0 && AM.Scale == 0 && 2599 "Addressing mode not supported for folding"); 2600 2601 return BuildMI(MBB, MemI, DL, get(MemI.getOpcode())) 2602 .addReg(MemI.getOperand(0).getReg(), 2603 MemI.mayLoad() ? RegState::Define : 0) 2604 .addReg(AM.BaseReg) 2605 .addImm(AM.Displacement) 2606 .setMemRefs(MemI.memoperands()) 2607 .setMIFlags(MemI.getFlags()); 2608 } 2609 2610 bool RISCVInstrInfo::getMemOperandsWithOffsetWidth( 2611 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, 2612 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, 2613 const TargetRegisterInfo *TRI) const { 2614 if (!LdSt.mayLoadOrStore()) 2615 return false; 2616 2617 // Conservatively, only handle scalar loads/stores for now. 2618 switch (LdSt.getOpcode()) { 2619 case RISCV::LB: 2620 case RISCV::LBU: 2621 case RISCV::SB: 2622 case RISCV::LH: 2623 case RISCV::LHU: 2624 case RISCV::FLH: 2625 case RISCV::SH: 2626 case RISCV::FSH: 2627 case RISCV::LW: 2628 case RISCV::LWU: 2629 case RISCV::FLW: 2630 case RISCV::SW: 2631 case RISCV::FSW: 2632 case RISCV::LD: 2633 case RISCV::FLD: 2634 case RISCV::SD: 2635 case RISCV::FSD: 2636 break; 2637 default: 2638 return false; 2639 } 2640 const MachineOperand *BaseOp; 2641 OffsetIsScalable = false; 2642 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) 2643 return false; 2644 BaseOps.push_back(BaseOp); 2645 return true; 2646 } 2647 2648 // TODO: This was copied from SIInstrInfo. Could it be lifted to a common 2649 // helper? 
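// Illustrative example (assumed operand shapes, not taken from a test): two
// accesses such as "lw a0, 0(s0)" and "lw a1, 8(s0)" share s0 as their first
// base operand, so the isIdenticalTo fast path below succeeds; if the base
// operands differ syntactically, the IR values attached to the memory
// operands are compared via getUnderlyingObject instead.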
2650 static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, 2651 ArrayRef<const MachineOperand *> BaseOps1, 2652 const MachineInstr &MI2, 2653 ArrayRef<const MachineOperand *> BaseOps2) { 2654 // Only examine the first "base" operand of each instruction, on the 2655 // assumption that it represents the real base address of the memory access. 2656 // Other operands are typically offsets or indices from this base address. 2657 if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front())) 2658 return true; 2659 2660 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand()) 2661 return false; 2662 2663 auto MO1 = *MI1.memoperands_begin(); 2664 auto MO2 = *MI2.memoperands_begin(); 2665 if (MO1->getAddrSpace() != MO2->getAddrSpace()) 2666 return false; 2667 2668 auto Base1 = MO1->getValue(); 2669 auto Base2 = MO2->getValue(); 2670 if (!Base1 || !Base2) 2671 return false; 2672 Base1 = getUnderlyingObject(Base1); 2673 Base2 = getUnderlyingObject(Base2); 2674 2675 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2)) 2676 return false; 2677 2678 return Base1 == Base2; 2679 } 2680 2681 bool RISCVInstrInfo::shouldClusterMemOps( 2682 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1, 2683 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2, 2684 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, 2685 unsigned NumBytes) const { 2686 // If the mem ops (to be clustered) do not have the same base ptr, then they 2687 // should not be clustered 2688 if (!BaseOps1.empty() && !BaseOps2.empty()) { 2689 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); 2690 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); 2691 if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) 2692 return false; 2693 } else if (!BaseOps1.empty() || !BaseOps2.empty()) { 2694 // If only one base op is empty, they do not have the same base ptr 2695 return false; 2696 } 2697 2698 unsigned CacheLineSize = 2699 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize(); 2700 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget. 2701 CacheLineSize = CacheLineSize ? CacheLineSize : 64; 2702 // Cluster if the memory operations are on the same or a neighbouring cache 2703 // line, but limit the maximum ClusterSize to avoid creating too much 2704 // additional register pressure. 2705 return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize; 2706 } 2707 2708 // Set BaseReg (the base register operand), Offset (the byte offset being 2709 // accessed) and the access Width of the passed instruction that reads/writes 2710 // memory. Returns false if the instruction does not read/write memory or the 2711 // BaseReg/Offset/Width can't be determined. Is not guaranteed to always 2712 // recognise base operands and offsets in all cases. 2713 // TODO: Add an IsScalable bool ref argument (like the equivalent AArch64 2714 // function) and set it as appropriate. 2715 bool RISCVInstrInfo::getMemOperandWithOffsetWidth( 2716 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, 2717 LocationSize &Width, const TargetRegisterInfo *TRI) const { 2718 if (!LdSt.mayLoadOrStore()) 2719 return false; 2720 2721 // Here we assume the standard RISC-V ISA, which uses a base+offset 2722 // addressing mode. You'll need to relax these conditions to support custom 2723 // load/store instructions. 
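// Hedged example of the shape being matched: a scalar access such as
// "lw a0, 8(s1)" has exactly three explicit operands (value, base, immediate
// offset), so operand 1 below is the base register or frame index and
// operand 2 the signed immediate offset.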
2724 if (LdSt.getNumExplicitOperands() != 3) 2725 return false; 2726 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || 2727 !LdSt.getOperand(2).isImm()) 2728 return false; 2729 2730 if (!LdSt.hasOneMemOperand()) 2731 return false; 2732 2733 Width = (*LdSt.memoperands_begin())->getSize(); 2734 BaseReg = &LdSt.getOperand(1); 2735 Offset = LdSt.getOperand(2).getImm(); 2736 return true; 2737 } 2738 2739 bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint( 2740 const MachineInstr &MIa, const MachineInstr &MIb) const { 2741 assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); 2742 assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); 2743 2744 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || 2745 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 2746 return false; 2747 2748 // Retrieve the base register, offset from the base register and width. Width 2749 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If 2750 // base registers are identical, and the offset of a lower memory access + 2751 // the width doesn't overlap the offset of a higher memory access, 2752 // then the memory accesses are different. 2753 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 2754 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; 2755 int64_t OffsetA = 0, OffsetB = 0; 2756 LocationSize WidthA = 0, WidthB = 0; 2757 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && 2758 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { 2759 if (BaseOpA->isIdenticalTo(*BaseOpB)) { 2760 int LowOffset = std::min(OffsetA, OffsetB); 2761 int HighOffset = std::max(OffsetA, OffsetB); 2762 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 2763 if (LowWidth.hasValue() && 2764 LowOffset + (int)LowWidth.getValue() <= HighOffset) 2765 return true; 2766 } 2767 } 2768 return false; 2769 } 2770 2771 std::pair<unsigned, unsigned> 2772 RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 2773 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK; 2774 return std::make_pair(TF & Mask, TF & ~Mask); 2775 } 2776 2777 ArrayRef<std::pair<unsigned, const char *>> 2778 RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 2779 using namespace RISCVII; 2780 static const std::pair<unsigned, const char *> TargetFlags[] = { 2781 {MO_CALL, "riscv-call"}, 2782 {MO_LO, "riscv-lo"}, 2783 {MO_HI, "riscv-hi"}, 2784 {MO_PCREL_LO, "riscv-pcrel-lo"}, 2785 {MO_PCREL_HI, "riscv-pcrel-hi"}, 2786 {MO_GOT_HI, "riscv-got-hi"}, 2787 {MO_TPREL_LO, "riscv-tprel-lo"}, 2788 {MO_TPREL_HI, "riscv-tprel-hi"}, 2789 {MO_TPREL_ADD, "riscv-tprel-add"}, 2790 {MO_TLS_GOT_HI, "riscv-tls-got-hi"}, 2791 {MO_TLS_GD_HI, "riscv-tls-gd-hi"}, 2792 {MO_TLSDESC_HI, "riscv-tlsdesc-hi"}, 2793 {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"}, 2794 {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"}, 2795 {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}}; 2796 return ArrayRef(TargetFlags); 2797 } 2798 bool RISCVInstrInfo::isFunctionSafeToOutlineFrom( 2799 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { 2800 const Function &F = MF.getFunction(); 2801 2802 // Can F be deduplicated by the linker? If it can, don't outline from it. 2803 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 2804 return false; 2805 2806 // Don't outline from functions with section markings; the program could 2807 // expect that all the code is in the named section. 
2808 if (F.hasSection())
2809 return false;
2810
2811 // It's safe to outline from MF.
2812 return true;
2813 }
2814
2815 bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
2816 unsigned &Flags) const {
2817 // More accurate safety checking is done in getOutliningCandidateInfo.
2818 return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
2819 }
2820
2821 // Enum values indicating how an outlined call should be constructed.
2822 enum MachineOutlinerConstructionID {
2823 MachineOutlinerDefault
2824 };
2825
2826 bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
2827 MachineFunction &MF) const {
2828 return MF.getFunction().hasMinSize();
2829 }
2830
2831 std::optional<outliner::OutlinedFunction>
2832 RISCVInstrInfo::getOutliningCandidateInfo(
2833 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
2834
2835 // First we need to filter out candidates where the X5 register (i.e. t0)
2836 // can't be used to set up the function call.
2837 auto CannotInsertCall = [](outliner::Candidate &C) {
2838 const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
2839 return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
2840 };
2841
2842 llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
2843
2844 // If the sequence doesn't have enough candidates left, then we're done.
2845 if (RepeatedSequenceLocs.size() < 2)
2846 return std::nullopt;
2847
2848 unsigned SequenceSize = 0;
2849
2850 for (auto &MI : RepeatedSequenceLocs[0])
2851 SequenceSize += getInstSizeInBytes(MI);
2852
2853 // call t0, function = 8 bytes.
2854 unsigned CallOverhead = 8;
2855 for (auto &C : RepeatedSequenceLocs)
2856 C.setCallInfo(MachineOutlinerDefault, CallOverhead);
2857
2858 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
2859 unsigned FrameOverhead = 4;
2860 if (RepeatedSequenceLocs[0]
2861 .getMF()
2862 ->getSubtarget<RISCVSubtarget>()
2863 .hasStdExtCOrZca())
2864 FrameOverhead = 2;
2865
2866 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
2867 FrameOverhead, MachineOutlinerDefault);
2868 }
2869
2870 outliner::InstrType
2871 RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
2872 unsigned Flags) const {
2873 MachineInstr &MI = *MBBI;
2874 MachineBasicBlock *MBB = MI.getParent();
2875 const TargetRegisterInfo *TRI =
2876 MBB->getParent()->getSubtarget().getRegisterInfo();
2877 const auto &F = MI.getMF()->getFunction();
2878
2879 // We can manually strip out CFI instructions later.
2880 if (MI.isCFIInstruction())
2881 // If the current function has exception handling code, we can't outline
2882 // and strip these CFI instructions, since doing so may break the
2883 // .eh_frame section needed for unwinding.
2884 return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
2885 : outliner::InstrType::Invisible;
2886
2887 // We need support for tail calls to outlined functions before return
2888 // statements can be allowed.
2889 if (MI.isReturn())
2890 return outliner::InstrType::Illegal;
2891
2892 // Don't allow modifying the X5 register, which we use for return addresses
2893 // in these outlined functions.
2894 if (MI.modifiesRegister(RISCV::X5, TRI) ||
2895 MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
2896 return outliner::InstrType::Illegal;
2897
2898 // Make sure the operands don't reference something unsafe.
2899 for (const auto &MO : MI.operands()) {
2900
2901 // pcrel-hi and pcrel-lo can't be put in separate sections; filter that out
2902 // if at all possible.
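// Background sketch of the usual PC-relative lowering this guards against
// (illustrative assembly):
//   .Lpcrel_hi0: auipc a0, %pcrel_hi(sym)
//                addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)
// The %pcrel_lo operand refers back to the label of its matching auipc, so
// the pair has to land in the same section for the relocation to resolve.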
2903 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO && 2904 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() || 2905 F.hasSection() || F.getSectionPrefix())) 2906 return outliner::InstrType::Illegal; 2907 } 2908 2909 return outliner::InstrType::Legal; 2910 } 2911 2912 void RISCVInstrInfo::buildOutlinedFrame( 2913 MachineBasicBlock &MBB, MachineFunction &MF, 2914 const outliner::OutlinedFunction &OF) const { 2915 2916 // Strip out any CFI instructions 2917 bool Changed = true; 2918 while (Changed) { 2919 Changed = false; 2920 auto I = MBB.begin(); 2921 auto E = MBB.end(); 2922 for (; I != E; ++I) { 2923 if (I->isCFIInstruction()) { 2924 I->removeFromParent(); 2925 Changed = true; 2926 break; 2927 } 2928 } 2929 } 2930 2931 MBB.addLiveIn(RISCV::X5); 2932 2933 // Add in a return instruction to the end of the outlined frame. 2934 MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR)) 2935 .addReg(RISCV::X0, RegState::Define) 2936 .addReg(RISCV::X5) 2937 .addImm(0)); 2938 } 2939 2940 MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( 2941 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, 2942 MachineFunction &MF, outliner::Candidate &C) const { 2943 2944 // Add in a call instruction to the outlined function at the given location. 2945 It = MBB.insert(It, 2946 BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5) 2947 .addGlobalAddress(M.getNamedValue(MF.getName()), 0, 2948 RISCVII::MO_CALL)); 2949 return It; 2950 } 2951 2952 std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI, 2953 Register Reg) const { 2954 // TODO: Handle cases where Reg is a super- or sub-register of the 2955 // destination register. 2956 const MachineOperand &Op0 = MI.getOperand(0); 2957 if (!Op0.isReg() || Reg != Op0.getReg()) 2958 return std::nullopt; 2959 2960 // Don't consider ADDIW as a candidate because the caller may not be aware 2961 // of its sign extension behaviour. 2962 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() && 2963 MI.getOperand(2).isImm()) 2964 return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()}; 2965 2966 return std::nullopt; 2967 } 2968 2969 // MIR printer helper function to annotate Operands with a comment. 2970 std::string RISCVInstrInfo::createMIROperandComment( 2971 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, 2972 const TargetRegisterInfo *TRI) const { 2973 // Print a generic comment for this operand if there is one. 2974 std::string GenericComment = 2975 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); 2976 if (!GenericComment.empty()) 2977 return GenericComment; 2978 2979 // If not, we must have an immediate operand. 2980 if (!Op.isImm()) 2981 return std::string(); 2982 2983 std::string Comment; 2984 raw_string_ostream OS(Comment); 2985 2986 uint64_t TSFlags = MI.getDesc().TSFlags; 2987 2988 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW 2989 // operand of vector codegen pseudos. 2990 if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI || 2991 MI.getOpcode() == RISCV::PseudoVSETVLI || 2992 MI.getOpcode() == RISCV::PseudoVSETIVLI || 2993 MI.getOpcode() == RISCV::PseudoVSETVLIX0) && 2994 OpIdx == 2) { 2995 unsigned Imm = MI.getOperand(OpIdx).getImm(); 2996 RISCVVType::printVType(Imm, OS); 2997 } else if (RISCVII::hasSEWOp(TSFlags) && 2998 OpIdx == RISCVII::getSEWOpNum(MI.getDesc())) { 2999 unsigned Log2SEW = MI.getOperand(OpIdx).getImm(); 3000 unsigned SEW = Log2SEW ? 
1 << Log2SEW : 8; 3001 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 3002 OS << "e" << SEW; 3003 } else if (RISCVII::hasVecPolicyOp(TSFlags) && 3004 OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) { 3005 unsigned Policy = MI.getOperand(OpIdx).getImm(); 3006 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && 3007 "Invalid Policy Value"); 3008 OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", " 3009 << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu"); 3010 } 3011 3012 OS.flush(); 3013 return Comment; 3014 } 3015 3016 // clang-format off 3017 #define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \ 3018 RISCV::Pseudo##OP##_##LMUL 3019 3020 #define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \ 3021 RISCV::Pseudo##OP##_##LMUL##_MASK 3022 3023 #define CASE_RVV_OPCODE_LMUL(OP, LMUL) \ 3024 CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \ 3025 case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) 3026 3027 #define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \ 3028 CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \ 3029 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \ 3030 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \ 3031 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \ 3032 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \ 3033 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4) 3034 3035 #define CASE_RVV_OPCODE_UNMASK(OP) \ 3036 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \ 3037 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8) 3038 3039 #define CASE_RVV_OPCODE_MASK_WIDEN(OP) \ 3040 CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \ 3041 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \ 3042 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \ 3043 case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \ 3044 case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \ 3045 case CASE_RVV_OPCODE_MASK_LMUL(OP, M4) 3046 3047 #define CASE_RVV_OPCODE_MASK(OP) \ 3048 CASE_RVV_OPCODE_MASK_WIDEN(OP): \ 3049 case CASE_RVV_OPCODE_MASK_LMUL(OP, M8) 3050 3051 #define CASE_RVV_OPCODE_WIDEN(OP) \ 3052 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \ 3053 case CASE_RVV_OPCODE_MASK_WIDEN(OP) 3054 3055 #define CASE_RVV_OPCODE(OP) \ 3056 CASE_RVV_OPCODE_UNMASK(OP): \ 3057 case CASE_RVV_OPCODE_MASK(OP) 3058 // clang-format on 3059 3060 // clang-format off 3061 #define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \ 3062 RISCV::PseudoV##OP##_##TYPE##_##LMUL 3063 3064 #define CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) \ 3065 CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \ 3066 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \ 3067 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \ 3068 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8) 3069 3070 #define CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) \ 3071 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \ 3072 case CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) 3073 3074 #define CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) \ 3075 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \ 3076 case CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) 3077 3078 #define CASE_VMA_OPCODE_LMULS(OP, TYPE) \ 3079 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \ 3080 case CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) 3081 3082 // VFMA instructions are SEW specific. 
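// Illustrative expansion of the SEW-specific macros that follow:
// CASE_VFMA_OPCODE_COMMON(FMADD, VV, M1, E32) expands to
// RISCV::PseudoVFMADD_VV_M1_E32, so each case list enumerates one pseudo per
// LMUL/SEW combination.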
3083 #define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \ 3084 RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW 3085 3086 #define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \ 3087 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \ 3088 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \ 3089 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \ 3090 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW) 3091 3092 #define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \ 3093 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \ 3094 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) 3095 3096 #define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \ 3097 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \ 3098 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) 3099 3100 #define CASE_VFMA_OPCODE_VV(OP) \ 3101 CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \ 3102 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \ 3103 case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64) 3104 3105 #define CASE_VFMA_SPLATS(OP) \ 3106 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \ 3107 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \ 3108 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64) 3109 // clang-format on 3110 3111 bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, 3112 unsigned &SrcOpIdx1, 3113 unsigned &SrcOpIdx2) const { 3114 const MCInstrDesc &Desc = MI.getDesc(); 3115 if (!Desc.isCommutable()) 3116 return false; 3117 3118 switch (MI.getOpcode()) { 3119 case RISCV::TH_MVEQZ: 3120 case RISCV::TH_MVNEZ: 3121 // We can't commute operands if operand 2 (i.e., rs1 in 3122 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is 3123 // not valid as the in/out-operand 1). 3124 if (MI.getOperand(2).getReg() == RISCV::X0) 3125 return false; 3126 // Operands 1 and 2 are commutable, if we switch the opcode. 3127 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2); 3128 case RISCV::TH_MULA: 3129 case RISCV::TH_MULAW: 3130 case RISCV::TH_MULAH: 3131 case RISCV::TH_MULS: 3132 case RISCV::TH_MULSW: 3133 case RISCV::TH_MULSH: 3134 // Operands 2 and 3 are commutable. 3135 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); 3136 case RISCV::PseudoCCMOVGPRNoX0: 3137 case RISCV::PseudoCCMOVGPR: 3138 // Operands 4 and 5 are commutable. 3139 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5); 3140 case CASE_RVV_OPCODE(VADD_VV): 3141 case CASE_RVV_OPCODE(VAND_VV): 3142 case CASE_RVV_OPCODE(VOR_VV): 3143 case CASE_RVV_OPCODE(VXOR_VV): 3144 case CASE_RVV_OPCODE_MASK(VMSEQ_VV): 3145 case CASE_RVV_OPCODE_MASK(VMSNE_VV): 3146 case CASE_RVV_OPCODE(VMIN_VV): 3147 case CASE_RVV_OPCODE(VMINU_VV): 3148 case CASE_RVV_OPCODE(VMAX_VV): 3149 case CASE_RVV_OPCODE(VMAXU_VV): 3150 case CASE_RVV_OPCODE(VMUL_VV): 3151 case CASE_RVV_OPCODE(VMULH_VV): 3152 case CASE_RVV_OPCODE(VMULHU_VV): 3153 case CASE_RVV_OPCODE_WIDEN(VWADD_VV): 3154 case CASE_RVV_OPCODE_WIDEN(VWADDU_VV): 3155 case CASE_RVV_OPCODE_WIDEN(VWMUL_VV): 3156 case CASE_RVV_OPCODE_WIDEN(VWMULU_VV): 3157 case CASE_RVV_OPCODE_WIDEN(VWMACC_VV): 3158 case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV): 3159 case CASE_RVV_OPCODE_UNMASK(VADC_VVM): 3160 case CASE_RVV_OPCODE(VSADD_VV): 3161 case CASE_RVV_OPCODE(VSADDU_VV): 3162 case CASE_RVV_OPCODE(VAADD_VV): 3163 case CASE_RVV_OPCODE(VAADDU_VV): 3164 case CASE_RVV_OPCODE(VSMUL_VV): 3165 // Operands 2 and 3 are commutable. 
3166 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
3167 case CASE_VFMA_SPLATS(FMADD):
3168 case CASE_VFMA_SPLATS(FMSUB):
3169 case CASE_VFMA_SPLATS(FMACC):
3170 case CASE_VFMA_SPLATS(FMSAC):
3171 case CASE_VFMA_SPLATS(FNMADD):
3172 case CASE_VFMA_SPLATS(FNMSUB):
3173 case CASE_VFMA_SPLATS(FNMACC):
3174 case CASE_VFMA_SPLATS(FNMSAC):
3175 case CASE_VFMA_OPCODE_VV(FMACC):
3176 case CASE_VFMA_OPCODE_VV(FMSAC):
3177 case CASE_VFMA_OPCODE_VV(FNMACC):
3178 case CASE_VFMA_OPCODE_VV(FNMSAC):
3179 case CASE_VMA_OPCODE_LMULS(MADD, VX):
3180 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
3181 case CASE_VMA_OPCODE_LMULS(MACC, VX):
3182 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
3183 case CASE_VMA_OPCODE_LMULS(MACC, VV):
3184 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
3185 // If the tail policy is undisturbed, we can't commute.
3186 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
3187 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
3188 return false;
3189
3190 // For these instructions we can only swap operand 1 and operand 3 by
3191 // changing the opcode.
3192 unsigned CommutableOpIdx1 = 1;
3193 unsigned CommutableOpIdx2 = 3;
3194 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
3195 CommutableOpIdx2))
3196 return false;
3197 return true;
3198 }
3199 case CASE_VFMA_OPCODE_VV(FMADD):
3200 case CASE_VFMA_OPCODE_VV(FMSUB):
3201 case CASE_VFMA_OPCODE_VV(FNMADD):
3202 case CASE_VFMA_OPCODE_VV(FNMSUB):
3203 case CASE_VMA_OPCODE_LMULS(MADD, VV):
3204 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
3205 // If the tail policy is undisturbed, we can't commute.
3206 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
3207 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
3208 return false;
3209
3210 // For these instructions we have more freedom. We can commute with the
3211 // other multiplicand or with the addend/subtrahend/minuend.
3212
3213 // Any fixed operand must be from source 1, 2 or 3.
3214 if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
3215 return false;
3216 if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
3217 return false;
3218
3219 // If both ops are fixed, one must be the tied source.
3220 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
3221 SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
3222 return false;
3223
3224 // Look for two different register operands assumed to be commutable
3225 // regardless of the FMA opcode. The FMA opcode is adjusted later if
3226 // needed.
3227 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
3228 SrcOpIdx2 == CommuteAnyOperandIndex) {
3229 // At least one of the operands to be commuted is not specified, and
3230 // this method is free to choose appropriate commutable operands.
3231 unsigned CommutableOpIdx1 = SrcOpIdx1;
3232 if (SrcOpIdx1 == SrcOpIdx2) {
3233 // Neither of the operands is fixed. Set one of the commutable
3234 // operands to the tied source.
3235 CommutableOpIdx1 = 1;
3236 } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
3237 // Only one of the operands is not fixed.
3238 CommutableOpIdx1 = SrcOpIdx2;
3239 }
3240
3241 // CommutableOpIdx1 is well defined now. Let's choose another commutable
3242 // operand and assign its index to CommutableOpIdx2.
3243 unsigned CommutableOpIdx2;
3244 if (CommutableOpIdx1 != 1) {
3245 // If we haven't already used the tied source, we must use it now.
3246 CommutableOpIdx2 = 1;
3247 } else {
3248 Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();
3249
3250 // The commuted operands should have different registers.
3251 // Otherwise, the commute transformation does not change anything and 3252 // is useless. We use this as a hint to make our decision. 3253 if (Op1Reg != MI.getOperand(2).getReg()) 3254 CommutableOpIdx2 = 2; 3255 else 3256 CommutableOpIdx2 = 3; 3257 } 3258 3259 // Assign the found pair of commutable indices to SrcOpIdx1 and 3260 // SrcOpIdx2 to return those values. 3261 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1, 3262 CommutableOpIdx2)) 3263 return false; 3264 } 3265 3266 return true; 3267 } 3268 } 3269 3270 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); 3271 } 3272 3273 // clang-format off 3274 #define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \ 3275 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \ 3276 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \ 3277 break; 3278 3279 #define CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \ 3280 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \ 3281 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \ 3282 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \ 3283 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8) 3284 3285 #define CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \ 3286 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \ 3287 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) 3288 3289 #define CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \ 3290 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \ 3291 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) 3292 3293 #define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \ 3294 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \ 3295 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) 3296 3297 #define CASE_VMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \ 3298 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \ 3299 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \ 3300 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64) 3301 3302 // VFMA depends on SEW. 
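// For illustration, CASE_VFMA_CHANGE_OPCODE_COMMON(FMADD, FMACC, VV, M1, E32)
// expands to:
//   case RISCV::PseudoVFMADD_VV_M1_E32:
//     Opc = RISCV::PseudoVFMACC_VV_M1_E32;
//     break;
// and the wrapper macros below simply enumerate that pattern across the
// supported LMUL and SEW combinations.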
3303 #define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \ 3304 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \ 3305 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \ 3306 break; 3307 3308 #define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \ 3309 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \ 3310 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \ 3311 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \ 3312 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW) 3313 3314 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \ 3315 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \ 3316 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) 3317 3318 #define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \ 3319 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \ 3320 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \ 3321 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64) 3322 3323 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \ 3324 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \ 3325 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) 3326 3327 #define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE, SEW) \ 3328 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8, SEW) \ 3329 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) 3330 3331 #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \ 3332 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \ 3333 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \ 3334 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64) 3335 3336 MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, 3337 bool NewMI, 3338 unsigned OpIdx1, 3339 unsigned OpIdx2) const { 3340 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { 3341 if (NewMI) 3342 return *MI.getParent()->getParent()->CloneMachineInstr(&MI); 3343 return MI; 3344 }; 3345 3346 switch (MI.getOpcode()) { 3347 case RISCV::TH_MVEQZ: 3348 case RISCV::TH_MVNEZ: { 3349 auto &WorkingMI = cloneIfNew(MI); 3350 WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ 3351 : RISCV::TH_MVEQZ)); 3352 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1, 3353 OpIdx2); 3354 } 3355 case RISCV::PseudoCCMOVGPRNoX0: 3356 case RISCV::PseudoCCMOVGPR: { 3357 // CCMOV can be commuted by inverting the condition. 
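// Operand 3 holds the condition code and, as noted in findCommutedOpIndices
// above, operands 4 and 5 are the two selected values; swapping those values
// while replacing the condition with its opposite leaves the result unchanged.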
3358 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); 3359 CC = RISCVCC::getOppositeBranchCondition(CC); 3360 auto &WorkingMI = cloneIfNew(MI); 3361 WorkingMI.getOperand(3).setImm(CC); 3362 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false, 3363 OpIdx1, OpIdx2); 3364 } 3365 case CASE_VFMA_SPLATS(FMACC): 3366 case CASE_VFMA_SPLATS(FMADD): 3367 case CASE_VFMA_SPLATS(FMSAC): 3368 case CASE_VFMA_SPLATS(FMSUB): 3369 case CASE_VFMA_SPLATS(FNMACC): 3370 case CASE_VFMA_SPLATS(FNMADD): 3371 case CASE_VFMA_SPLATS(FNMSAC): 3372 case CASE_VFMA_SPLATS(FNMSUB): 3373 case CASE_VFMA_OPCODE_VV(FMACC): 3374 case CASE_VFMA_OPCODE_VV(FMSAC): 3375 case CASE_VFMA_OPCODE_VV(FNMACC): 3376 case CASE_VFMA_OPCODE_VV(FNMSAC): 3377 case CASE_VMA_OPCODE_LMULS(MADD, VX): 3378 case CASE_VMA_OPCODE_LMULS(NMSUB, VX): 3379 case CASE_VMA_OPCODE_LMULS(MACC, VX): 3380 case CASE_VMA_OPCODE_LMULS(NMSAC, VX): 3381 case CASE_VMA_OPCODE_LMULS(MACC, VV): 3382 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): { 3383 // It only makes sense to toggle these between clobbering the 3384 // addend/subtrahend/minuend and one of the multiplicands. 3385 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); 3386 assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index"); 3387 unsigned Opc; 3388 switch (MI.getOpcode()) { 3389 default: 3390 llvm_unreachable("Unexpected opcode"); 3391 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD) 3392 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC) 3393 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB) 3394 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC) 3395 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD) 3396 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC) 3397 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB) 3398 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC) 3399 CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD) 3400 CASE_VFMA_CHANGE_OPCODE_VV(FMSAC, FMSUB) 3401 CASE_VFMA_CHANGE_OPCODE_VV(FNMACC, FNMADD) 3402 CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC, FNMSUB) 3403 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX) 3404 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX) 3405 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX) 3406 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX) 3407 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV) 3408 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV) 3409 } 3410 3411 auto &WorkingMI = cloneIfNew(MI); 3412 WorkingMI.setDesc(get(Opc)); 3413 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, 3414 OpIdx1, OpIdx2); 3415 } 3416 case CASE_VFMA_OPCODE_VV(FMADD): 3417 case CASE_VFMA_OPCODE_VV(FMSUB): 3418 case CASE_VFMA_OPCODE_VV(FNMADD): 3419 case CASE_VFMA_OPCODE_VV(FNMSUB): 3420 case CASE_VMA_OPCODE_LMULS(MADD, VV): 3421 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): { 3422 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); 3423 // If one of the operands is the addend, we need to change the opcode. 3424 // Otherwise we're just swapping two of the multiplicands.
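// For example (per the RVV spec), vfmadd.vv vd, vs1, vs2 computes
// vd = (vd * vs1) + vs2 while vfmacc.vv vd, vs1, vs2 computes
// vd = (vs1 * vs2) + vd, so commuting the tied operand with the addend
// requires switching between the *MADD/*MSUB and *MACC/*MSAC forms.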
3425 if (OpIdx1 == 3 || OpIdx2 == 3) { 3426 unsigned Opc; 3427 switch (MI.getOpcode()) { 3428 default: 3429 llvm_unreachable("Unexpected opcode"); 3430 CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC) 3431 CASE_VFMA_CHANGE_OPCODE_VV(FMSUB, FMSAC) 3432 CASE_VFMA_CHANGE_OPCODE_VV(FNMADD, FNMACC) 3433 CASE_VFMA_CHANGE_OPCODE_VV(FNMSUB, FNMSAC) 3434 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV) 3435 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV) 3436 } 3437 3438 auto &WorkingMI = cloneIfNew(MI); 3439 WorkingMI.setDesc(get(Opc)); 3440 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, 3441 OpIdx1, OpIdx2); 3442 } 3443 // Let the default code handle it. 3444 break; 3445 } 3446 } 3447 3448 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); 3449 } 3450 3451 #undef CASE_RVV_OPCODE_UNMASK_LMUL 3452 #undef CASE_RVV_OPCODE_MASK_LMUL 3453 #undef CASE_RVV_OPCODE_LMUL 3454 #undef CASE_RVV_OPCODE_UNMASK_WIDEN 3455 #undef CASE_RVV_OPCODE_UNMASK 3456 #undef CASE_RVV_OPCODE_MASK_WIDEN 3457 #undef CASE_RVV_OPCODE_MASK 3458 #undef CASE_RVV_OPCODE_WIDEN 3459 #undef CASE_RVV_OPCODE 3460 3461 #undef CASE_VMA_OPCODE_COMMON 3462 #undef CASE_VMA_OPCODE_LMULS_M1 3463 #undef CASE_VMA_OPCODE_LMULS_MF2 3464 #undef CASE_VMA_OPCODE_LMULS_MF4 3465 #undef CASE_VMA_OPCODE_LMULS 3466 #undef CASE_VFMA_OPCODE_COMMON 3467 #undef CASE_VFMA_OPCODE_LMULS_M1 3468 #undef CASE_VFMA_OPCODE_LMULS_MF2 3469 #undef CASE_VFMA_OPCODE_LMULS_MF4 3470 #undef CASE_VFMA_OPCODE_VV 3471 #undef CASE_VFMA_SPLATS 3472 3473 // clang-format off 3474 #define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \ 3475 RISCV::PseudoV##OP##_##LMUL##_TIED 3476 3477 #define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \ 3478 CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \ 3479 case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \ 3480 case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \ 3481 case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \ 3482 case CASE_WIDEOP_OPCODE_COMMON(OP, M4) 3483 3484 #define CASE_WIDEOP_OPCODE_LMULS(OP) \ 3485 CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \ 3486 case CASE_WIDEOP_OPCODE_LMULS_MF4(OP) 3487 3488 #define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \ 3489 case RISCV::PseudoV##OP##_##LMUL##_TIED: \ 3490 NewOpc = RISCV::PseudoV##OP##_##LMUL; \ 3491 break; 3492 3493 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \ 3494 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \ 3495 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \ 3496 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \ 3497 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \ 3498 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4) 3499 3500 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \ 3501 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \ 3502 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) 3503 3504 // FP widening ops may be SEW aware. Create SEW-aware cases for them.
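// For illustration, CASE_FP_WIDEOP_OPCODE_COMMON(FWADD_WV, M1, E32) expands to
// RISCV::PseudoVFWADD_WV_M1_E32_TIED, and the corresponding
// CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON case rewrites that opcode to
// RISCV::PseudoVFWADD_WV_M1_E32.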
3505 #define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) \ 3506 RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED 3507 3508 #define CASE_FP_WIDEOP_OPCODE_LMULS_MF4(OP) \ 3509 CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \ 3510 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \ 3511 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32): \ 3512 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \ 3513 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32): \ 3514 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \ 3515 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32): \ 3516 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16): \ 3517 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32) \ 3518 3519 #define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW) \ 3520 case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED: \ 3521 NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW; \ 3522 break; 3523 3524 #define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \ 3525 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \ 3526 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \ 3527 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32) \ 3528 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \ 3529 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32) \ 3530 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \ 3531 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \ 3532 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \ 3533 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32) \ 3534 3535 #define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP) \ 3536 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) 3537 // clang-format on 3538 3539 MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, 3540 LiveVariables *LV, 3541 LiveIntervals *LIS) const { 3542 MachineInstrBuilder MIB; 3543 switch (MI.getOpcode()) { 3544 default: 3545 return nullptr; 3546 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV): 3547 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): { 3548 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && 3549 MI.getNumExplicitOperands() == 7 && 3550 "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy"); 3551 // If the tail policy is undisturbed we can't convert. 3552 if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() & 3553 1) == 0) 3554 return nullptr; 3555 // clang-format off 3556 unsigned NewOpc; 3557 switch (MI.getOpcode()) { 3558 default: 3559 llvm_unreachable("Unexpected opcode"); 3560 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV) 3561 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV) 3562 } 3563 // clang-format on 3564 3565 MachineBasicBlock &MBB = *MI.getParent(); 3566 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 3567 .add(MI.getOperand(0)) 3568 .addReg(MI.getOperand(0).getReg(), RegState::Undef) 3569 .add(MI.getOperand(1)) 3570 .add(MI.getOperand(2)) 3571 .add(MI.getOperand(3)) 3572 .add(MI.getOperand(4)) 3573 .add(MI.getOperand(5)) 3574 .add(MI.getOperand(6)); 3575 break; 3576 } 3577 case CASE_WIDEOP_OPCODE_LMULS(WADD_WV): 3578 case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV): 3579 case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV): 3580 case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): { 3581 // If the tail policy is undisturbed we can't convert. 
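// Bit 0 of the policy operand is the tail-agnostic bit; the rewrite below
// passes the destination as an undef passthru instead of a real tie, which is
// only safe when the tail is agnostic.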
3582 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && 3583 MI.getNumExplicitOperands() == 6); 3584 if ((MI.getOperand(5).getImm() & 1) == 0) 3585 return nullptr; 3586 3587 // clang-format off 3588 unsigned NewOpc; 3589 switch (MI.getOpcode()) { 3590 default: 3591 llvm_unreachable("Unexpected opcode"); 3592 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV) 3593 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV) 3594 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV) 3595 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV) 3596 } 3597 // clang-format on 3598 3599 MachineBasicBlock &MBB = *MI.getParent(); 3600 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 3601 .add(MI.getOperand(0)) 3602 .addReg(MI.getOperand(0).getReg(), RegState::Undef) 3603 .add(MI.getOperand(1)) 3604 .add(MI.getOperand(2)) 3605 .add(MI.getOperand(3)) 3606 .add(MI.getOperand(4)) 3607 .add(MI.getOperand(5)); 3608 break; 3609 } 3610 } 3611 MIB.copyImplicitOps(MI); 3612 3613 if (LV) { 3614 unsigned NumOps = MI.getNumOperands(); 3615 for (unsigned I = 1; I < NumOps; ++I) { 3616 MachineOperand &Op = MI.getOperand(I); 3617 if (Op.isReg() && Op.isKill()) 3618 LV->replaceKillInstruction(Op.getReg(), MI, *MIB); 3619 } 3620 } 3621 3622 if (LIS) { 3623 SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB); 3624 3625 if (MI.getOperand(0).isEarlyClobber()) { 3626 // Use operand 1 was tied to the early-clobber def operand 0, so its live 3627 // interval could have ended at an early-clobber slot. Now that they are 3628 // no longer tied, we need to update it to the normal register slot. 3629 LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg()); 3630 LiveRange::Segment *S = LI.getSegmentContaining(Idx); 3631 if (S->end == Idx.getRegSlot(true)) 3632 S->end = Idx.getRegSlot(); 3633 } 3634 } 3635 3636 return MIB; 3637 } 3638 3639 #undef CASE_WIDEOP_OPCODE_COMMON 3640 #undef CASE_WIDEOP_OPCODE_LMULS_MF4 3641 #undef CASE_WIDEOP_OPCODE_LMULS 3642 #undef CASE_WIDEOP_CHANGE_OPCODE_COMMON 3643 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4 3644 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS 3645 #undef CASE_FP_WIDEOP_OPCODE_COMMON 3646 #undef CASE_FP_WIDEOP_OPCODE_LMULS_MF4 3647 #undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON 3648 #undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4 3649 #undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS 3650 3651 void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB, 3652 MachineBasicBlock::iterator II, const DebugLoc &DL, 3653 Register DestReg, uint32_t Amount, 3654 MachineInstr::MIFlag Flag) const { 3655 MachineRegisterInfo &MRI = MF.getRegInfo(); 3656 if (llvm::has_single_bit<uint32_t>(Amount)) { 3657 uint32_t ShiftAmount = Log2_32(Amount); 3658 if (ShiftAmount == 0) 3659 return; 3660 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3661 .addReg(DestReg, RegState::Kill) 3662 .addImm(ShiftAmount) 3663 .setMIFlag(Flag); 3664 } else if (STI.hasStdExtZba() && 3665 ((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) || 3666 (Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) || 3667 (Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) { 3668 // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
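// For example, Amount == 12 == 3 * 4: SLLI by 2 scales DestReg by 4, and
// SH1ADD DestReg, DestReg, DestReg then computes (DestReg << 1) + DestReg,
// supplying the remaining factor of 3.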
3669 unsigned Opc; 3670 uint32_t ShiftAmount; 3671 if (Amount % 9 == 0) { 3672 Opc = RISCV::SH3ADD; 3673 ShiftAmount = Log2_64(Amount / 9); 3674 } else if (Amount % 5 == 0) { 3675 Opc = RISCV::SH2ADD; 3676 ShiftAmount = Log2_64(Amount / 5); 3677 } else if (Amount % 3 == 0) { 3678 Opc = RISCV::SH1ADD; 3679 ShiftAmount = Log2_64(Amount / 3); 3680 } else { 3681 llvm_unreachable("implied by if-clause"); 3682 } 3683 if (ShiftAmount) 3684 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3685 .addReg(DestReg, RegState::Kill) 3686 .addImm(ShiftAmount) 3687 .setMIFlag(Flag); 3688 BuildMI(MBB, II, DL, get(Opc), DestReg) 3689 .addReg(DestReg, RegState::Kill) 3690 .addReg(DestReg) 3691 .setMIFlag(Flag); 3692 } else if (llvm::has_single_bit<uint32_t>(Amount - 1)) { 3693 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3694 uint32_t ShiftAmount = Log2_32(Amount - 1); 3695 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) 3696 .addReg(DestReg) 3697 .addImm(ShiftAmount) 3698 .setMIFlag(Flag); 3699 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg) 3700 .addReg(ScaledRegister, RegState::Kill) 3701 .addReg(DestReg, RegState::Kill) 3702 .setMIFlag(Flag); 3703 } else if (llvm::has_single_bit<uint32_t>(Amount + 1)) { 3704 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3705 uint32_t ShiftAmount = Log2_32(Amount + 1); 3706 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) 3707 .addReg(DestReg) 3708 .addImm(ShiftAmount) 3709 .setMIFlag(Flag); 3710 BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg) 3711 .addReg(ScaledRegister, RegState::Kill) 3712 .addReg(DestReg, RegState::Kill) 3713 .setMIFlag(Flag); 3714 } else if (STI.hasStdExtZmmul()) { 3715 Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3716 movImm(MBB, II, DL, N, Amount, Flag); 3717 BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg) 3718 .addReg(DestReg, RegState::Kill) 3719 .addReg(N, RegState::Kill) 3720 .setMIFlag(Flag); 3721 } else { 3722 Register Acc; 3723 uint32_t PrevShiftAmount = 0; 3724 for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) { 3725 if (Amount & (1U << ShiftAmount)) { 3726 if (ShiftAmount) 3727 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3728 .addReg(DestReg, RegState::Kill) 3729 .addImm(ShiftAmount - PrevShiftAmount) 3730 .setMIFlag(Flag); 3731 if (Amount >> (ShiftAmount + 1)) { 3732 // If we don't have an accumulator yet, create it and copy DestReg. 3733 if (!Acc) { 3734 Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3735 BuildMI(MBB, II, DL, get(TargetOpcode::COPY), Acc) 3736 .addReg(DestReg) 3737 .setMIFlag(Flag); 3738 } else { 3739 BuildMI(MBB, II, DL, get(RISCV::ADD), Acc) 3740 .addReg(Acc, RegState::Kill) 3741 .addReg(DestReg) 3742 .setMIFlag(Flag); 3743 } 3744 } 3745 PrevShiftAmount = ShiftAmount; 3746 } 3747 } 3748 assert(Acc && "Expected valid accumulator"); 3749 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg) 3750 .addReg(DestReg, RegState::Kill) 3751 .addReg(Acc, RegState::Kill) 3752 .setMIFlag(Flag); 3753 } 3754 } 3755 3756 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 3757 RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const { 3758 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 3759 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"}, 3760 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}}; 3761 return ArrayRef(TargetFlags); 3762 } 3763 3764 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
3765 bool RISCV::isSEXT_W(const MachineInstr &MI) { 3766 return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() && 3767 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0; 3768 } 3769 3770 // Returns true if this is the zext.w pattern, adduw rd, rs1, x0. 3771 bool RISCV::isZEXT_W(const MachineInstr &MI) { 3772 return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() && 3773 MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0; 3774 } 3775 3776 // Returns true if this is the zext.b pattern, andi rd, rs1, 255. 3777 bool RISCV::isZEXT_B(const MachineInstr &MI) { 3778 return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() && 3779 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255; 3780 } 3781 3782 static bool isRVVWholeLoadStore(unsigned Opcode) { 3783 switch (Opcode) { 3784 default: 3785 return false; 3786 case RISCV::VS1R_V: 3787 case RISCV::VS2R_V: 3788 case RISCV::VS4R_V: 3789 case RISCV::VS8R_V: 3790 case RISCV::VL1RE8_V: 3791 case RISCV::VL2RE8_V: 3792 case RISCV::VL4RE8_V: 3793 case RISCV::VL8RE8_V: 3794 case RISCV::VL1RE16_V: 3795 case RISCV::VL2RE16_V: 3796 case RISCV::VL4RE16_V: 3797 case RISCV::VL8RE16_V: 3798 case RISCV::VL1RE32_V: 3799 case RISCV::VL2RE32_V: 3800 case RISCV::VL4RE32_V: 3801 case RISCV::VL8RE32_V: 3802 case RISCV::VL1RE64_V: 3803 case RISCV::VL2RE64_V: 3804 case RISCV::VL4RE64_V: 3805 case RISCV::VL8RE64_V: 3806 return true; 3807 } 3808 } 3809 3810 bool RISCV::isRVVSpill(const MachineInstr &MI) { 3811 // RVV lacks any support for immediate addressing for stack addresses, so be 3812 // conservative. 3813 unsigned Opcode = MI.getOpcode(); 3814 if (!RISCVVPseudosTable::getPseudoInfo(Opcode) && 3815 !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode)) 3816 return false; 3817 return true; 3818 } 3819 3820 std::optional<std::pair<unsigned, unsigned>> 3821 RISCV::isRVVSpillForZvlsseg(unsigned Opcode) { 3822 switch (Opcode) { 3823 default: 3824 return std::nullopt; 3825 case RISCV::PseudoVSPILL2_M1: 3826 case RISCV::PseudoVRELOAD2_M1: 3827 return std::make_pair(2u, 1u); 3828 case RISCV::PseudoVSPILL2_M2: 3829 case RISCV::PseudoVRELOAD2_M2: 3830 return std::make_pair(2u, 2u); 3831 case RISCV::PseudoVSPILL2_M4: 3832 case RISCV::PseudoVRELOAD2_M4: 3833 return std::make_pair(2u, 4u); 3834 case RISCV::PseudoVSPILL3_M1: 3835 case RISCV::PseudoVRELOAD3_M1: 3836 return std::make_pair(3u, 1u); 3837 case RISCV::PseudoVSPILL3_M2: 3838 case RISCV::PseudoVRELOAD3_M2: 3839 return std::make_pair(3u, 2u); 3840 case RISCV::PseudoVSPILL4_M1: 3841 case RISCV::PseudoVRELOAD4_M1: 3842 return std::make_pair(4u, 1u); 3843 case RISCV::PseudoVSPILL4_M2: 3844 case RISCV::PseudoVRELOAD4_M2: 3845 return std::make_pair(4u, 2u); 3846 case RISCV::PseudoVSPILL5_M1: 3847 case RISCV::PseudoVRELOAD5_M1: 3848 return std::make_pair(5u, 1u); 3849 case RISCV::PseudoVSPILL6_M1: 3850 case RISCV::PseudoVRELOAD6_M1: 3851 return std::make_pair(6u, 1u); 3852 case RISCV::PseudoVSPILL7_M1: 3853 case RISCV::PseudoVRELOAD7_M1: 3854 return std::make_pair(7u, 1u); 3855 case RISCV::PseudoVSPILL8_M1: 3856 case RISCV::PseudoVRELOAD8_M1: 3857 return std::make_pair(8u, 1u); 3858 } 3859 } 3860 3861 bool RISCV::isFaultFirstLoad(const MachineInstr &MI) { 3862 return MI.getNumExplicitDefs() == 2 && 3863 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) && !MI.isInlineAsm(); 3864 } 3865 3866 bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) { 3867 int16_t MI1FrmOpIdx = 3868 RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm); 
3869 int16_t MI2FrmOpIdx = 3870 RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm); 3871 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0) 3872 return false; 3873 MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx); 3874 MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx); 3875 return FrmOp1.getImm() == FrmOp2.getImm(); 3876 } 3877 3878 std::optional<unsigned> 3879 RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) { 3880 // TODO: Handle Zvbb instructions 3881 switch (Opcode) { 3882 default: 3883 return std::nullopt; 3884 3885 // 11.6. Vector Single-Width Shift Instructions 3886 case RISCV::VSLL_VX: 3887 case RISCV::VSRL_VX: 3888 case RISCV::VSRA_VX: 3889 // 12.4. Vector Single-Width Scaling Shift Instructions 3890 case RISCV::VSSRL_VX: 3891 case RISCV::VSSRA_VX: 3892 // Only the low lg2(SEW) bits of the shift-amount value are used. 3893 return Log2SEW; 3894 3895 // 11.7 Vector Narrowing Integer Right Shift Instructions 3896 case RISCV::VNSRL_WX: 3897 case RISCV::VNSRA_WX: 3898 // 12.5. Vector Narrowing Fixed-Point Clip Instructions 3899 case RISCV::VNCLIPU_WX: 3900 case RISCV::VNCLIP_WX: 3901 // Only the low lg2(2*SEW) bits of the shift-amount value are used. 3902 return Log2SEW + 1; 3903 3904 // 11.1. Vector Single-Width Integer Add and Subtract 3905 case RISCV::VADD_VX: 3906 case RISCV::VSUB_VX: 3907 case RISCV::VRSUB_VX: 3908 // 11.2. Vector Widening Integer Add/Subtract 3909 case RISCV::VWADDU_VX: 3910 case RISCV::VWSUBU_VX: 3911 case RISCV::VWADD_VX: 3912 case RISCV::VWSUB_VX: 3913 case RISCV::VWADDU_WX: 3914 case RISCV::VWSUBU_WX: 3915 case RISCV::VWADD_WX: 3916 case RISCV::VWSUB_WX: 3917 // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions 3918 case RISCV::VADC_VXM: 3919 case RISCV::VADC_VIM: 3920 case RISCV::VMADC_VXM: 3921 case RISCV::VMADC_VIM: 3922 case RISCV::VMADC_VX: 3923 case RISCV::VSBC_VXM: 3924 case RISCV::VMSBC_VXM: 3925 case RISCV::VMSBC_VX: 3926 // 11.5 Vector Bitwise Logical Instructions 3927 case RISCV::VAND_VX: 3928 case RISCV::VOR_VX: 3929 case RISCV::VXOR_VX: 3930 // 11.8. Vector Integer Compare Instructions 3931 case RISCV::VMSEQ_VX: 3932 case RISCV::VMSNE_VX: 3933 case RISCV::VMSLTU_VX: 3934 case RISCV::VMSLT_VX: 3935 case RISCV::VMSLEU_VX: 3936 case RISCV::VMSLE_VX: 3937 case RISCV::VMSGTU_VX: 3938 case RISCV::VMSGT_VX: 3939 // 11.9. Vector Integer Min/Max Instructions 3940 case RISCV::VMINU_VX: 3941 case RISCV::VMIN_VX: 3942 case RISCV::VMAXU_VX: 3943 case RISCV::VMAX_VX: 3944 // 11.10. Vector Single-Width Integer Multiply Instructions 3945 case RISCV::VMUL_VX: 3946 case RISCV::VMULH_VX: 3947 case RISCV::VMULHU_VX: 3948 case RISCV::VMULHSU_VX: 3949 // 11.11. Vector Integer Divide Instructions 3950 case RISCV::VDIVU_VX: 3951 case RISCV::VDIV_VX: 3952 case RISCV::VREMU_VX: 3953 case RISCV::VREM_VX: 3954 // 11.12. Vector Widening Integer Multiply Instructions 3955 case RISCV::VWMUL_VX: 3956 case RISCV::VWMULU_VX: 3957 case RISCV::VWMULSU_VX: 3958 // 11.13. Vector Single-Width Integer Multiply-Add Instructions 3959 case RISCV::VMACC_VX: 3960 case RISCV::VNMSAC_VX: 3961 case RISCV::VMADD_VX: 3962 case RISCV::VNMSUB_VX: 3963 // 11.14. Vector Widening Integer Multiply-Add Instructions 3964 case RISCV::VWMACCU_VX: 3965 case RISCV::VWMACC_VX: 3966 case RISCV::VWMACCSU_VX: 3967 case RISCV::VWMACCUS_VX: 3968 // 11.15. Vector Integer Merge Instructions 3969 case RISCV::VMERGE_VXM: 3970 // 11.16. Vector Integer Move Instructions 3971 case RISCV::VMV_V_X: 3972 // 12.1. 
Vector Single-Width Saturating Add and Subtract 3973 case RISCV::VSADDU_VX: 3974 case RISCV::VSADD_VX: 3975 case RISCV::VSSUBU_VX: 3976 case RISCV::VSSUB_VX: 3977 // 12.2. Vector Single-Width Averaging Add and Subtract 3978 case RISCV::VAADDU_VX: 3979 case RISCV::VAADD_VX: 3980 case RISCV::VASUBU_VX: 3981 case RISCV::VASUB_VX: 3982 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation 3983 case RISCV::VSMUL_VX: 3984 // 16.1. Integer Scalar Move Instructions 3985 case RISCV::VMV_S_X: 3986 return 1U << Log2SEW; 3987 } 3988 } 3989 3990 unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) { 3991 const RISCVVPseudosTable::PseudoInfo *RVV = 3992 RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode); 3993 if (!RVV) 3994 return 0; 3995 return RVV->BaseInstr; 3996 } 3997
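// For example, getRVVMCOpcode(RISCV::PseudoVADD_VV_M1) returns the underlying
// MC opcode RISCV::VADD_VV; opcodes with no entry in RISCVVPseudosTable map
// to 0.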