1 //===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the RISC-V implementation of the TargetInstrInfo class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVInstrInfo.h" 14 #include "MCTargetDesc/RISCVMatInt.h" 15 #include "RISCV.h" 16 #include "RISCVMachineFunctionInfo.h" 17 #include "RISCVSubtarget.h" 18 #include "RISCVTargetMachine.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/Analysis/MemoryLocation.h" 22 #include "llvm/Analysis/ValueTracking.h" 23 #include "llvm/CodeGen/LiveIntervals.h" 24 #include "llvm/CodeGen/LiveVariables.h" 25 #include "llvm/CodeGen/MachineCombinerPattern.h" 26 #include "llvm/CodeGen/MachineFunctionPass.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/MachineTraceMetrics.h" 30 #include "llvm/CodeGen/RegisterScavenging.h" 31 #include "llvm/CodeGen/StackMaps.h" 32 #include "llvm/IR/DebugInfoMetadata.h" 33 #include "llvm/MC/MCInstBuilder.h" 34 #include "llvm/MC/TargetRegistry.h" 35 #include "llvm/Support/ErrorHandling.h" 36 37 using namespace llvm; 38 39 #define GEN_CHECK_COMPRESS_INSTR 40 #include "RISCVGenCompressInstEmitter.inc" 41 42 #define GET_INSTRINFO_CTOR_DTOR 43 #define GET_INSTRINFO_NAMED_OPS 44 #include "RISCVGenInstrInfo.inc" 45 46 static cl::opt<bool> PreferWholeRegisterMove( 47 "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden, 48 cl::desc("Prefer whole register move for vector registers.")); 49 50 static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy( 51 "riscv-force-machine-combiner-strategy", cl::Hidden, 52 cl::desc("Force machine combiner to use a specific strategy for machine " 53 "trace metrics evaluation."), 54 cl::init(MachineTraceStrategy::TS_NumStrategies), 55 cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local", 56 "Local strategy."), 57 clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr", 58 "MinInstrCount strategy."))); 59 60 namespace llvm::RISCVVPseudosTable { 61 62 using namespace RISCV; 63 64 #define GET_RISCVVPseudosTable_IMPL 65 #include "RISCVGenSearchableTables.inc" 66 67 } // namespace llvm::RISCVVPseudosTable 68 69 RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI) 70 : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP), 71 STI(STI) {} 72 73 MCInst RISCVInstrInfo::getNop() const { 74 if (STI.hasStdExtCOrZca()) 75 return MCInstBuilder(RISCV::C_NOP); 76 return MCInstBuilder(RISCV::ADDI) 77 .addReg(RISCV::X0) 78 .addReg(RISCV::X0) 79 .addImm(0); 80 } 81 82 unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 83 int &FrameIndex) const { 84 unsigned Dummy; 85 return isLoadFromStackSlot(MI, FrameIndex, Dummy); 86 } 87 88 unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 89 int &FrameIndex, 90 unsigned &MemBytes) const { 91 switch (MI.getOpcode()) { 92 default: 93 return 0; 94 case RISCV::LB: 95 case RISCV::LBU: 96 MemBytes = 1; 97 break; 98 case RISCV::LH: 99 case RISCV::LHU: 100 case RISCV::FLH: 101 MemBytes = 2; 102 break; 103 case RISCV::LW: 104 case RISCV::FLW: 105 case RISCV::LWU: 106 MemBytes = 4; 
107 break; 108 case RISCV::LD: 109 case RISCV::FLD: 110 MemBytes = 8; 111 break; 112 } 113 114 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && 115 MI.getOperand(2).getImm() == 0) { 116 FrameIndex = MI.getOperand(1).getIndex(); 117 return MI.getOperand(0).getReg(); 118 } 119 120 return 0; 121 } 122 123 unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI, 124 int &FrameIndex) const { 125 unsigned Dummy; 126 return isStoreToStackSlot(MI, FrameIndex, Dummy); 127 } 128 129 unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI, 130 int &FrameIndex, 131 unsigned &MemBytes) const { 132 switch (MI.getOpcode()) { 133 default: 134 return 0; 135 case RISCV::SB: 136 MemBytes = 1; 137 break; 138 case RISCV::SH: 139 case RISCV::FSH: 140 MemBytes = 2; 141 break; 142 case RISCV::SW: 143 case RISCV::FSW: 144 MemBytes = 4; 145 break; 146 case RISCV::SD: 147 case RISCV::FSD: 148 MemBytes = 8; 149 break; 150 } 151 152 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && 153 MI.getOperand(2).getImm() == 0) { 154 FrameIndex = MI.getOperand(1).getIndex(); 155 return MI.getOperand(0).getReg(); 156 } 157 158 return 0; 159 } 160 161 static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg, 162 unsigned NumRegs) { 163 return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs; 164 } 165 166 static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI, 167 const MachineBasicBlock &MBB, 168 MachineBasicBlock::const_iterator MBBI, 169 MachineBasicBlock::const_iterator &DefMBBI, 170 RISCVII::VLMUL LMul) { 171 if (PreferWholeRegisterMove) 172 return false; 173 174 assert(MBBI->getOpcode() == TargetOpcode::COPY && 175 "Unexpected COPY instruction."); 176 Register SrcReg = MBBI->getOperand(1).getReg(); 177 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 178 179 bool FoundDef = false; 180 bool FirstVSetVLI = false; 181 unsigned FirstSEW = 0; 182 while (MBBI != MBB.begin()) { 183 --MBBI; 184 if (MBBI->isMetaInstruction()) 185 continue; 186 187 if (MBBI->getOpcode() == RISCV::PseudoVSETVLI || 188 MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 || 189 MBBI->getOpcode() == RISCV::PseudoVSETIVLI) { 190 // There is a vsetvli between COPY and source define instruction. 191 // vy = def_vop ... (producing instruction) 192 // ... 193 // vsetvli 194 // ... 195 // vx = COPY vy 196 if (!FoundDef) { 197 if (!FirstVSetVLI) { 198 FirstVSetVLI = true; 199 unsigned FirstVType = MBBI->getOperand(2).getImm(); 200 RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType); 201 FirstSEW = RISCVVType::getSEW(FirstVType); 202 // The first encountered vsetvli must have the same lmul as the 203 // register class of COPY. 204 if (FirstLMul != LMul) 205 return false; 206 } 207 // Only permit `vsetvli x0, x0, vtype` between COPY and the source 208 // define instruction. 209 if (MBBI->getOperand(0).getReg() != RISCV::X0) 210 return false; 211 if (MBBI->getOperand(1).isImm()) 212 return false; 213 if (MBBI->getOperand(1).getReg() != RISCV::X0) 214 return false; 215 continue; 216 } 217 218 // MBBI is the first vsetvli before the producing instruction. 219 unsigned VType = MBBI->getOperand(2).getImm(); 220 // If there is a vsetvli between COPY and the producing instruction. 221 if (FirstVSetVLI) { 222 // If SEW is different, return false. 223 if (RISCVVType::getSEW(VType) != FirstSEW) 224 return false; 225 } 226 227 // If the vsetvli is tail undisturbed, keep the whole register move. 228 if (!RISCVVType::isTailAgnostic(VType)) 229 return false; 230 231 // The checking is conservative. 
We only have register classes for 232 // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v 233 // for fractional LMUL operations. However, we could not use the vsetvli 234 // lmul for widening operations. The result of widening operation is 235 // 2 x LMUL. 236 return LMul == RISCVVType::getVLMUL(VType); 237 } else if (MBBI->isInlineAsm() || MBBI->isCall()) { 238 return false; 239 } else if (MBBI->getNumDefs()) { 240 // Check all the instructions which will change VL. 241 // For example, vleff has implicit def VL. 242 if (MBBI->modifiesRegister(RISCV::VL)) 243 return false; 244 245 // Only converting whole register copies to vmv.v.v when the defining 246 // value appears in the explicit operands. 247 for (const MachineOperand &MO : MBBI->explicit_operands()) { 248 if (!MO.isReg() || !MO.isDef()) 249 continue; 250 if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) { 251 // We only permit the source of COPY has the same LMUL as the defined 252 // operand. 253 // There are cases we need to keep the whole register copy if the LMUL 254 // is different. 255 // For example, 256 // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,m 257 // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2 258 // # The COPY may be created by vlmul_trunc intrinsic. 259 // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4 260 // 261 // After widening, the valid value will be 4 x e32 elements. If we 262 // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements. 263 // FIXME: The COPY of subregister of Zvlsseg register will not be able 264 // to convert to vmv.v.[v|i] under the constraint. 265 if (MO.getReg() != SrcReg) 266 return false; 267 268 // In widening reduction instructions with LMUL_1 input vector case, 269 // only checking the LMUL is insufficient due to reduction result is 270 // always LMUL_1. 271 // For example, 272 // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu 273 // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27 274 // $v26 = COPY killed renamable $v8 275 // After widening, The valid value will be 1 x e16 elements. If we 276 // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements. 277 uint64_t TSFlags = MBBI->getDesc().TSFlags; 278 if (RISCVII::isRVVWideningReduction(TSFlags)) 279 return false; 280 281 // If the producing instruction does not depend on vsetvli, do not 282 // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD. 283 if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags)) 284 return false; 285 286 // Found the definition. 
287 FoundDef = true; 288 DefMBBI = MBBI; 289 break; 290 } 291 } 292 } 293 } 294 295 return false; 296 } 297 298 void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB, 299 MachineBasicBlock::iterator MBBI, 300 const DebugLoc &DL, MCRegister DstReg, 301 MCRegister SrcReg, bool KillSrc, 302 unsigned Opc, unsigned NF) const { 303 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 304 305 RISCVII::VLMUL LMul; 306 unsigned SubRegIdx; 307 unsigned VVOpc, VIOpc; 308 switch (Opc) { 309 default: 310 llvm_unreachable("Impossible LMUL for vector register copy."); 311 case RISCV::VMV1R_V: 312 LMul = RISCVII::LMUL_1; 313 SubRegIdx = RISCV::sub_vrm1_0; 314 VVOpc = RISCV::PseudoVMV_V_V_M1; 315 VIOpc = RISCV::PseudoVMV_V_I_M1; 316 break; 317 case RISCV::VMV2R_V: 318 LMul = RISCVII::LMUL_2; 319 SubRegIdx = RISCV::sub_vrm2_0; 320 VVOpc = RISCV::PseudoVMV_V_V_M2; 321 VIOpc = RISCV::PseudoVMV_V_I_M2; 322 break; 323 case RISCV::VMV4R_V: 324 LMul = RISCVII::LMUL_4; 325 SubRegIdx = RISCV::sub_vrm4_0; 326 VVOpc = RISCV::PseudoVMV_V_V_M4; 327 VIOpc = RISCV::PseudoVMV_V_I_M4; 328 break; 329 case RISCV::VMV8R_V: 330 assert(NF == 1); 331 LMul = RISCVII::LMUL_8; 332 SubRegIdx = RISCV::sub_vrm1_0; // There is no sub_vrm8_0. 333 VVOpc = RISCV::PseudoVMV_V_V_M8; 334 VIOpc = RISCV::PseudoVMV_V_I_M8; 335 break; 336 } 337 338 bool UseVMV_V_V = false; 339 bool UseVMV_V_I = false; 340 MachineBasicBlock::const_iterator DefMBBI; 341 if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) { 342 UseVMV_V_V = true; 343 Opc = VVOpc; 344 345 if (DefMBBI->getOpcode() == VIOpc) { 346 UseVMV_V_I = true; 347 Opc = VIOpc; 348 } 349 } 350 351 if (NF == 1) { 352 auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg); 353 if (UseVMV_V_V) 354 MIB.addReg(DstReg, RegState::Undef); 355 if (UseVMV_V_I) 356 MIB = MIB.add(DefMBBI->getOperand(2)); 357 else 358 MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc)); 359 if (UseVMV_V_V) { 360 const MCInstrDesc &Desc = DefMBBI->getDesc(); 361 MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL 362 MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW 363 MIB.addImm(0); // tu, mu 364 MIB.addReg(RISCV::VL, RegState::Implicit); 365 MIB.addReg(RISCV::VTYPE, RegState::Implicit); 366 } 367 return; 368 } 369 370 int I = 0, End = NF, Incr = 1; 371 unsigned SrcEncoding = TRI->getEncodingValue(SrcReg); 372 unsigned DstEncoding = TRI->getEncodingValue(DstReg); 373 unsigned LMulVal; 374 bool Fractional; 375 std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul); 376 assert(!Fractional && "It is impossible be fractional lmul here."); 377 if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) { 378 I = NF - 1; 379 End = -1; 380 Incr = -1; 381 } 382 383 for (; I != End; I += Incr) { 384 auto MIB = 385 BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I)); 386 if (UseVMV_V_V) 387 MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I), RegState::Undef); 388 if (UseVMV_V_I) 389 MIB = MIB.add(DefMBBI->getOperand(2)); 390 else 391 MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I), 392 getKillRegState(KillSrc)); 393 if (UseVMV_V_V) { 394 const MCInstrDesc &Desc = DefMBBI->getDesc(); 395 MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL 396 MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW 397 MIB.addImm(0); // tu, mu 398 MIB.addReg(RISCV::VL, RegState::Implicit); 399 MIB.addReg(RISCV::VTYPE, RegState::Implicit); 400 } 401 } 402 } 403 404 void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 405 
MachineBasicBlock::iterator MBBI, 406 const DebugLoc &DL, MCRegister DstReg, 407 MCRegister SrcReg, bool KillSrc) const { 408 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 409 410 if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) { 411 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg) 412 .addReg(SrcReg, getKillRegState(KillSrc)) 413 .addImm(0); 414 return; 415 } 416 417 if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) { 418 // Emit an ADDI for both parts of GPRPair. 419 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), 420 TRI->getSubReg(DstReg, RISCV::sub_gpr_even)) 421 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even), 422 getKillRegState(KillSrc)) 423 .addImm(0); 424 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), 425 TRI->getSubReg(DstReg, RISCV::sub_gpr_odd)) 426 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd), 427 getKillRegState(KillSrc)) 428 .addImm(0); 429 return; 430 } 431 432 // Handle copy from csr 433 if (RISCV::VCSRRegClass.contains(SrcReg) && 434 RISCV::GPRRegClass.contains(DstReg)) { 435 BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg) 436 .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding) 437 .addReg(RISCV::X0); 438 return; 439 } 440 441 if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) { 442 unsigned Opc; 443 if (STI.hasStdExtZfh()) { 444 Opc = RISCV::FSGNJ_H; 445 } else { 446 assert(STI.hasStdExtF() && 447 (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) && 448 "Unexpected extensions"); 449 // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S. 450 DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16, 451 &RISCV::FPR32RegClass); 452 SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16, 453 &RISCV::FPR32RegClass); 454 Opc = RISCV::FSGNJ_S; 455 } 456 BuildMI(MBB, MBBI, DL, get(Opc), DstReg) 457 .addReg(SrcReg, getKillRegState(KillSrc)) 458 .addReg(SrcReg, getKillRegState(KillSrc)); 459 return; 460 } 461 462 if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) { 463 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg) 464 .addReg(SrcReg, getKillRegState(KillSrc)) 465 .addReg(SrcReg, getKillRegState(KillSrc)); 466 return; 467 } 468 469 if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) { 470 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg) 471 .addReg(SrcReg, getKillRegState(KillSrc)) 472 .addReg(SrcReg, getKillRegState(KillSrc)); 473 return; 474 } 475 476 if (RISCV::FPR32RegClass.contains(DstReg) && 477 RISCV::GPRRegClass.contains(SrcReg)) { 478 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg) 479 .addReg(SrcReg, getKillRegState(KillSrc)); 480 return; 481 } 482 483 if (RISCV::GPRRegClass.contains(DstReg) && 484 RISCV::FPR32RegClass.contains(SrcReg)) { 485 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg) 486 .addReg(SrcReg, getKillRegState(KillSrc)); 487 return; 488 } 489 490 if (RISCV::FPR64RegClass.contains(DstReg) && 491 RISCV::GPRRegClass.contains(SrcReg)) { 492 assert(STI.getXLen() == 64 && "Unexpected GPR size"); 493 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg) 494 .addReg(SrcReg, getKillRegState(KillSrc)); 495 return; 496 } 497 498 if (RISCV::GPRRegClass.contains(DstReg) && 499 RISCV::FPR64RegClass.contains(SrcReg)) { 500 assert(STI.getXLen() == 64 && "Unexpected GPR size"); 501 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg) 502 .addReg(SrcReg, getKillRegState(KillSrc)); 503 return; 504 } 505 506 // VR->VR copies. 
507 if (RISCV::VRRegClass.contains(DstReg, SrcReg)) { 508 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V); 509 return; 510 } 511 512 if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) { 513 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V); 514 return; 515 } 516 517 if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) { 518 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V); 519 return; 520 } 521 522 if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) { 523 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV8R_V); 524 return; 525 } 526 527 if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) { 528 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 529 /*NF=*/2); 530 return; 531 } 532 533 if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) { 534 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V, 535 /*NF=*/2); 536 return; 537 } 538 539 if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) { 540 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V, 541 /*NF=*/2); 542 return; 543 } 544 545 if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) { 546 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 547 /*NF=*/3); 548 return; 549 } 550 551 if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) { 552 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V, 553 /*NF=*/3); 554 return; 555 } 556 557 if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) { 558 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 559 /*NF=*/4); 560 return; 561 } 562 563 if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) { 564 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V, 565 /*NF=*/4); 566 return; 567 } 568 569 if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) { 570 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 571 /*NF=*/5); 572 return; 573 } 574 575 if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) { 576 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 577 /*NF=*/6); 578 return; 579 } 580 581 if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) { 582 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 583 /*NF=*/7); 584 return; 585 } 586 587 if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) { 588 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 589 /*NF=*/8); 590 return; 591 } 592 593 llvm_unreachable("Impossible reg-to-reg copy"); 594 } 595 596 void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 597 MachineBasicBlock::iterator I, 598 Register SrcReg, bool IsKill, int FI, 599 const TargetRegisterClass *RC, 600 const TargetRegisterInfo *TRI, 601 Register VReg) const { 602 MachineFunction *MF = MBB.getParent(); 603 MachineFrameInfo &MFI = MF->getFrameInfo(); 604 605 unsigned Opcode; 606 bool IsScalableVector = true; 607 if (RISCV::GPRRegClass.hasSubClassEq(RC)) { 608 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? 
609 RISCV::SW : RISCV::SD; 610 IsScalableVector = false; 611 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { 612 Opcode = RISCV::PseudoRV32ZdinxSD; 613 IsScalableVector = false; 614 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { 615 Opcode = RISCV::FSH; 616 IsScalableVector = false; 617 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) { 618 Opcode = RISCV::FSW; 619 IsScalableVector = false; 620 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) { 621 Opcode = RISCV::FSD; 622 IsScalableVector = false; 623 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) { 624 Opcode = RISCV::VS1R_V; 625 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) { 626 Opcode = RISCV::VS2R_V; 627 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) { 628 Opcode = RISCV::VS4R_V; 629 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) { 630 Opcode = RISCV::VS8R_V; 631 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC)) 632 Opcode = RISCV::PseudoVSPILL2_M1; 633 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC)) 634 Opcode = RISCV::PseudoVSPILL2_M2; 635 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC)) 636 Opcode = RISCV::PseudoVSPILL2_M4; 637 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC)) 638 Opcode = RISCV::PseudoVSPILL3_M1; 639 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC)) 640 Opcode = RISCV::PseudoVSPILL3_M2; 641 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC)) 642 Opcode = RISCV::PseudoVSPILL4_M1; 643 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC)) 644 Opcode = RISCV::PseudoVSPILL4_M2; 645 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC)) 646 Opcode = RISCV::PseudoVSPILL5_M1; 647 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC)) 648 Opcode = RISCV::PseudoVSPILL6_M1; 649 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC)) 650 Opcode = RISCV::PseudoVSPILL7_M1; 651 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC)) 652 Opcode = RISCV::PseudoVSPILL8_M1; 653 else 654 llvm_unreachable("Can't store this register to stack slot"); 655 656 if (IsScalableVector) { 657 MachineMemOperand *MMO = MF->getMachineMemOperand( 658 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 659 MemoryLocation::UnknownSize, MFI.getObjectAlign(FI)); 660 661 MFI.setStackID(FI, TargetStackID::ScalableVector); 662 BuildMI(MBB, I, DebugLoc(), get(Opcode)) 663 .addReg(SrcReg, getKillRegState(IsKill)) 664 .addFrameIndex(FI) 665 .addMemOperand(MMO); 666 } else { 667 MachineMemOperand *MMO = MF->getMachineMemOperand( 668 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 669 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 670 671 BuildMI(MBB, I, DebugLoc(), get(Opcode)) 672 .addReg(SrcReg, getKillRegState(IsKill)) 673 .addFrameIndex(FI) 674 .addImm(0) 675 .addMemOperand(MMO); 676 } 677 } 678 679 void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 680 MachineBasicBlock::iterator I, 681 Register DstReg, int FI, 682 const TargetRegisterClass *RC, 683 const TargetRegisterInfo *TRI, 684 Register VReg) const { 685 MachineFunction *MF = MBB.getParent(); 686 MachineFrameInfo &MFI = MF->getFrameInfo(); 687 688 unsigned Opcode; 689 bool IsScalableVector = true; 690 if (RISCV::GPRRegClass.hasSubClassEq(RC)) { 691 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? 
692 RISCV::LW : RISCV::LD; 693 IsScalableVector = false; 694 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { 695 Opcode = RISCV::PseudoRV32ZdinxLD; 696 IsScalableVector = false; 697 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { 698 Opcode = RISCV::FLH; 699 IsScalableVector = false; 700 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) { 701 Opcode = RISCV::FLW; 702 IsScalableVector = false; 703 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) { 704 Opcode = RISCV::FLD; 705 IsScalableVector = false; 706 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) { 707 Opcode = RISCV::VL1RE8_V; 708 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) { 709 Opcode = RISCV::VL2RE8_V; 710 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) { 711 Opcode = RISCV::VL4RE8_V; 712 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) { 713 Opcode = RISCV::VL8RE8_V; 714 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC)) 715 Opcode = RISCV::PseudoVRELOAD2_M1; 716 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC)) 717 Opcode = RISCV::PseudoVRELOAD2_M2; 718 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC)) 719 Opcode = RISCV::PseudoVRELOAD2_M4; 720 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC)) 721 Opcode = RISCV::PseudoVRELOAD3_M1; 722 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC)) 723 Opcode = RISCV::PseudoVRELOAD3_M2; 724 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC)) 725 Opcode = RISCV::PseudoVRELOAD4_M1; 726 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC)) 727 Opcode = RISCV::PseudoVRELOAD4_M2; 728 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC)) 729 Opcode = RISCV::PseudoVRELOAD5_M1; 730 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC)) 731 Opcode = RISCV::PseudoVRELOAD6_M1; 732 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC)) 733 Opcode = RISCV::PseudoVRELOAD7_M1; 734 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC)) 735 Opcode = RISCV::PseudoVRELOAD8_M1; 736 else 737 llvm_unreachable("Can't load this register from stack slot"); 738 739 if (IsScalableVector) { 740 MachineMemOperand *MMO = MF->getMachineMemOperand( 741 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 742 MemoryLocation::UnknownSize, MFI.getObjectAlign(FI)); 743 744 MFI.setStackID(FI, TargetStackID::ScalableVector); 745 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg) 746 .addFrameIndex(FI) 747 .addMemOperand(MMO); 748 } else { 749 MachineMemOperand *MMO = MF->getMachineMemOperand( 750 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 751 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 752 753 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg) 754 .addFrameIndex(FI) 755 .addImm(0) 756 .addMemOperand(MMO); 757 } 758 } 759 760 MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( 761 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 762 MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, 763 VirtRegMap *VRM) const { 764 const MachineFrameInfo &MFI = MF.getFrameInfo(); 765 766 // The below optimizations narrow the load so they are only valid for little 767 // endian. 768 // TODO: Support big endian by adding an offset into the frame object? 769 if (MF.getDataLayout().isBigEndian()) 770 return nullptr; 771 772 // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w. 
773 if (Ops.size() != 1 || Ops[0] != 1) 774 return nullptr; 775 776 unsigned LoadOpc; 777 switch (MI.getOpcode()) { 778 default: 779 if (RISCV::isSEXT_W(MI)) { 780 LoadOpc = RISCV::LW; 781 break; 782 } 783 if (RISCV::isZEXT_W(MI)) { 784 LoadOpc = RISCV::LWU; 785 break; 786 } 787 if (RISCV::isZEXT_B(MI)) { 788 LoadOpc = RISCV::LBU; 789 break; 790 } 791 return nullptr; 792 case RISCV::SEXT_H: 793 LoadOpc = RISCV::LH; 794 break; 795 case RISCV::SEXT_B: 796 LoadOpc = RISCV::LB; 797 break; 798 case RISCV::ZEXT_H_RV32: 799 case RISCV::ZEXT_H_RV64: 800 LoadOpc = RISCV::LHU; 801 break; 802 } 803 804 MachineMemOperand *MMO = MF.getMachineMemOperand( 805 MachinePointerInfo::getFixedStack(MF, FrameIndex), 806 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex), 807 MFI.getObjectAlign(FrameIndex)); 808 809 Register DstReg = MI.getOperand(0).getReg(); 810 return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc), 811 DstReg) 812 .addFrameIndex(FrameIndex) 813 .addImm(0) 814 .addMemOperand(MMO); 815 } 816 817 void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, 818 MachineBasicBlock::iterator MBBI, 819 const DebugLoc &DL, Register DstReg, uint64_t Val, 820 MachineInstr::MIFlag Flag, bool DstRenamable, 821 bool DstIsDead) const { 822 Register SrcReg = RISCV::X0; 823 824 if (!STI.is64Bit() && !isInt<32>(Val)) 825 report_fatal_error("Should only materialize 32-bit constants for RV32"); 826 827 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI); 828 assert(!Seq.empty()); 829 830 bool SrcRenamable = false; 831 unsigned Num = 0; 832 833 for (const RISCVMatInt::Inst &Inst : Seq) { 834 bool LastItem = ++Num == Seq.size(); 835 unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) | 836 getRenamableRegState(DstRenamable); 837 unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) | 838 getRenamableRegState(SrcRenamable); 839 switch (Inst.getOpndKind()) { 840 case RISCVMatInt::Imm: 841 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode())) 842 .addReg(DstReg, RegState::Define | DstRegState) 843 .addImm(Inst.getImm()) 844 .setMIFlag(Flag); 845 break; 846 case RISCVMatInt::RegX0: 847 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode())) 848 .addReg(DstReg, RegState::Define | DstRegState) 849 .addReg(SrcReg, SrcRegState) 850 .addReg(RISCV::X0) 851 .setMIFlag(Flag); 852 break; 853 case RISCVMatInt::RegReg: 854 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode())) 855 .addReg(DstReg, RegState::Define | DstRegState) 856 .addReg(SrcReg, SrcRegState) 857 .addReg(SrcReg, SrcRegState) 858 .setMIFlag(Flag); 859 break; 860 case RISCVMatInt::RegImm: 861 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode())) 862 .addReg(DstReg, RegState::Define | DstRegState) 863 .addReg(SrcReg, SrcRegState) 864 .addImm(Inst.getImm()) 865 .setMIFlag(Flag); 866 break; 867 } 868 869 // Only the first instruction has X0 as its source. 870 SrcReg = DstReg; 871 SrcRenamable = DstRenamable; 872 } 873 } 874 875 static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) { 876 switch (Opc) { 877 default: 878 return RISCVCC::COND_INVALID; 879 case RISCV::BEQ: 880 return RISCVCC::COND_EQ; 881 case RISCV::BNE: 882 return RISCVCC::COND_NE; 883 case RISCV::BLT: 884 return RISCVCC::COND_LT; 885 case RISCV::BGE: 886 return RISCVCC::COND_GE; 887 case RISCV::BLTU: 888 return RISCVCC::COND_LTU; 889 case RISCV::BGEU: 890 return RISCVCC::COND_GEU; 891 } 892 } 893 894 // The contents of values added to Cond are not examined outside of 895 // RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we 896 // push BranchOpcode, Reg1, Reg2. 
897 static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target, 898 SmallVectorImpl<MachineOperand> &Cond) { 899 // Block ends with fall-through condbranch. 900 assert(LastInst.getDesc().isConditionalBranch() && 901 "Unknown conditional branch"); 902 Target = LastInst.getOperand(2).getMBB(); 903 unsigned CC = getCondFromBranchOpc(LastInst.getOpcode()); 904 Cond.push_back(MachineOperand::CreateImm(CC)); 905 Cond.push_back(LastInst.getOperand(0)); 906 Cond.push_back(LastInst.getOperand(1)); 907 } 908 909 unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) { 910 switch (CC) { 911 default: 912 llvm_unreachable("Unknown condition code!"); 913 case RISCVCC::COND_EQ: 914 return RISCV::BEQ; 915 case RISCVCC::COND_NE: 916 return RISCV::BNE; 917 case RISCVCC::COND_LT: 918 return RISCV::BLT; 919 case RISCVCC::COND_GE: 920 return RISCV::BGE; 921 case RISCVCC::COND_LTU: 922 return RISCV::BLTU; 923 case RISCVCC::COND_GEU: 924 return RISCV::BGEU; 925 } 926 } 927 928 const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const { 929 return get(RISCVCC::getBrCond(CC)); 930 } 931 932 RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) { 933 switch (CC) { 934 default: 935 llvm_unreachable("Unrecognized conditional branch"); 936 case RISCVCC::COND_EQ: 937 return RISCVCC::COND_NE; 938 case RISCVCC::COND_NE: 939 return RISCVCC::COND_EQ; 940 case RISCVCC::COND_LT: 941 return RISCVCC::COND_GE; 942 case RISCVCC::COND_GE: 943 return RISCVCC::COND_LT; 944 case RISCVCC::COND_LTU: 945 return RISCVCC::COND_GEU; 946 case RISCVCC::COND_GEU: 947 return RISCVCC::COND_LTU; 948 } 949 } 950 951 bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB, 952 MachineBasicBlock *&TBB, 953 MachineBasicBlock *&FBB, 954 SmallVectorImpl<MachineOperand> &Cond, 955 bool AllowModify) const { 956 TBB = FBB = nullptr; 957 Cond.clear(); 958 959 // If the block has no terminators, it just falls into the block after it. 960 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 961 if (I == MBB.end() || !isUnpredicatedTerminator(*I)) 962 return false; 963 964 // Count the number of terminators and find the first unconditional or 965 // indirect branch. 966 MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end(); 967 int NumTerminators = 0; 968 for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J); 969 J++) { 970 NumTerminators++; 971 if (J->getDesc().isUnconditionalBranch() || 972 J->getDesc().isIndirectBranch()) { 973 FirstUncondOrIndirectBr = J.getReverse(); 974 } 975 } 976 977 // If AllowModify is true, we can erase any terminators after 978 // FirstUncondOrIndirectBR. 979 if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) { 980 while (std::next(FirstUncondOrIndirectBr) != MBB.end()) { 981 std::next(FirstUncondOrIndirectBr)->eraseFromParent(); 982 NumTerminators--; 983 } 984 I = FirstUncondOrIndirectBr; 985 } 986 987 // We can't handle blocks that end in an indirect branch. 988 if (I->getDesc().isIndirectBranch()) 989 return true; 990 991 // We can't handle Generic branch opcodes from Global ISel. 992 if (I->isPreISelOpcode()) 993 return true; 994 995 // We can't handle blocks with more than 2 terminators. 996 if (NumTerminators > 2) 997 return true; 998 999 // Handle a single unconditional branch. 1000 if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) { 1001 TBB = getBranchDestBlock(*I); 1002 return false; 1003 } 1004 1005 // Handle a single conditional branch. 
1006 if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) { 1007 parseCondBranch(*I, TBB, Cond); 1008 return false; 1009 } 1010 1011 // Handle a conditional branch followed by an unconditional branch. 1012 if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() && 1013 I->getDesc().isUnconditionalBranch()) { 1014 parseCondBranch(*std::prev(I), TBB, Cond); 1015 FBB = getBranchDestBlock(*I); 1016 return false; 1017 } 1018 1019 // Otherwise, we can't handle this. 1020 return true; 1021 } 1022 1023 unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB, 1024 int *BytesRemoved) const { 1025 if (BytesRemoved) 1026 *BytesRemoved = 0; 1027 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 1028 if (I == MBB.end()) 1029 return 0; 1030 1031 if (!I->getDesc().isUnconditionalBranch() && 1032 !I->getDesc().isConditionalBranch()) 1033 return 0; 1034 1035 // Remove the branch. 1036 if (BytesRemoved) 1037 *BytesRemoved += getInstSizeInBytes(*I); 1038 I->eraseFromParent(); 1039 1040 I = MBB.end(); 1041 1042 if (I == MBB.begin()) 1043 return 1; 1044 --I; 1045 if (!I->getDesc().isConditionalBranch()) 1046 return 1; 1047 1048 // Remove the branch. 1049 if (BytesRemoved) 1050 *BytesRemoved += getInstSizeInBytes(*I); 1051 I->eraseFromParent(); 1052 return 2; 1053 } 1054 1055 // Inserts a branch into the end of the specific MachineBasicBlock, returning 1056 // the number of instructions inserted. 1057 unsigned RISCVInstrInfo::insertBranch( 1058 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, 1059 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const { 1060 if (BytesAdded) 1061 *BytesAdded = 0; 1062 1063 // Shouldn't be a fall through. 1064 assert(TBB && "insertBranch must not be told to insert a fallthrough"); 1065 assert((Cond.size() == 3 || Cond.size() == 0) && 1066 "RISC-V branch conditions have two components!"); 1067 1068 // Unconditional branch. 1069 if (Cond.empty()) { 1070 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB); 1071 if (BytesAdded) 1072 *BytesAdded += getInstSizeInBytes(MI); 1073 return 1; 1074 } 1075 1076 // Either a one or two-way conditional branch. 1077 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm()); 1078 MachineInstr &CondMI = 1079 *BuildMI(&MBB, DL, getBrCond(CC)).add(Cond[1]).add(Cond[2]).addMBB(TBB); 1080 if (BytesAdded) 1081 *BytesAdded += getInstSizeInBytes(CondMI); 1082 1083 // One-way conditional branch. 1084 if (!FBB) 1085 return 1; 1086 1087 // Two-way conditional branch. 
1088 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB); 1089 if (BytesAdded) 1090 *BytesAdded += getInstSizeInBytes(MI); 1091 return 2; 1092 } 1093 1094 void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, 1095 MachineBasicBlock &DestBB, 1096 MachineBasicBlock &RestoreBB, 1097 const DebugLoc &DL, int64_t BrOffset, 1098 RegScavenger *RS) const { 1099 assert(RS && "RegScavenger required for long branching"); 1100 assert(MBB.empty() && 1101 "new block should be inserted for expanding unconditional branch"); 1102 assert(MBB.pred_size() == 1); 1103 assert(RestoreBB.empty() && 1104 "restore block should be inserted for restoring clobbered registers"); 1105 1106 MachineFunction *MF = MBB.getParent(); 1107 MachineRegisterInfo &MRI = MF->getRegInfo(); 1108 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>(); 1109 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 1110 1111 if (!isInt<32>(BrOffset)) 1112 report_fatal_error( 1113 "Branch offsets outside of the signed 32-bit range not supported"); 1114 1115 // FIXME: A virtual register must be used initially, as the register 1116 // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch 1117 // uses the same workaround). 1118 Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 1119 auto II = MBB.end(); 1120 // We may also update the jump target to RestoreBB later. 1121 MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump)) 1122 .addReg(ScratchReg, RegState::Define | RegState::Dead) 1123 .addMBB(&DestBB, RISCVII::MO_CALL); 1124 1125 RS->enterBasicBlockEnd(MBB); 1126 Register TmpGPR = 1127 RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(), 1128 /*RestoreAfter=*/false, /*SpAdj=*/0, 1129 /*AllowSpill=*/false); 1130 if (TmpGPR != RISCV::NoRegister) 1131 RS->setRegUsed(TmpGPR); 1132 else { 1133 // The case when there is no scavenged register needs special handling. 1134 1135 // Pick s11 because it doesn't make a difference. 
1136 TmpGPR = RISCV::X27; 1137 1138 int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex(); 1139 if (FrameIndex == -1) 1140 report_fatal_error("underestimated function size"); 1141 1142 storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex, 1143 &RISCV::GPRRegClass, TRI, Register()); 1144 TRI->eliminateFrameIndex(std::prev(MI.getIterator()), 1145 /*SpAdj=*/0, /*FIOperandNum=*/1); 1146 1147 MI.getOperand(1).setMBB(&RestoreBB); 1148 1149 loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex, 1150 &RISCV::GPRRegClass, TRI, Register()); 1151 TRI->eliminateFrameIndex(RestoreBB.back(), 1152 /*SpAdj=*/0, /*FIOperandNum=*/1); 1153 } 1154 1155 MRI.replaceRegWith(ScratchReg, TmpGPR); 1156 MRI.clearVirtRegs(); 1157 } 1158 1159 bool RISCVInstrInfo::reverseBranchCondition( 1160 SmallVectorImpl<MachineOperand> &Cond) const { 1161 assert((Cond.size() == 3) && "Invalid branch condition!"); 1162 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm()); 1163 Cond[0].setImm(getOppositeBranchCondition(CC)); 1164 return false; 1165 } 1166 1167 bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const { 1168 MachineBasicBlock *MBB = MI.getParent(); 1169 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1170 1171 MachineBasicBlock *TBB, *FBB; 1172 SmallVector<MachineOperand, 3> Cond; 1173 if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false)) 1174 return false; 1175 (void)FBB; 1176 1177 RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm()); 1178 assert(CC != RISCVCC::COND_INVALID); 1179 1180 if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE) 1181 return false; 1182 1183 // For two constants C0 and C1 from 1184 // ``` 1185 // li Y, C0 1186 // li Z, C1 1187 // ``` 1188 // 1. if C1 = C0 + 1 1189 // we can turn: 1190 // (a) blt Y, X -> bge X, Z 1191 // (b) bge Y, X -> blt X, Z 1192 // 1193 // 2. if C1 = C0 - 1 1194 // we can turn: 1195 // (a) blt X, Y -> bge Z, X 1196 // (b) bge X, Y -> blt Z, X 1197 // 1198 // To make sure this optimization is really beneficial, we only 1199 // optimize for cases where Y had only one use (i.e. only used by the branch). 1200 1201 // Right now we only care about LI (i.e. ADDI x0, imm) 1202 auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool { 1203 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() && 1204 MI->getOperand(1).getReg() == RISCV::X0) { 1205 Imm = MI->getOperand(2).getImm(); 1206 return true; 1207 } 1208 return false; 1209 }; 1210 // Either a load from immediate instruction or X0. 1211 auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool { 1212 if (!Op.isReg()) 1213 return false; 1214 Register Reg = Op.getReg(); 1215 if (Reg == RISCV::X0) { 1216 Imm = 0; 1217 return true; 1218 } 1219 if (!Reg.isVirtual()) 1220 return false; 1221 return isLoadImm(MRI.getVRegDef(Op.getReg()), Imm); 1222 }; 1223 1224 MachineOperand &LHS = MI.getOperand(0); 1225 MachineOperand &RHS = MI.getOperand(1); 1226 // Try to find the register for constant Z; return 1227 // invalid register otherwise. 
1228 auto searchConst = [&](int64_t C1) -> Register { 1229 MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend(); 1230 auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool { 1231 int64_t Imm; 1232 return isLoadImm(&I, Imm) && Imm == C1 && 1233 I.getOperand(0).getReg().isVirtual(); 1234 }); 1235 if (DefC1 != E) 1236 return DefC1->getOperand(0).getReg(); 1237 1238 return Register(); 1239 }; 1240 1241 bool Modify = false; 1242 int64_t C0; 1243 if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) { 1244 // Might be case 1. 1245 // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need 1246 // to worry about unsigned overflow here) 1247 if (C0 < INT64_MAX) 1248 if (Register RegZ = searchConst(C0 + 1)) { 1249 reverseBranchCondition(Cond); 1250 Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false); 1251 Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false); 1252 // We might extend the live range of Z, clear its kill flag to 1253 // account for this. 1254 MRI.clearKillFlags(RegZ); 1255 Modify = true; 1256 } 1257 } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) { 1258 // Might be case 2. 1259 // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX 1260 // when C0 is zero. 1261 if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0) 1262 if (Register RegZ = searchConst(C0 - 1)) { 1263 reverseBranchCondition(Cond); 1264 Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false); 1265 Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false); 1266 // We might extend the live range of Z, clear its kill flag to 1267 // account for this. 1268 MRI.clearKillFlags(RegZ); 1269 Modify = true; 1270 } 1271 } 1272 1273 if (!Modify) 1274 return false; 1275 1276 // Build the new branch and remove the old one. 1277 BuildMI(*MBB, MI, MI.getDebugLoc(), 1278 getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm()))) 1279 .add(Cond[1]) 1280 .add(Cond[2]) 1281 .addMBB(TBB); 1282 MI.eraseFromParent(); 1283 1284 return true; 1285 } 1286 1287 MachineBasicBlock * 1288 RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { 1289 assert(MI.getDesc().isBranch() && "Unexpected opcode!"); 1290 // The branch target is always the last operand. 1291 int NumOp = MI.getNumExplicitOperands(); 1292 return MI.getOperand(NumOp - 1).getMBB(); 1293 } 1294 1295 bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp, 1296 int64_t BrOffset) const { 1297 unsigned XLen = STI.getXLen(); 1298 // Ideally we could determine the supported branch offset from the 1299 // RISCVII::FormMask, but this can't be used for Pseudo instructions like 1300 // PseudoBR. 1301 switch (BranchOp) { 1302 default: 1303 llvm_unreachable("Unexpected opcode!"); 1304 case RISCV::BEQ: 1305 case RISCV::BNE: 1306 case RISCV::BLT: 1307 case RISCV::BGE: 1308 case RISCV::BLTU: 1309 case RISCV::BGEU: 1310 return isIntN(13, BrOffset); 1311 case RISCV::JAL: 1312 case RISCV::PseudoBR: 1313 return isIntN(21, BrOffset); 1314 case RISCV::PseudoJump: 1315 return isIntN(32, SignExtend64(BrOffset + 0x800, XLen)); 1316 } 1317 } 1318 1319 // If the operation has a predicated pseudo instruction, return the pseudo 1320 // instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END. 1321 // TODO: Support more operations. 
1322 unsigned getPredicatedOpcode(unsigned Opcode) { 1323 switch (Opcode) { 1324 case RISCV::ADD: return RISCV::PseudoCCADD; break; 1325 case RISCV::SUB: return RISCV::PseudoCCSUB; break; 1326 case RISCV::SLL: return RISCV::PseudoCCSLL; break; 1327 case RISCV::SRL: return RISCV::PseudoCCSRL; break; 1328 case RISCV::SRA: return RISCV::PseudoCCSRA; break; 1329 case RISCV::AND: return RISCV::PseudoCCAND; break; 1330 case RISCV::OR: return RISCV::PseudoCCOR; break; 1331 case RISCV::XOR: return RISCV::PseudoCCXOR; break; 1332 1333 case RISCV::ADDI: return RISCV::PseudoCCADDI; break; 1334 case RISCV::SLLI: return RISCV::PseudoCCSLLI; break; 1335 case RISCV::SRLI: return RISCV::PseudoCCSRLI; break; 1336 case RISCV::SRAI: return RISCV::PseudoCCSRAI; break; 1337 case RISCV::ANDI: return RISCV::PseudoCCANDI; break; 1338 case RISCV::ORI: return RISCV::PseudoCCORI; break; 1339 case RISCV::XORI: return RISCV::PseudoCCXORI; break; 1340 1341 case RISCV::ADDW: return RISCV::PseudoCCADDW; break; 1342 case RISCV::SUBW: return RISCV::PseudoCCSUBW; break; 1343 case RISCV::SLLW: return RISCV::PseudoCCSLLW; break; 1344 case RISCV::SRLW: return RISCV::PseudoCCSRLW; break; 1345 case RISCV::SRAW: return RISCV::PseudoCCSRAW; break; 1346 1347 case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break; 1348 case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break; 1349 case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break; 1350 case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break; 1351 1352 case RISCV::ANDN: return RISCV::PseudoCCANDN; break; 1353 case RISCV::ORN: return RISCV::PseudoCCORN; break; 1354 case RISCV::XNOR: return RISCV::PseudoCCXNOR; break; 1355 } 1356 1357 return RISCV::INSTRUCTION_LIST_END; 1358 } 1359 1360 /// Identify instructions that can be folded into a CCMOV instruction, and 1361 /// return the defining instruction. 1362 static MachineInstr *canFoldAsPredicatedOp(Register Reg, 1363 const MachineRegisterInfo &MRI, 1364 const TargetInstrInfo *TII) { 1365 if (!Reg.isVirtual()) 1366 return nullptr; 1367 if (!MRI.hasOneNonDBGUse(Reg)) 1368 return nullptr; 1369 MachineInstr *MI = MRI.getVRegDef(Reg); 1370 if (!MI) 1371 return nullptr; 1372 // Check if MI can be predicated and folded into the CCMOV. 1373 if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END) 1374 return nullptr; 1375 // Don't predicate li idiom. 1376 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() && 1377 MI->getOperand(1).getReg() == RISCV::X0) 1378 return nullptr; 1379 // Check if MI has any other defs or physreg uses. 1380 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) { 1381 // Reject frame index operands, PEI can't handle the predicated pseudos. 1382 if (MO.isFI() || MO.isCPI() || MO.isJTI()) 1383 return nullptr; 1384 if (!MO.isReg()) 1385 continue; 1386 // MI can't have any tied operands, that would conflict with predication. 1387 if (MO.isTied()) 1388 return nullptr; 1389 if (MO.isDef()) 1390 return nullptr; 1391 // Allow constant physregs. 
1392 if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg())) 1393 return nullptr; 1394 } 1395 bool DontMoveAcrossStores = true; 1396 if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores)) 1397 return nullptr; 1398 return MI; 1399 } 1400 1401 bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI, 1402 SmallVectorImpl<MachineOperand> &Cond, 1403 unsigned &TrueOp, unsigned &FalseOp, 1404 bool &Optimizable) const { 1405 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR && 1406 "Unknown select instruction"); 1407 // CCMOV operands: 1408 // 0: Def. 1409 // 1: LHS of compare. 1410 // 2: RHS of compare. 1411 // 3: Condition code. 1412 // 4: False use. 1413 // 5: True use. 1414 TrueOp = 5; 1415 FalseOp = 4; 1416 Cond.push_back(MI.getOperand(1)); 1417 Cond.push_back(MI.getOperand(2)); 1418 Cond.push_back(MI.getOperand(3)); 1419 // We can only fold when we support short forward branch opt. 1420 Optimizable = STI.hasShortForwardBranchOpt(); 1421 return false; 1422 } 1423 1424 MachineInstr * 1425 RISCVInstrInfo::optimizeSelect(MachineInstr &MI, 1426 SmallPtrSetImpl<MachineInstr *> &SeenMIs, 1427 bool PreferFalse) const { 1428 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR && 1429 "Unknown select instruction"); 1430 if (!STI.hasShortForwardBranchOpt()) 1431 return nullptr; 1432 1433 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 1434 MachineInstr *DefMI = 1435 canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this); 1436 bool Invert = !DefMI; 1437 if (!DefMI) 1438 DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this); 1439 if (!DefMI) 1440 return nullptr; 1441 1442 // Find new register class to use. 1443 MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4); 1444 Register DestReg = MI.getOperand(0).getReg(); 1445 const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); 1446 if (!MRI.constrainRegClass(DestReg, PreviousClass)) 1447 return nullptr; 1448 1449 unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode()); 1450 assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!"); 1451 1452 // Create a new predicated version of DefMI. 1453 MachineInstrBuilder NewMI = 1454 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg); 1455 1456 // Copy the condition portion. 1457 NewMI.add(MI.getOperand(1)); 1458 NewMI.add(MI.getOperand(2)); 1459 1460 // Add condition code, inverting if necessary. 1461 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); 1462 if (Invert) 1463 CC = RISCVCC::getOppositeBranchCondition(CC); 1464 NewMI.addImm(CC); 1465 1466 // Copy the false register. 1467 NewMI.add(FalseReg); 1468 1469 // Copy all the DefMI operands. 1470 const MCInstrDesc &DefDesc = DefMI->getDesc(); 1471 for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i) 1472 NewMI.add(DefMI->getOperand(i)); 1473 1474 // Update SeenMIs set: register newly created MI and erase removed DefMI. 1475 SeenMIs.insert(NewMI); 1476 SeenMIs.erase(DefMI); 1477 1478 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on 1479 // DefMI would be invalid when tranferred inside the loop. Checking for a 1480 // loop is expensive, but at least remove kill flags if they are in different 1481 // BBs. 1482 if (DefMI->getParent() != MI.getParent()) 1483 NewMI->clearKillInfo(); 1484 1485 // The caller will erase MI, but not DefMI. 
1486 DefMI->eraseFromParent(); 1487 return NewMI; 1488 } 1489 1490 unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { 1491 if (MI.isMetaInstruction()) 1492 return 0; 1493 1494 unsigned Opcode = MI.getOpcode(); 1495 1496 if (Opcode == TargetOpcode::INLINEASM || 1497 Opcode == TargetOpcode::INLINEASM_BR) { 1498 const MachineFunction &MF = *MI.getParent()->getParent(); 1499 const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget()); 1500 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), 1501 *TM.getMCAsmInfo()); 1502 } 1503 1504 if (!MI.memoperands_empty()) { 1505 MachineMemOperand *MMO = *(MI.memoperands_begin()); 1506 const MachineFunction &MF = *MI.getParent()->getParent(); 1507 const auto &ST = MF.getSubtarget<RISCVSubtarget>(); 1508 if (ST.hasStdExtZihintntl() && MMO->isNonTemporal()) { 1509 if (ST.hasStdExtCOrZca() && ST.enableRVCHintInstrs()) { 1510 if (isCompressibleInst(MI, STI)) 1511 return 4; // c.ntl.all + c.load/c.store 1512 return 6; // c.ntl.all + load/store 1513 } 1514 return 8; // ntl.all + load/store 1515 } 1516 } 1517 1518 if (Opcode == TargetOpcode::BUNDLE) 1519 return getInstBundleLength(MI); 1520 1521 if (MI.getParent() && MI.getParent()->getParent()) { 1522 if (isCompressibleInst(MI, STI)) 1523 return 2; 1524 } 1525 1526 switch (Opcode) { 1527 case TargetOpcode::STACKMAP: 1528 // The upper bound for a stackmap intrinsic is the full length of its shadow 1529 return StackMapOpers(&MI).getNumPatchBytes(); 1530 case TargetOpcode::PATCHPOINT: 1531 // The size of the patchpoint intrinsic is the number of bytes requested 1532 return PatchPointOpers(&MI).getNumPatchBytes(); 1533 case TargetOpcode::STATEPOINT: 1534 // The size of the statepoint intrinsic is the number of bytes requested 1535 return StatepointOpers(&MI).getNumPatchBytes(); 1536 default: 1537 return get(Opcode).getSize(); 1538 } 1539 } 1540 1541 unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const { 1542 unsigned Size = 0; 1543 MachineBasicBlock::const_instr_iterator I = MI.getIterator(); 1544 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 1545 while (++I != E && I->isInsideBundle()) { 1546 assert(!I->isBundle() && "No nested bundle!"); 1547 Size += getInstSizeInBytes(*I); 1548 } 1549 return Size; 1550 } 1551 1552 bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { 1553 const unsigned Opcode = MI.getOpcode(); 1554 switch (Opcode) { 1555 default: 1556 break; 1557 case RISCV::FSGNJ_D: 1558 case RISCV::FSGNJ_S: 1559 case RISCV::FSGNJ_H: 1560 case RISCV::FSGNJ_D_INX: 1561 case RISCV::FSGNJ_D_IN32X: 1562 case RISCV::FSGNJ_S_INX: 1563 case RISCV::FSGNJ_H_INX: 1564 // The canonical floating-point move is fsgnj rd, rs, rs. 
1565 return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() && 1566 MI.getOperand(1).getReg() == MI.getOperand(2).getReg(); 1567 case RISCV::ADDI: 1568 case RISCV::ORI: 1569 case RISCV::XORI: 1570 return (MI.getOperand(1).isReg() && 1571 MI.getOperand(1).getReg() == RISCV::X0) || 1572 (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0); 1573 } 1574 return MI.isAsCheapAsAMove(); 1575 } 1576 1577 std::optional<DestSourcePair> 1578 RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { 1579 if (MI.isMoveReg()) 1580 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; 1581 switch (MI.getOpcode()) { 1582 default: 1583 break; 1584 case RISCV::ADDI: 1585 // Operand 1 can be a frameindex but callers expect registers 1586 if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() && 1587 MI.getOperand(2).getImm() == 0) 1588 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; 1589 break; 1590 case RISCV::FSGNJ_D: 1591 case RISCV::FSGNJ_S: 1592 case RISCV::FSGNJ_H: 1593 case RISCV::FSGNJ_D_INX: 1594 case RISCV::FSGNJ_D_IN32X: 1595 case RISCV::FSGNJ_S_INX: 1596 case RISCV::FSGNJ_H_INX: 1597 // The canonical floating-point move is fsgnj rd, rs, rs. 1598 if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() && 1599 MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) 1600 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; 1601 break; 1602 } 1603 return std::nullopt; 1604 } 1605 1606 MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const { 1607 if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) { 1608 // The option is unused. Choose Local strategy only for in-order cores. When 1609 // scheduling model is unspecified, use MinInstrCount strategy as more 1610 // generic one. 1611 const auto &SchedModel = STI.getSchedModel(); 1612 return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder()) 1613 ? MachineTraceStrategy::TS_MinInstrCount 1614 : MachineTraceStrategy::TS_Local; 1615 } 1616 // The strategy was forced by the option. 
1617 return ForceMachineCombinerStrategy; 1618 } 1619 1620 void RISCVInstrInfo::finalizeInsInstrs( 1621 MachineInstr &Root, MachineCombinerPattern &P, 1622 SmallVectorImpl<MachineInstr *> &InsInstrs) const { 1623 int16_t FrmOpIdx = 1624 RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm); 1625 if (FrmOpIdx < 0) { 1626 assert(all_of(InsInstrs, 1627 [](MachineInstr *MI) { 1628 return RISCV::getNamedOperandIdx(MI->getOpcode(), 1629 RISCV::OpName::frm) < 0; 1630 }) && 1631 "New instructions require FRM whereas the old one does not have it"); 1632 return; 1633 } 1634 1635 const MachineOperand &FRM = Root.getOperand(FrmOpIdx); 1636 MachineFunction &MF = *Root.getMF(); 1637 1638 for (auto *NewMI : InsInstrs) { 1639 assert(static_cast<unsigned>(RISCV::getNamedOperandIdx( 1640 NewMI->getOpcode(), RISCV::OpName::frm)) == 1641 NewMI->getNumOperands() && 1642 "Instruction has unexpected number of operands"); 1643 MachineInstrBuilder MIB(MF, NewMI); 1644 MIB.add(FRM); 1645 if (FRM.getImm() == RISCVFPRndMode::DYN) 1646 MIB.addUse(RISCV::FRM, RegState::Implicit); 1647 } 1648 } 1649 1650 static bool isFADD(unsigned Opc) { 1651 switch (Opc) { 1652 default: 1653 return false; 1654 case RISCV::FADD_H: 1655 case RISCV::FADD_S: 1656 case RISCV::FADD_D: 1657 return true; 1658 } 1659 } 1660 1661 static bool isFSUB(unsigned Opc) { 1662 switch (Opc) { 1663 default: 1664 return false; 1665 case RISCV::FSUB_H: 1666 case RISCV::FSUB_S: 1667 case RISCV::FSUB_D: 1668 return true; 1669 } 1670 } 1671 1672 static bool isFMUL(unsigned Opc) { 1673 switch (Opc) { 1674 default: 1675 return false; 1676 case RISCV::FMUL_H: 1677 case RISCV::FMUL_S: 1678 case RISCV::FMUL_D: 1679 return true; 1680 } 1681 } 1682 1683 bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst, 1684 bool &Commuted) const { 1685 if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted)) 1686 return false; 1687 1688 const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo(); 1689 unsigned OperandIdx = Commuted ? 2 : 1; 1690 const MachineInstr &Sibling = 1691 *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg()); 1692 1693 int16_t InstFrmOpIdx = 1694 RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm); 1695 int16_t SiblingFrmOpIdx = 1696 RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm); 1697 1698 return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) || 1699 RISCV::hasEqualFRM(Inst, Sibling); 1700 } 1701 1702 bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, 1703 bool Invert) const { 1704 unsigned Opc = Inst.getOpcode(); 1705 if (Invert) { 1706 auto InverseOpcode = getInverseOpcode(Opc); 1707 if (!InverseOpcode) 1708 return false; 1709 Opc = *InverseOpcode; 1710 } 1711 1712 if (isFADD(Opc) || isFMUL(Opc)) 1713 return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) && 1714 Inst.getFlag(MachineInstr::MIFlag::FmNsz); 1715 1716 switch (Opc) { 1717 default: 1718 return false; 1719 case RISCV::ADD: 1720 case RISCV::ADDW: 1721 case RISCV::AND: 1722 case RISCV::OR: 1723 case RISCV::XOR: 1724 // From RISC-V ISA spec, if both the high and low bits of the same product 1725 // are required, then the recommended code sequence is: 1726 // 1727 // MULH[[S]U] rdh, rs1, rs2 1728 // MUL rdl, rs1, rs2 1729 // (source register specifiers must be in same order and rdh cannot be the 1730 // same as rs1 or rs2) 1731 // 1732 // Microarchitectures can then fuse these into a single multiply operation 1733 // instead of performing two separate multiplies. 
1734 // MachineCombiner may reassociate MUL operands and lose the fusion 1735 // opportunity. 1736 case RISCV::MUL: 1737 case RISCV::MULW: 1738 case RISCV::MIN: 1739 case RISCV::MINU: 1740 case RISCV::MAX: 1741 case RISCV::MAXU: 1742 case RISCV::FMIN_H: 1743 case RISCV::FMIN_S: 1744 case RISCV::FMIN_D: 1745 case RISCV::FMAX_H: 1746 case RISCV::FMAX_S: 1747 case RISCV::FMAX_D: 1748 return true; 1749 } 1750 1751 return false; 1752 } 1753 1754 std::optional<unsigned> 1755 RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const { 1756 switch (Opcode) { 1757 default: 1758 return std::nullopt; 1759 case RISCV::FADD_H: 1760 return RISCV::FSUB_H; 1761 case RISCV::FADD_S: 1762 return RISCV::FSUB_S; 1763 case RISCV::FADD_D: 1764 return RISCV::FSUB_D; 1765 case RISCV::FSUB_H: 1766 return RISCV::FADD_H; 1767 case RISCV::FSUB_S: 1768 return RISCV::FADD_S; 1769 case RISCV::FSUB_D: 1770 return RISCV::FADD_D; 1771 case RISCV::ADD: 1772 return RISCV::SUB; 1773 case RISCV::SUB: 1774 return RISCV::ADD; 1775 case RISCV::ADDW: 1776 return RISCV::SUBW; 1777 case RISCV::SUBW: 1778 return RISCV::ADDW; 1779 } 1780 } 1781 1782 static bool canCombineFPFusedMultiply(const MachineInstr &Root, 1783 const MachineOperand &MO, 1784 bool DoRegPressureReduce) { 1785 if (!MO.isReg() || !MO.getReg().isVirtual()) 1786 return false; 1787 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); 1788 MachineInstr *MI = MRI.getVRegDef(MO.getReg()); 1789 if (!MI || !isFMUL(MI->getOpcode())) 1790 return false; 1791 1792 if (!Root.getFlag(MachineInstr::MIFlag::FmContract) || 1793 !MI->getFlag(MachineInstr::MIFlag::FmContract)) 1794 return false; 1795 1796 // Try combining even if fmul has more than one use as it eliminates 1797 // dependency between fadd(fsub) and fmul. However, it can extend liveranges 1798 // for fmul operands, so reject the transformation in register pressure 1799 // reduction mode. 1800 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 1801 return false; 1802 1803 // Do not combine instructions from different basic blocks. 1804 if (Root.getParent() != MI->getParent()) 1805 return false; 1806 return RISCV::hasEqualFRM(Root, *MI); 1807 } 1808 1809 static bool 1810 getFPFusedMultiplyPatterns(MachineInstr &Root, 1811 SmallVectorImpl<MachineCombinerPattern> &Patterns, 1812 bool DoRegPressureReduce) { 1813 unsigned Opc = Root.getOpcode(); 1814 bool IsFAdd = isFADD(Opc); 1815 if (!IsFAdd && !isFSUB(Opc)) 1816 return false; 1817 bool Added = false; 1818 if (canCombineFPFusedMultiply(Root, Root.getOperand(1), 1819 DoRegPressureReduce)) { 1820 Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_AX 1821 : MachineCombinerPattern::FMSUB); 1822 Added = true; 1823 } 1824 if (canCombineFPFusedMultiply(Root, Root.getOperand(2), 1825 DoRegPressureReduce)) { 1826 Patterns.push_back(IsFAdd ? 
MachineCombinerPattern::FMADD_XA 1827 : MachineCombinerPattern::FNMSUB); 1828 Added = true; 1829 } 1830 return Added; 1831 } 1832 1833 static bool getFPPatterns(MachineInstr &Root, 1834 SmallVectorImpl<MachineCombinerPattern> &Patterns, 1835 bool DoRegPressureReduce) { 1836 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce); 1837 } 1838 1839 bool RISCVInstrInfo::getMachineCombinerPatterns( 1840 MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, 1841 bool DoRegPressureReduce) const { 1842 1843 if (getFPPatterns(Root, Patterns, DoRegPressureReduce)) 1844 return true; 1845 1846 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, 1847 DoRegPressureReduce); 1848 } 1849 1850 static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, 1851 MachineCombinerPattern Pattern) { 1852 switch (RootOpc) { 1853 default: 1854 llvm_unreachable("Unexpected opcode"); 1855 case RISCV::FADD_H: 1856 return RISCV::FMADD_H; 1857 case RISCV::FADD_S: 1858 return RISCV::FMADD_S; 1859 case RISCV::FADD_D: 1860 return RISCV::FMADD_D; 1861 case RISCV::FSUB_H: 1862 return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_H 1863 : RISCV::FNMSUB_H; 1864 case RISCV::FSUB_S: 1865 return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_S 1866 : RISCV::FNMSUB_S; 1867 case RISCV::FSUB_D: 1868 return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_D 1869 : RISCV::FNMSUB_D; 1870 } 1871 } 1872 1873 static unsigned getAddendOperandIdx(MachineCombinerPattern Pattern) { 1874 switch (Pattern) { 1875 default: 1876 llvm_unreachable("Unexpected pattern"); 1877 case MachineCombinerPattern::FMADD_AX: 1878 case MachineCombinerPattern::FMSUB: 1879 return 2; 1880 case MachineCombinerPattern::FMADD_XA: 1881 case MachineCombinerPattern::FNMSUB: 1882 return 1; 1883 } 1884 } 1885 1886 static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev, 1887 MachineCombinerPattern Pattern, 1888 SmallVectorImpl<MachineInstr *> &InsInstrs, 1889 SmallVectorImpl<MachineInstr *> &DelInstrs) { 1890 MachineFunction *MF = Root.getMF(); 1891 MachineRegisterInfo &MRI = MF->getRegInfo(); 1892 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 1893 1894 MachineOperand &Mul1 = Prev.getOperand(1); 1895 MachineOperand &Mul2 = Prev.getOperand(2); 1896 MachineOperand &Dst = Root.getOperand(0); 1897 MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern)); 1898 1899 Register DstReg = Dst.getReg(); 1900 unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern); 1901 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags(); 1902 DebugLoc MergedLoc = 1903 DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc()); 1904 1905 bool Mul1IsKill = Mul1.isKill(); 1906 bool Mul2IsKill = Mul2.isKill(); 1907 bool AddendIsKill = Addend.isKill(); 1908 1909 // We need to clear kill flags since we may be extending the live range past 1910 // a kill. If the mul had kill flags, we can preserve those since we know 1911 // where the previous range stopped. 
1912 MRI.clearKillFlags(Mul1.getReg()); 1913 MRI.clearKillFlags(Mul2.getReg()); 1914 1915 MachineInstrBuilder MIB = 1916 BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg) 1917 .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill)) 1918 .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill)) 1919 .addReg(Addend.getReg(), getKillRegState(AddendIsKill)) 1920 .setMIFlags(IntersectedFlags); 1921 1922 InsInstrs.push_back(MIB); 1923 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) 1924 DelInstrs.push_back(&Prev); 1925 DelInstrs.push_back(&Root); 1926 } 1927 1928 void RISCVInstrInfo::genAlternativeCodeSequence( 1929 MachineInstr &Root, MachineCombinerPattern Pattern, 1930 SmallVectorImpl<MachineInstr *> &InsInstrs, 1931 SmallVectorImpl<MachineInstr *> &DelInstrs, 1932 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { 1933 MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); 1934 switch (Pattern) { 1935 default: 1936 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 1937 DelInstrs, InstrIdxForVirtReg); 1938 return; 1939 case MachineCombinerPattern::FMADD_AX: 1940 case MachineCombinerPattern::FMSUB: { 1941 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg()); 1942 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); 1943 return; 1944 } 1945 case MachineCombinerPattern::FMADD_XA: 1946 case MachineCombinerPattern::FNMSUB: { 1947 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg()); 1948 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); 1949 return; 1950 } 1951 } 1952 } 1953 1954 bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, 1955 StringRef &ErrInfo) const { 1956 MCInstrDesc const &Desc = MI.getDesc(); 1957 1958 for (const auto &[Index, Operand] : enumerate(Desc.operands())) { 1959 unsigned OpType = Operand.OperandType; 1960 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM && 1961 OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) { 1962 const MachineOperand &MO = MI.getOperand(Index); 1963 if (MO.isImm()) { 1964 int64_t Imm = MO.getImm(); 1965 bool Ok; 1966 switch (OpType) { 1967 default: 1968 llvm_unreachable("Unexpected operand type"); 1969 1970 // clang-format off 1971 #define CASE_OPERAND_UIMM(NUM) \ 1972 case RISCVOp::OPERAND_UIMM##NUM: \ 1973 Ok = isUInt<NUM>(Imm); \ 1974 break; 1975 CASE_OPERAND_UIMM(1) 1976 CASE_OPERAND_UIMM(2) 1977 CASE_OPERAND_UIMM(3) 1978 CASE_OPERAND_UIMM(4) 1979 CASE_OPERAND_UIMM(5) 1980 CASE_OPERAND_UIMM(6) 1981 CASE_OPERAND_UIMM(7) 1982 CASE_OPERAND_UIMM(8) 1983 CASE_OPERAND_UIMM(12) 1984 CASE_OPERAND_UIMM(20) 1985 // clang-format on 1986 case RISCVOp::OPERAND_UIMM2_LSB0: 1987 Ok = isShiftedUInt<1, 1>(Imm); 1988 break; 1989 case RISCVOp::OPERAND_UIMM7_LSB00: 1990 Ok = isShiftedUInt<5, 2>(Imm); 1991 break; 1992 case RISCVOp::OPERAND_UIMM8_LSB00: 1993 Ok = isShiftedUInt<6, 2>(Imm); 1994 break; 1995 case RISCVOp::OPERAND_UIMM8_LSB000: 1996 Ok = isShiftedUInt<5, 3>(Imm); 1997 break; 1998 case RISCVOp::OPERAND_UIMM8_GE32: 1999 Ok = isUInt<8>(Imm) && Imm >= 32; 2000 break; 2001 case RISCVOp::OPERAND_UIMM9_LSB000: 2002 Ok = isShiftedUInt<6, 3>(Imm); 2003 break; 2004 case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO: 2005 Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0); 2006 break; 2007 case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO: 2008 Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0); 2009 break; 2010 case RISCVOp::OPERAND_ZERO: 2011 Ok = Imm == 0; 2012 break; 2013 case RISCVOp::OPERAND_SIMM5: 2014 Ok = isInt<5>(Imm); 2015 break; 2016 case RISCVOp::OPERAND_SIMM5_PLUS1: 2017 Ok = (isInt<5>(Imm) && Imm 
!= -16) || Imm == 16; 2018 break; 2019 case RISCVOp::OPERAND_SIMM6: 2020 Ok = isInt<6>(Imm); 2021 break; 2022 case RISCVOp::OPERAND_SIMM6_NONZERO: 2023 Ok = Imm != 0 && isInt<6>(Imm); 2024 break; 2025 case RISCVOp::OPERAND_VTYPEI10: 2026 Ok = isUInt<10>(Imm); 2027 break; 2028 case RISCVOp::OPERAND_VTYPEI11: 2029 Ok = isUInt<11>(Imm); 2030 break; 2031 case RISCVOp::OPERAND_SIMM12: 2032 Ok = isInt<12>(Imm); 2033 break; 2034 case RISCVOp::OPERAND_SIMM12_LSB00000: 2035 Ok = isShiftedInt<7, 5>(Imm); 2036 break; 2037 case RISCVOp::OPERAND_UIMMLOG2XLEN: 2038 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm); 2039 break; 2040 case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO: 2041 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm); 2042 Ok = Ok && Imm != 0; 2043 break; 2044 case RISCVOp::OPERAND_CLUI_IMM: 2045 Ok = (isUInt<5>(Imm) && Imm != 0) || 2046 (Imm >= 0xfffe0 && Imm <= 0xfffff); 2047 break; 2048 case RISCVOp::OPERAND_RVKRNUM: 2049 Ok = Imm >= 0 && Imm <= 10; 2050 break; 2051 case RISCVOp::OPERAND_RVKRNUM_0_7: 2052 Ok = Imm >= 0 && Imm <= 7; 2053 break; 2054 case RISCVOp::OPERAND_RVKRNUM_1_10: 2055 Ok = Imm >= 1 && Imm <= 10; 2056 break; 2057 case RISCVOp::OPERAND_RVKRNUM_2_14: 2058 Ok = Imm >= 2 && Imm <= 14; 2059 break; 2060 } 2061 if (!Ok) { 2062 ErrInfo = "Invalid immediate"; 2063 return false; 2064 } 2065 } 2066 } 2067 } 2068 2069 const uint64_t TSFlags = Desc.TSFlags; 2070 if (RISCVII::hasVLOp(TSFlags)) { 2071 const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc)); 2072 if (!Op.isImm() && !Op.isReg()) { 2073 ErrInfo = "Invalid operand type for VL operand"; 2074 return false; 2075 } 2076 if (Op.isReg() && Op.getReg() != RISCV::NoRegister) { 2077 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 2078 auto *RC = MRI.getRegClass(Op.getReg()); 2079 if (!RISCV::GPRRegClass.hasSubClassEq(RC)) { 2080 ErrInfo = "Invalid register class for VL operand"; 2081 return false; 2082 } 2083 } 2084 if (!RISCVII::hasSEWOp(TSFlags)) { 2085 ErrInfo = "VL operand w/o SEW operand?"; 2086 return false; 2087 } 2088 } 2089 if (RISCVII::hasSEWOp(TSFlags)) { 2090 unsigned OpIdx = RISCVII::getSEWOpNum(Desc); 2091 if (!MI.getOperand(OpIdx).isImm()) { 2092 ErrInfo = "SEW value expected to be an immediate"; 2093 return false; 2094 } 2095 uint64_t Log2SEW = MI.getOperand(OpIdx).getImm(); 2096 if (Log2SEW > 31) { 2097 ErrInfo = "Unexpected SEW value"; 2098 return false; 2099 } 2100 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; 2101 if (!RISCVVType::isValidSEW(SEW)) { 2102 ErrInfo = "Unexpected SEW value"; 2103 return false; 2104 } 2105 } 2106 if (RISCVII::hasVecPolicyOp(TSFlags)) { 2107 unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc); 2108 if (!MI.getOperand(OpIdx).isImm()) { 2109 ErrInfo = "Policy operand expected to be an immediate"; 2110 return false; 2111 } 2112 uint64_t Policy = MI.getOperand(OpIdx).getImm(); 2113 if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) { 2114 ErrInfo = "Invalid Policy Value"; 2115 return false; 2116 } 2117 if (!RISCVII::hasVLOp(TSFlags)) { 2118 ErrInfo = "policy operand w/o VL operand?"; 2119 return false; 2120 } 2121 2122 // VecPolicy operands can only exist on instructions with passthru/merge 2123 // arguments. Note that not all arguments with passthru have vec policy 2124 // operands- some instructions have implicit policies. 
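    // The passthru/merge source is modeled as a use operand tied to def
    // operand 0, so the tied-use query below is how "this instruction has a
    // passthru" is detected here.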
2125 unsigned UseOpIdx; 2126 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 2127 ErrInfo = "policy operand w/o tied operand?"; 2128 return false; 2129 } 2130 } 2131 2132 return true; 2133 } 2134 2135 bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, 2136 const MachineInstr &AddrI, 2137 ExtAddrMode &AM) const { 2138 switch (MemI.getOpcode()) { 2139 default: 2140 return false; 2141 case RISCV::LB: 2142 case RISCV::LBU: 2143 case RISCV::LH: 2144 case RISCV::LHU: 2145 case RISCV::LW: 2146 case RISCV::LWU: 2147 case RISCV::LD: 2148 case RISCV::FLH: 2149 case RISCV::FLW: 2150 case RISCV::FLD: 2151 case RISCV::SB: 2152 case RISCV::SH: 2153 case RISCV::SW: 2154 case RISCV::SD: 2155 case RISCV::FSH: 2156 case RISCV::FSW: 2157 case RISCV::FSD: 2158 break; 2159 } 2160 2161 if (MemI.getOperand(0).getReg() == Reg) 2162 return false; 2163 2164 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() || 2165 !AddrI.getOperand(2).isImm()) 2166 return false; 2167 2168 int64_t OldOffset = MemI.getOperand(2).getImm(); 2169 int64_t Disp = AddrI.getOperand(2).getImm(); 2170 int64_t NewOffset = OldOffset + Disp; 2171 if (!STI.is64Bit()) 2172 NewOffset = SignExtend64<32>(NewOffset); 2173 2174 if (!isInt<12>(NewOffset)) 2175 return false; 2176 2177 AM.BaseReg = AddrI.getOperand(1).getReg(); 2178 AM.ScaledReg = 0; 2179 AM.Scale = 0; 2180 AM.Displacement = NewOffset; 2181 AM.Form = ExtAddrMode::Formula::Basic; 2182 return true; 2183 } 2184 2185 MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI, 2186 const ExtAddrMode &AM) const { 2187 2188 const DebugLoc &DL = MemI.getDebugLoc(); 2189 MachineBasicBlock &MBB = *MemI.getParent(); 2190 2191 assert(AM.ScaledReg == 0 && AM.Scale == 0 && 2192 "Addressing mode not supported for folding"); 2193 2194 return BuildMI(MBB, MemI, DL, get(MemI.getOpcode())) 2195 .addReg(MemI.getOperand(0).getReg(), 2196 MemI.mayLoad() ? RegState::Define : 0) 2197 .addReg(AM.BaseReg) 2198 .addImm(AM.Displacement) 2199 .setMemRefs(MemI.memoperands()) 2200 .setMIFlags(MemI.getFlags()); 2201 } 2202 2203 bool RISCVInstrInfo::getMemOperandsWithOffsetWidth( 2204 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, 2205 int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, 2206 const TargetRegisterInfo *TRI) const { 2207 if (!LdSt.mayLoadOrStore()) 2208 return false; 2209 2210 // Conservatively, only handle scalar loads/stores for now. 2211 switch (LdSt.getOpcode()) { 2212 case RISCV::LB: 2213 case RISCV::LBU: 2214 case RISCV::SB: 2215 case RISCV::LH: 2216 case RISCV::LHU: 2217 case RISCV::FLH: 2218 case RISCV::SH: 2219 case RISCV::FSH: 2220 case RISCV::LW: 2221 case RISCV::LWU: 2222 case RISCV::FLW: 2223 case RISCV::SW: 2224 case RISCV::FSW: 2225 case RISCV::LD: 2226 case RISCV::FLD: 2227 case RISCV::SD: 2228 case RISCV::FSD: 2229 break; 2230 default: 2231 return false; 2232 } 2233 const MachineOperand *BaseOp; 2234 OffsetIsScalable = false; 2235 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) 2236 return false; 2237 BaseOps.push_back(BaseOp); 2238 return true; 2239 } 2240 2241 // TODO: This was copied from SIInstrInfo. Could it be lifted to a common 2242 // helper? 
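// Returns true if the two memory operations appear to share a base pointer,
// either because their first base operands are identical or because the IR
// values recorded in their memory operands trace back to the same underlying
// object.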
2243 static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, 2244 ArrayRef<const MachineOperand *> BaseOps1, 2245 const MachineInstr &MI2, 2246 ArrayRef<const MachineOperand *> BaseOps2) { 2247 // Only examine the first "base" operand of each instruction, on the 2248 // assumption that it represents the real base address of the memory access. 2249 // Other operands are typically offsets or indices from this base address. 2250 if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front())) 2251 return true; 2252 2253 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand()) 2254 return false; 2255 2256 auto MO1 = *MI1.memoperands_begin(); 2257 auto MO2 = *MI2.memoperands_begin(); 2258 if (MO1->getAddrSpace() != MO2->getAddrSpace()) 2259 return false; 2260 2261 auto Base1 = MO1->getValue(); 2262 auto Base2 = MO2->getValue(); 2263 if (!Base1 || !Base2) 2264 return false; 2265 Base1 = getUnderlyingObject(Base1); 2266 Base2 = getUnderlyingObject(Base2); 2267 2268 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2)) 2269 return false; 2270 2271 return Base1 == Base2; 2272 } 2273 2274 bool RISCVInstrInfo::shouldClusterMemOps( 2275 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1, 2276 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2, 2277 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, 2278 unsigned NumBytes) const { 2279 // If the mem ops (to be clustered) do not have the same base ptr, then they 2280 // should not be clustered 2281 if (!BaseOps1.empty() && !BaseOps2.empty()) { 2282 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); 2283 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); 2284 if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) 2285 return false; 2286 } else if (!BaseOps1.empty() || !BaseOps2.empty()) { 2287 // If only one base op is empty, they do not have the same base ptr 2288 return false; 2289 } 2290 2291 unsigned CacheLineSize = 2292 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize(); 2293 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget. 2294 CacheLineSize = CacheLineSize ? CacheLineSize : 64; 2295 // Cluster if the memory operations are on the same or a neighbouring cache 2296 // line, but limit the maximum ClusterSize to avoid creating too much 2297 // additional register pressure. 2298 return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize; 2299 } 2300 2301 // Set BaseReg (the base register operand), Offset (the byte offset being 2302 // accessed) and the access Width of the passed instruction that reads/writes 2303 // memory. Returns false if the instruction does not read/write memory or the 2304 // BaseReg/Offset/Width can't be determined. Is not guaranteed to always 2305 // recognise base operands and offsets in all cases. 2306 // TODO: Add an IsScalable bool ref argument (like the equivalent AArch64 2307 // function) and set it as appropriate. 2308 bool RISCVInstrInfo::getMemOperandWithOffsetWidth( 2309 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, 2310 unsigned &Width, const TargetRegisterInfo *TRI) const { 2311 if (!LdSt.mayLoadOrStore()) 2312 return false; 2313 2314 // Here we assume the standard RISC-V ISA, which uses a base+offset 2315 // addressing mode. You'll need to relax these conditions to support custom 2316 // load/store instructions. 
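  // For the scalar loads/stores handled here the expected shape is:
  //   operand 0: value register (defined by loads, read by stores)
  //   operand 1: base register or frame index
  //   operand 2: signed 12-bit immediate offset
  // The checks below simply verify that layout.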
2317 if (LdSt.getNumExplicitOperands() != 3) 2318 return false; 2319 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || 2320 !LdSt.getOperand(2).isImm()) 2321 return false; 2322 2323 if (!LdSt.hasOneMemOperand()) 2324 return false; 2325 2326 Width = (*LdSt.memoperands_begin())->getSize(); 2327 BaseReg = &LdSt.getOperand(1); 2328 Offset = LdSt.getOperand(2).getImm(); 2329 return true; 2330 } 2331 2332 bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint( 2333 const MachineInstr &MIa, const MachineInstr &MIb) const { 2334 assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); 2335 assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); 2336 2337 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || 2338 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 2339 return false; 2340 2341 // Retrieve the base register, offset from the base register and width. Width 2342 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If 2343 // base registers are identical, and the offset of a lower memory access + 2344 // the width doesn't overlap the offset of a higher memory access, 2345 // then the memory accesses are different. 2346 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 2347 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; 2348 int64_t OffsetA = 0, OffsetB = 0; 2349 unsigned int WidthA = 0, WidthB = 0; 2350 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && 2351 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { 2352 if (BaseOpA->isIdenticalTo(*BaseOpB)) { 2353 int LowOffset = std::min(OffsetA, OffsetB); 2354 int HighOffset = std::max(OffsetA, OffsetB); 2355 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 2356 if (LowOffset + LowWidth <= HighOffset) 2357 return true; 2358 } 2359 } 2360 return false; 2361 } 2362 2363 std::pair<unsigned, unsigned> 2364 RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 2365 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK; 2366 return std::make_pair(TF & Mask, TF & ~Mask); 2367 } 2368 2369 ArrayRef<std::pair<unsigned, const char *>> 2370 RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 2371 using namespace RISCVII; 2372 static const std::pair<unsigned, const char *> TargetFlags[] = { 2373 {MO_CALL, "riscv-call"}, 2374 {MO_LO, "riscv-lo"}, 2375 {MO_HI, "riscv-hi"}, 2376 {MO_PCREL_LO, "riscv-pcrel-lo"}, 2377 {MO_PCREL_HI, "riscv-pcrel-hi"}, 2378 {MO_GOT_HI, "riscv-got-hi"}, 2379 {MO_TPREL_LO, "riscv-tprel-lo"}, 2380 {MO_TPREL_HI, "riscv-tprel-hi"}, 2381 {MO_TPREL_ADD, "riscv-tprel-add"}, 2382 {MO_TLS_GOT_HI, "riscv-tls-got-hi"}, 2383 {MO_TLS_GD_HI, "riscv-tls-gd-hi"}, 2384 {MO_TLSDESC_HI, "riscv-tlsdesc-hi"}, 2385 {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"}, 2386 {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"}, 2387 {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}}; 2388 return ArrayRef(TargetFlags); 2389 } 2390 bool RISCVInstrInfo::isFunctionSafeToOutlineFrom( 2391 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { 2392 const Function &F = MF.getFunction(); 2393 2394 // Can F be deduplicated by the linker? If it can, don't outline from it. 2395 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 2396 return false; 2397 2398 // Don't outline from functions with section markings; the program could 2399 // expect that all the code is in the named section. 2400 if (F.hasSection()) 2401 return false; 2402 2403 // It's safe to outline from MF. 
2404   return true;
2405 }
2406 
2407 bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
2408                                             unsigned &Flags) const {
2409   // More accurate safety checking is done in getOutliningCandidateInfo.
2410   return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
2411 }
2412 
2413 // Enum values indicating how an outlined call should be constructed.
2414 enum MachineOutlinerConstructionID {
2415   MachineOutlinerDefault
2416 };
2417 
2418 bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
2419     MachineFunction &MF) const {
2420   return MF.getFunction().hasMinSize();
2421 }
2422 
2423 std::optional<outliner::OutlinedFunction>
2424 RISCVInstrInfo::getOutliningCandidateInfo(
2425     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
2426 
2427   // First we need to filter out candidates where the X5 register (i.e. t0)
2428   // can't be used to set up the function call.
2429   auto CannotInsertCall = [](outliner::Candidate &C) {
2430     const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
2431     return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
2432   };
2433 
2434   llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
2435 
2436   // If the sequence doesn't have enough candidates left, then we're done.
2437   if (RepeatedSequenceLocs.size() < 2)
2438     return std::nullopt;
2439 
2440   unsigned SequenceSize = 0;
2441 
2442   for (auto &MI : RepeatedSequenceLocs[0])
2443     SequenceSize += getInstSizeInBytes(MI);
2444 
2445   // call t0, function = 8 bytes.
2446   unsigned CallOverhead = 8;
2447   for (auto &C : RepeatedSequenceLocs)
2448     C.setCallInfo(MachineOutlinerDefault, CallOverhead);
2449 
2450   // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
2451   unsigned FrameOverhead = 4;
2452   if (RepeatedSequenceLocs[0]
2453           .getMF()
2454           ->getSubtarget<RISCVSubtarget>()
2455           .hasStdExtCOrZca())
2456     FrameOverhead = 2;
2457 
2458   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
2459                                     FrameOverhead, MachineOutlinerDefault);
2460 }
2461 
2462 outliner::InstrType
2463 RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
2464                                      unsigned Flags) const {
2465   MachineInstr &MI = *MBBI;
2466   MachineBasicBlock *MBB = MI.getParent();
2467   const TargetRegisterInfo *TRI =
2468       MBB->getParent()->getSubtarget().getRegisterInfo();
2469   const auto &F = MI.getMF()->getFunction();
2470 
2471   // We can manually strip out CFI instructions later.
2472   if (MI.isCFIInstruction())
2473     // If the current function has exception handling code, we can't outline
2474     // and strip these CFI instructions, since that may break the .eh_frame
2475     // section needed for unwinding.
2476     return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
2477                                      : outliner::InstrType::Invisible;
2478 
2479   // We need support for tail calls to outlined functions before return
2480   // statements can be allowed.
2481   if (MI.isReturn())
2482     return outliner::InstrType::Illegal;
2483 
2484   // Don't allow modifying the X5 register, which we use for return addresses
2485   // in these outlined functions.
2486   if (MI.modifiesRegister(RISCV::X5, TRI) ||
2487       MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
2488     return outliner::InstrType::Illegal;
2489 
2490   // Make sure the operands don't reference something unsafe.
2491   for (const auto &MO : MI.operands()) {
2492 
2493     // pcrel-hi and pcrel-lo can't be put in separate sections; filter that
2494     // out if at all possible.
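    // Rough rationale: a %pcrel_lo operand refers back to the label of the
    // %pcrel_hi (auipc) instruction that produced the upper bits. If the
    // outlined body could land in a different section than that auipc
    // (function sections, comdats, or an explicit section attribute make this
    // possible), the paired fixup could no longer be resolved, so reject the
    // instruction below.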
2495 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO && 2496 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() || 2497 F.hasSection())) 2498 return outliner::InstrType::Illegal; 2499 } 2500 2501 return outliner::InstrType::Legal; 2502 } 2503 2504 void RISCVInstrInfo::buildOutlinedFrame( 2505 MachineBasicBlock &MBB, MachineFunction &MF, 2506 const outliner::OutlinedFunction &OF) const { 2507 2508 // Strip out any CFI instructions 2509 bool Changed = true; 2510 while (Changed) { 2511 Changed = false; 2512 auto I = MBB.begin(); 2513 auto E = MBB.end(); 2514 for (; I != E; ++I) { 2515 if (I->isCFIInstruction()) { 2516 I->removeFromParent(); 2517 Changed = true; 2518 break; 2519 } 2520 } 2521 } 2522 2523 MBB.addLiveIn(RISCV::X5); 2524 2525 // Add in a return instruction to the end of the outlined frame. 2526 MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR)) 2527 .addReg(RISCV::X0, RegState::Define) 2528 .addReg(RISCV::X5) 2529 .addImm(0)); 2530 } 2531 2532 MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( 2533 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, 2534 MachineFunction &MF, outliner::Candidate &C) const { 2535 2536 // Add in a call instruction to the outlined function at the given location. 2537 It = MBB.insert(It, 2538 BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5) 2539 .addGlobalAddress(M.getNamedValue(MF.getName()), 0, 2540 RISCVII::MO_CALL)); 2541 return It; 2542 } 2543 2544 std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI, 2545 Register Reg) const { 2546 // TODO: Handle cases where Reg is a super- or sub-register of the 2547 // destination register. 2548 const MachineOperand &Op0 = MI.getOperand(0); 2549 if (!Op0.isReg() || Reg != Op0.getReg()) 2550 return std::nullopt; 2551 2552 // Don't consider ADDIW as a candidate because the caller may not be aware 2553 // of its sign extension behaviour. 2554 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() && 2555 MI.getOperand(2).isImm()) 2556 return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()}; 2557 2558 return std::nullopt; 2559 } 2560 2561 // MIR printer helper function to annotate Operands with a comment. 2562 std::string RISCVInstrInfo::createMIROperandComment( 2563 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, 2564 const TargetRegisterInfo *TRI) const { 2565 // Print a generic comment for this operand if there is one. 2566 std::string GenericComment = 2567 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); 2568 if (!GenericComment.empty()) 2569 return GenericComment; 2570 2571 // If not, we must have an immediate operand. 2572 if (!Op.isImm()) 2573 return std::string(); 2574 2575 std::string Comment; 2576 raw_string_ostream OS(Comment); 2577 2578 uint64_t TSFlags = MI.getDesc().TSFlags; 2579 2580 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW 2581 // operand of vector codegen pseudos. 2582 if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI || 2583 MI.getOpcode() == RISCV::PseudoVSETVLI || 2584 MI.getOpcode() == RISCV::PseudoVSETIVLI || 2585 MI.getOpcode() == RISCV::PseudoVSETVLIX0) && 2586 OpIdx == 2) { 2587 unsigned Imm = MI.getOperand(OpIdx).getImm(); 2588 RISCVVType::printVType(Imm, OS); 2589 } else if (RISCVII::hasSEWOp(TSFlags) && 2590 OpIdx == RISCVII::getSEWOpNum(MI.getDesc())) { 2591 unsigned Log2SEW = MI.getOperand(OpIdx).getImm(); 2592 unsigned SEW = Log2SEW ? 
1 << Log2SEW : 8; 2593 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 2594 OS << "e" << SEW; 2595 } else if (RISCVII::hasVecPolicyOp(TSFlags) && 2596 OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) { 2597 unsigned Policy = MI.getOperand(OpIdx).getImm(); 2598 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && 2599 "Invalid Policy Value"); 2600 OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", " 2601 << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu"); 2602 } 2603 2604 OS.flush(); 2605 return Comment; 2606 } 2607 2608 // clang-format off 2609 #define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \ 2610 RISCV::PseudoV##OP##_##TYPE##_##LMUL 2611 2612 #define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE) \ 2613 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1): \ 2614 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2): \ 2615 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4): \ 2616 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8) 2617 2618 #define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE) \ 2619 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2): \ 2620 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE) 2621 2622 #define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) \ 2623 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4): \ 2624 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE) 2625 2626 #define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \ 2627 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \ 2628 case CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) 2629 2630 #define CASE_VFMA_SPLATS(OP) \ 2631 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16): \ 2632 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32): \ 2633 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64) 2634 // clang-format on 2635 2636 bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, 2637 unsigned &SrcOpIdx1, 2638 unsigned &SrcOpIdx2) const { 2639 const MCInstrDesc &Desc = MI.getDesc(); 2640 if (!Desc.isCommutable()) 2641 return false; 2642 2643 switch (MI.getOpcode()) { 2644 case RISCV::TH_MVEQZ: 2645 case RISCV::TH_MVNEZ: 2646 // We can't commute operands if operand 2 (i.e., rs1 in 2647 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is 2648 // not valid as the in/out-operand 1). 2649 if (MI.getOperand(2).getReg() == RISCV::X0) 2650 return false; 2651 // Operands 1 and 2 are commutable, if we switch the opcode. 2652 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2); 2653 case RISCV::TH_MULA: 2654 case RISCV::TH_MULAW: 2655 case RISCV::TH_MULAH: 2656 case RISCV::TH_MULS: 2657 case RISCV::TH_MULSW: 2658 case RISCV::TH_MULSH: 2659 // Operands 2 and 3 are commutable. 2660 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); 2661 case RISCV::PseudoCCMOVGPRNoX0: 2662 case RISCV::PseudoCCMOVGPR: 2663 // Operands 4 and 5 are commutable. 2664 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5); 2665 case CASE_VFMA_SPLATS(FMADD): 2666 case CASE_VFMA_SPLATS(FMSUB): 2667 case CASE_VFMA_SPLATS(FMACC): 2668 case CASE_VFMA_SPLATS(FMSAC): 2669 case CASE_VFMA_SPLATS(FNMADD): 2670 case CASE_VFMA_SPLATS(FNMSUB): 2671 case CASE_VFMA_SPLATS(FNMACC): 2672 case CASE_VFMA_SPLATS(FNMSAC): 2673 case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV): 2674 case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV): 2675 case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV): 2676 case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV): 2677 case CASE_VFMA_OPCODE_LMULS(MADD, VX): 2678 case CASE_VFMA_OPCODE_LMULS(NMSUB, VX): 2679 case CASE_VFMA_OPCODE_LMULS(MACC, VX): 2680 case CASE_VFMA_OPCODE_LMULS(NMSAC, VX): 2681 case CASE_VFMA_OPCODE_LMULS(MACC, VV): 2682 case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): { 2683 // If the tail policy is undisturbed we can't commute. 
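    // Sketch of the reasoning: with a tail-undisturbed policy the tail
    // elements of the result come from the tied source (operand 1), so
    // swapping operand 1 with another operand would change what ends up in
    // the tail.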
2684     assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2685     if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2686       return false;
2687 
2688     // For these instructions we can only swap operand 1 and operand 3 by
2689     // changing the opcode.
2690     unsigned CommutableOpIdx1 = 1;
2691     unsigned CommutableOpIdx2 = 3;
2692     if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2693                               CommutableOpIdx2))
2694       return false;
2695     return true;
2696   }
2697   case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
2698   case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
2699   case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
2700   case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
2701   case CASE_VFMA_OPCODE_LMULS(MADD, VV):
2702   case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
2703     // If the tail policy is undisturbed we can't commute.
2704     assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2705     if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2706       return false;
2707 
2708     // For these instructions we have more freedom. We can commute with the
2709     // other multiplicand or with the addend/subtrahend/minuend.
2710 
2711     // Any fixed operand must be from source 1, 2 or 3.
2712     if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
2713       return false;
2714     if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
2715       return false;
2716 
2717     // If both ops are fixed, one must be the tied source.
2718     if (SrcOpIdx1 != CommuteAnyOperandIndex &&
2719         SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
2720       return false;
2721 
2722     // Look for two different register operands assumed to be commutable
2723     // regardless of the FMA opcode. The FMA opcode is adjusted later if
2724     // needed.
2725     if (SrcOpIdx1 == CommuteAnyOperandIndex ||
2726         SrcOpIdx2 == CommuteAnyOperandIndex) {
2727       // At least one of the operands to be commuted is not specified and
2728       // this method is free to choose appropriate commutable operands.
2729       unsigned CommutableOpIdx1 = SrcOpIdx1;
2730       if (SrcOpIdx1 == SrcOpIdx2) {
2731         // Neither operand is fixed. Set one of the commutable
2732         // operands to the tied source.
2733         CommutableOpIdx1 = 1;
2734       } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
2735         // Only one of the operands is not fixed.
2736         CommutableOpIdx1 = SrcOpIdx2;
2737       }
2738 
2739       // CommutableOpIdx1 is well defined now. Let's choose another commutable
2740       // operand and assign its index to CommutableOpIdx2.
2741       unsigned CommutableOpIdx2;
2742       if (CommutableOpIdx1 != 1) {
2743         // If we haven't already used the tied source, we must use it now.
2744         CommutableOpIdx2 = 1;
2745       } else {
2746         Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();
2747 
2748         // The commuted operands should have different registers.
2749         // Otherwise, the commute transformation does not change anything and
2750         // is useless. We use this as a hint to make our decision.
2751         if (Op1Reg != MI.getOperand(2).getReg())
2752           CommutableOpIdx2 = 2;
2753         else
2754           CommutableOpIdx2 = 3;
2755       }
2756 
2757       // Assign the found pair of commutable indices to SrcOpIdx1 and
2758       // SrcOpIdx2 to return those values.
2759 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1, 2760 CommutableOpIdx2)) 2761 return false; 2762 } 2763 2764 return true; 2765 } 2766 } 2767 2768 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); 2769 } 2770 2771 #define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \ 2772 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \ 2773 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \ 2774 break; 2775 2776 #define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \ 2777 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \ 2778 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \ 2779 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \ 2780 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8) 2781 2782 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \ 2783 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \ 2784 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) 2785 2786 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \ 2787 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \ 2788 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) 2789 2790 #define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \ 2791 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \ 2792 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) 2793 2794 #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \ 2795 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \ 2796 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \ 2797 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64) 2798 2799 MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, 2800 bool NewMI, 2801 unsigned OpIdx1, 2802 unsigned OpIdx2) const { 2803 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { 2804 if (NewMI) 2805 return *MI.getParent()->getParent()->CloneMachineInstr(&MI); 2806 return MI; 2807 }; 2808 2809 switch (MI.getOpcode()) { 2810 case RISCV::TH_MVEQZ: 2811 case RISCV::TH_MVNEZ: { 2812 auto &WorkingMI = cloneIfNew(MI); 2813 WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ 2814 : RISCV::TH_MVEQZ)); 2815 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1, 2816 OpIdx2); 2817 } 2818 case RISCV::PseudoCCMOVGPRNoX0: 2819 case RISCV::PseudoCCMOVGPR: { 2820 // CCMOV can be commuted by inverting the condition. 
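  // Operands 4 and 5 are the two candidate values the pseudo selects between;
  // swapping them is compensated for by replacing the condition code held in
  // operand 3 with its opposite, which is what happens below.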
2821     auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
2822     CC = RISCVCC::getOppositeBranchCondition(CC);
2823     auto &WorkingMI = cloneIfNew(MI);
2824     WorkingMI.getOperand(3).setImm(CC);
2825     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
2826                                                    OpIdx1, OpIdx2);
2827   }
2828   case CASE_VFMA_SPLATS(FMACC):
2829   case CASE_VFMA_SPLATS(FMADD):
2830   case CASE_VFMA_SPLATS(FMSAC):
2831   case CASE_VFMA_SPLATS(FMSUB):
2832   case CASE_VFMA_SPLATS(FNMACC):
2833   case CASE_VFMA_SPLATS(FNMADD):
2834   case CASE_VFMA_SPLATS(FNMSAC):
2835   case CASE_VFMA_SPLATS(FNMSUB):
2836   case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV):
2837   case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV):
2838   case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV):
2839   case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV):
2840   case CASE_VFMA_OPCODE_LMULS(MADD, VX):
2841   case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
2842   case CASE_VFMA_OPCODE_LMULS(MACC, VX):
2843   case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
2844   case CASE_VFMA_OPCODE_LMULS(MACC, VV):
2845   case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
2846     // It only makes sense to toggle these between clobbering the
2847     // addend/subtrahend/minuend and clobbering one of the multiplicands.
2848     assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
2849     assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
2850     unsigned Opc;
2851     switch (MI.getOpcode()) {
2852     default:
2853       llvm_unreachable("Unexpected opcode");
2854     CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
2855     CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
2856     CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
2857     CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
2858     CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
2859     CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
2860     CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
2861     CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
2862     CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMACC, FMADD, VV)
2863     CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSAC, FMSUB, VV)
2864     CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMACC, FNMADD, VV)
2865     CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSAC, FNMSUB, VV)
2866     CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
2867     CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
2868     CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
2869     CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
2870     CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
2871     CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
2872     }
2873 
2874     auto &WorkingMI = cloneIfNew(MI);
2875     WorkingMI.setDesc(get(Opc));
2876     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2877                                                    OpIdx1, OpIdx2);
2878   }
2879   case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
2880   case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
2881   case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
2882   case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
2883   case CASE_VFMA_OPCODE_LMULS(MADD, VV):
2884   case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
2885     assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
2886     // If one of the operands is the addend, we need to change the opcode.
2887     // Otherwise we're just swapping 2 of the multiplicands.
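    // Illustrative example: vfmadd.vv vd, vs1, vs2 computes vd*vs1 + vs2, so
    // vd is a multiplicand, while vfmacc.vv vd, vs1, vs2 computes vs1*vs2 + vd,
    // where vd is the addend. Commuting the tied operand with the addend
    // therefore has to switch between the *MADD and *MACC opcode forms below.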
2888 if (OpIdx1 == 3 || OpIdx2 == 3) { 2889 unsigned Opc; 2890 switch (MI.getOpcode()) { 2891 default: 2892 llvm_unreachable("Unexpected opcode"); 2893 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMADD, FMACC, VV) 2894 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSUB, FMSAC, VV) 2895 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMADD, FNMACC, VV) 2896 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSUB, FNMSAC, VV) 2897 CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV) 2898 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV) 2899 } 2900 2901 auto &WorkingMI = cloneIfNew(MI); 2902 WorkingMI.setDesc(get(Opc)); 2903 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, 2904 OpIdx1, OpIdx2); 2905 } 2906 // Let the default code handle it. 2907 break; 2908 } 2909 } 2910 2911 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); 2912 } 2913 2914 #undef CASE_VFMA_CHANGE_OPCODE_SPLATS 2915 #undef CASE_VFMA_CHANGE_OPCODE_LMULS 2916 #undef CASE_VFMA_CHANGE_OPCODE_COMMON 2917 #undef CASE_VFMA_SPLATS 2918 #undef CASE_VFMA_OPCODE_LMULS 2919 #undef CASE_VFMA_OPCODE_COMMON 2920 2921 // clang-format off 2922 #define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \ 2923 RISCV::PseudoV##OP##_##LMUL##_TIED 2924 2925 #define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \ 2926 CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \ 2927 case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \ 2928 case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \ 2929 case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \ 2930 case CASE_WIDEOP_OPCODE_COMMON(OP, M4) 2931 2932 #define CASE_WIDEOP_OPCODE_LMULS(OP) \ 2933 CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \ 2934 case CASE_WIDEOP_OPCODE_LMULS_MF4(OP) 2935 // clang-format on 2936 2937 #define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \ 2938 case RISCV::PseudoV##OP##_##LMUL##_TIED: \ 2939 NewOpc = RISCV::PseudoV##OP##_##LMUL; \ 2940 break; 2941 2942 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \ 2943 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \ 2944 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \ 2945 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \ 2946 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \ 2947 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4) 2948 2949 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \ 2950 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \ 2951 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) 2952 2953 MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, 2954 LiveVariables *LV, 2955 LiveIntervals *LIS) const { 2956 MachineInstrBuilder MIB; 2957 switch (MI.getOpcode()) { 2958 default: 2959 return nullptr; 2960 case CASE_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV): 2961 case CASE_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): { 2962 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && 2963 MI.getNumExplicitOperands() == 7 && 2964 "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy"); 2965 // If the tail policy is undisturbed we can't convert. 
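    // Sketch of the reasoning: the untied form built below passes an undef
    // register as the passthru, which is only acceptable when the tail is
    // agnostic (policy bit 0 set); a tail-undisturbed result would still need
    // the original tied value.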
2966 if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() & 2967 1) == 0) 2968 return nullptr; 2969 // clang-format off 2970 unsigned NewOpc; 2971 switch (MI.getOpcode()) { 2972 default: 2973 llvm_unreachable("Unexpected opcode"); 2974 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV) 2975 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV) 2976 } 2977 // clang-format on 2978 2979 MachineBasicBlock &MBB = *MI.getParent(); 2980 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 2981 .add(MI.getOperand(0)) 2982 .addReg(MI.getOperand(0).getReg(), RegState::Undef) 2983 .add(MI.getOperand(1)) 2984 .add(MI.getOperand(2)) 2985 .add(MI.getOperand(3)) 2986 .add(MI.getOperand(4)) 2987 .add(MI.getOperand(5)) 2988 .add(MI.getOperand(6)); 2989 break; 2990 } 2991 case CASE_WIDEOP_OPCODE_LMULS(WADD_WV): 2992 case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV): 2993 case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV): 2994 case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): { 2995 // If the tail policy is undisturbed we can't convert. 2996 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && 2997 MI.getNumExplicitOperands() == 6); 2998 if ((MI.getOperand(5).getImm() & 1) == 0) 2999 return nullptr; 3000 3001 // clang-format off 3002 unsigned NewOpc; 3003 switch (MI.getOpcode()) { 3004 default: 3005 llvm_unreachable("Unexpected opcode"); 3006 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV) 3007 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV) 3008 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV) 3009 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV) 3010 } 3011 // clang-format on 3012 3013 MachineBasicBlock &MBB = *MI.getParent(); 3014 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 3015 .add(MI.getOperand(0)) 3016 .addReg(MI.getOperand(0).getReg(), RegState::Undef) 3017 .add(MI.getOperand(1)) 3018 .add(MI.getOperand(2)) 3019 .add(MI.getOperand(3)) 3020 .add(MI.getOperand(4)) 3021 .add(MI.getOperand(5)); 3022 break; 3023 } 3024 } 3025 MIB.copyImplicitOps(MI); 3026 3027 if (LV) { 3028 unsigned NumOps = MI.getNumOperands(); 3029 for (unsigned I = 1; I < NumOps; ++I) { 3030 MachineOperand &Op = MI.getOperand(I); 3031 if (Op.isReg() && Op.isKill()) 3032 LV->replaceKillInstruction(Op.getReg(), MI, *MIB); 3033 } 3034 } 3035 3036 if (LIS) { 3037 SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB); 3038 3039 if (MI.getOperand(0).isEarlyClobber()) { 3040 // Use operand 1 was tied to early-clobber def operand 0, so its live 3041 // interval could have ended at an early-clobber slot. Now they are not 3042 // tied we need to update it to the normal register slot. 
3043 LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg()); 3044 LiveRange::Segment *S = LI.getSegmentContaining(Idx); 3045 if (S->end == Idx.getRegSlot(true)) 3046 S->end = Idx.getRegSlot(); 3047 } 3048 } 3049 3050 return MIB; 3051 } 3052 3053 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS 3054 #undef CASE_WIDEOP_CHANGE_OPCODE_COMMON 3055 #undef CASE_WIDEOP_OPCODE_LMULS 3056 #undef CASE_WIDEOP_OPCODE_COMMON 3057 3058 void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, 3059 MachineBasicBlock &MBB, 3060 MachineBasicBlock::iterator II, 3061 const DebugLoc &DL, Register DestReg, 3062 int64_t Amount, 3063 MachineInstr::MIFlag Flag) const { 3064 assert(Amount > 0 && "There is no need to get VLEN scaled value."); 3065 assert(Amount % 8 == 0 && 3066 "Reserve the stack by the multiple of one vector size."); 3067 3068 MachineRegisterInfo &MRI = MF.getRegInfo(); 3069 int64_t NumOfVReg = Amount / 8; 3070 3071 BuildMI(MBB, II, DL, get(RISCV::PseudoReadVLENB), DestReg).setMIFlag(Flag); 3072 assert(isInt<32>(NumOfVReg) && 3073 "Expect the number of vector registers within 32-bits."); 3074 if (llvm::has_single_bit<uint32_t>(NumOfVReg)) { 3075 uint32_t ShiftAmount = Log2_32(NumOfVReg); 3076 if (ShiftAmount == 0) 3077 return; 3078 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3079 .addReg(DestReg, RegState::Kill) 3080 .addImm(ShiftAmount) 3081 .setMIFlag(Flag); 3082 } else if (STI.hasStdExtZba() && 3083 ((NumOfVReg % 3 == 0 && isPowerOf2_64(NumOfVReg / 3)) || 3084 (NumOfVReg % 5 == 0 && isPowerOf2_64(NumOfVReg / 5)) || 3085 (NumOfVReg % 9 == 0 && isPowerOf2_64(NumOfVReg / 9)))) { 3086 // We can use Zba SHXADD+SLLI instructions for multiply in some cases. 3087 unsigned Opc; 3088 uint32_t ShiftAmount; 3089 if (NumOfVReg % 9 == 0) { 3090 Opc = RISCV::SH3ADD; 3091 ShiftAmount = Log2_64(NumOfVReg / 9); 3092 } else if (NumOfVReg % 5 == 0) { 3093 Opc = RISCV::SH2ADD; 3094 ShiftAmount = Log2_64(NumOfVReg / 5); 3095 } else if (NumOfVReg % 3 == 0) { 3096 Opc = RISCV::SH1ADD; 3097 ShiftAmount = Log2_64(NumOfVReg / 3); 3098 } else { 3099 llvm_unreachable("Unexpected number of vregs"); 3100 } 3101 if (ShiftAmount) 3102 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3103 .addReg(DestReg, RegState::Kill) 3104 .addImm(ShiftAmount) 3105 .setMIFlag(Flag); 3106 BuildMI(MBB, II, DL, get(Opc), DestReg) 3107 .addReg(DestReg, RegState::Kill) 3108 .addReg(DestReg) 3109 .setMIFlag(Flag); 3110 } else if (llvm::has_single_bit<uint32_t>(NumOfVReg - 1)) { 3111 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3112 uint32_t ShiftAmount = Log2_32(NumOfVReg - 1); 3113 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) 3114 .addReg(DestReg) 3115 .addImm(ShiftAmount) 3116 .setMIFlag(Flag); 3117 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg) 3118 .addReg(ScaledRegister, RegState::Kill) 3119 .addReg(DestReg, RegState::Kill) 3120 .setMIFlag(Flag); 3121 } else if (llvm::has_single_bit<uint32_t>(NumOfVReg + 1)) { 3122 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3123 uint32_t ShiftAmount = Log2_32(NumOfVReg + 1); 3124 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) 3125 .addReg(DestReg) 3126 .addImm(ShiftAmount) 3127 .setMIFlag(Flag); 3128 BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg) 3129 .addReg(ScaledRegister, RegState::Kill) 3130 .addReg(DestReg, RegState::Kill) 3131 .setMIFlag(Flag); 3132 } else if (STI.hasStdExtM() || STI.hasStdExtZmmul()) { 3133 Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3134 movImm(MBB, II, DL, N, NumOfVReg, Flag); 3135 
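    // DestReg still holds the raw VLENB value at this point; multiply it by
    // the materialized NumOfVReg constant to get the scaled amount.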
BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg) 3136 .addReg(DestReg, RegState::Kill) 3137 .addReg(N, RegState::Kill) 3138 .setMIFlag(Flag); 3139 } else { 3140 Register Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3141 BuildMI(MBB, II, DL, get(RISCV::ADDI), Acc) 3142 .addReg(RISCV::X0) 3143 .addImm(0) 3144 .setMIFlag(Flag); 3145 uint32_t PrevShiftAmount = 0; 3146 for (uint32_t ShiftAmount = 0; NumOfVReg >> ShiftAmount; ShiftAmount++) { 3147 if (NumOfVReg & (1LL << ShiftAmount)) { 3148 if (ShiftAmount) 3149 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3150 .addReg(DestReg, RegState::Kill) 3151 .addImm(ShiftAmount - PrevShiftAmount) 3152 .setMIFlag(Flag); 3153 if (NumOfVReg >> (ShiftAmount + 1)) 3154 BuildMI(MBB, II, DL, get(RISCV::ADD), Acc) 3155 .addReg(Acc, RegState::Kill) 3156 .addReg(DestReg) 3157 .setMIFlag(Flag); 3158 PrevShiftAmount = ShiftAmount; 3159 } 3160 } 3161 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg) 3162 .addReg(DestReg, RegState::Kill) 3163 .addReg(Acc) 3164 .setMIFlag(Flag); 3165 } 3166 } 3167 3168 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 3169 RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const { 3170 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 3171 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"}, 3172 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}}; 3173 return ArrayRef(TargetFlags); 3174 } 3175 3176 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0. 3177 bool RISCV::isSEXT_W(const MachineInstr &MI) { 3178 return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() && 3179 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0; 3180 } 3181 3182 // Returns true if this is the zext.w pattern, adduw rd, rs1, x0. 3183 bool RISCV::isZEXT_W(const MachineInstr &MI) { 3184 return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() && 3185 MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0; 3186 } 3187 3188 // Returns true if this is the zext.b pattern, andi rd, rs1, 255. 3189 bool RISCV::isZEXT_B(const MachineInstr &MI) { 3190 return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() && 3191 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255; 3192 } 3193 3194 static bool isRVVWholeLoadStore(unsigned Opcode) { 3195 switch (Opcode) { 3196 default: 3197 return false; 3198 case RISCV::VS1R_V: 3199 case RISCV::VS2R_V: 3200 case RISCV::VS4R_V: 3201 case RISCV::VS8R_V: 3202 case RISCV::VL1RE8_V: 3203 case RISCV::VL2RE8_V: 3204 case RISCV::VL4RE8_V: 3205 case RISCV::VL8RE8_V: 3206 case RISCV::VL1RE16_V: 3207 case RISCV::VL2RE16_V: 3208 case RISCV::VL4RE16_V: 3209 case RISCV::VL8RE16_V: 3210 case RISCV::VL1RE32_V: 3211 case RISCV::VL2RE32_V: 3212 case RISCV::VL4RE32_V: 3213 case RISCV::VL8RE32_V: 3214 case RISCV::VL1RE64_V: 3215 case RISCV::VL2RE64_V: 3216 case RISCV::VL4RE64_V: 3217 case RISCV::VL8RE64_V: 3218 return true; 3219 } 3220 } 3221 3222 bool RISCV::isRVVSpill(const MachineInstr &MI) { 3223 // RVV lacks any support for immediate addressing for stack addresses, so be 3224 // conservative. 
3225 unsigned Opcode = MI.getOpcode(); 3226 if (!RISCVVPseudosTable::getPseudoInfo(Opcode) && 3227 !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode)) 3228 return false; 3229 return true; 3230 } 3231 3232 std::optional<std::pair<unsigned, unsigned>> 3233 RISCV::isRVVSpillForZvlsseg(unsigned Opcode) { 3234 switch (Opcode) { 3235 default: 3236 return std::nullopt; 3237 case RISCV::PseudoVSPILL2_M1: 3238 case RISCV::PseudoVRELOAD2_M1: 3239 return std::make_pair(2u, 1u); 3240 case RISCV::PseudoVSPILL2_M2: 3241 case RISCV::PseudoVRELOAD2_M2: 3242 return std::make_pair(2u, 2u); 3243 case RISCV::PseudoVSPILL2_M4: 3244 case RISCV::PseudoVRELOAD2_M4: 3245 return std::make_pair(2u, 4u); 3246 case RISCV::PseudoVSPILL3_M1: 3247 case RISCV::PseudoVRELOAD3_M1: 3248 return std::make_pair(3u, 1u); 3249 case RISCV::PseudoVSPILL3_M2: 3250 case RISCV::PseudoVRELOAD3_M2: 3251 return std::make_pair(3u, 2u); 3252 case RISCV::PseudoVSPILL4_M1: 3253 case RISCV::PseudoVRELOAD4_M1: 3254 return std::make_pair(4u, 1u); 3255 case RISCV::PseudoVSPILL4_M2: 3256 case RISCV::PseudoVRELOAD4_M2: 3257 return std::make_pair(4u, 2u); 3258 case RISCV::PseudoVSPILL5_M1: 3259 case RISCV::PseudoVRELOAD5_M1: 3260 return std::make_pair(5u, 1u); 3261 case RISCV::PseudoVSPILL6_M1: 3262 case RISCV::PseudoVRELOAD6_M1: 3263 return std::make_pair(6u, 1u); 3264 case RISCV::PseudoVSPILL7_M1: 3265 case RISCV::PseudoVRELOAD7_M1: 3266 return std::make_pair(7u, 1u); 3267 case RISCV::PseudoVSPILL8_M1: 3268 case RISCV::PseudoVRELOAD8_M1: 3269 return std::make_pair(8u, 1u); 3270 } 3271 } 3272 3273 bool RISCV::isFaultFirstLoad(const MachineInstr &MI) { 3274 return MI.getNumExplicitDefs() == 2 && MI.modifiesRegister(RISCV::VL) && 3275 !MI.isInlineAsm(); 3276 } 3277 3278 bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) { 3279 int16_t MI1FrmOpIdx = 3280 RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm); 3281 int16_t MI2FrmOpIdx = 3282 RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm); 3283 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0) 3284 return false; 3285 MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx); 3286 MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx); 3287 return FrmOp1.getImm() == FrmOp2.getImm(); 3288 } 3289 3290 std::optional<unsigned> 3291 RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) { 3292 // TODO: Handle Zvbb instructions 3293 switch (Opcode) { 3294 default: 3295 return std::nullopt; 3296 3297 // 11.6. Vector Single-Width Shift Instructions 3298 case RISCV::VSLL_VX: 3299 case RISCV::VSRL_VX: 3300 case RISCV::VSRA_VX: 3301 // 12.4. Vector Single-Width Scaling Shift Instructions 3302 case RISCV::VSSRL_VX: 3303 case RISCV::VSSRA_VX: 3304 // Only the low lg2(SEW) bits of the shift-amount value are used. 3305 return Log2SEW; 3306 3307 // 11.7 Vector Narrowing Integer Right Shift Instructions 3308 case RISCV::VNSRL_WX: 3309 case RISCV::VNSRA_WX: 3310 // 12.5. Vector Narrowing Fixed-Point Clip Instructions 3311 case RISCV::VNCLIPU_WX: 3312 case RISCV::VNCLIP_WX: 3313 // Only the low lg2(2*SEW) bits of the shift-amount value are used. 3314 return Log2SEW + 1; 3315 3316 // 11.1. Vector Single-Width Integer Add and Subtract 3317 case RISCV::VADD_VX: 3318 case RISCV::VSUB_VX: 3319 case RISCV::VRSUB_VX: 3320 // 11.2. 
Vector Widening Integer Add/Subtract 3321 case RISCV::VWADDU_VX: 3322 case RISCV::VWSUBU_VX: 3323 case RISCV::VWADD_VX: 3324 case RISCV::VWSUB_VX: 3325 case RISCV::VWADDU_WX: 3326 case RISCV::VWSUBU_WX: 3327 case RISCV::VWADD_WX: 3328 case RISCV::VWSUB_WX: 3329 // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions 3330 case RISCV::VADC_VXM: 3331 case RISCV::VADC_VIM: 3332 case RISCV::VMADC_VXM: 3333 case RISCV::VMADC_VIM: 3334 case RISCV::VMADC_VX: 3335 case RISCV::VSBC_VXM: 3336 case RISCV::VMSBC_VXM: 3337 case RISCV::VMSBC_VX: 3338 // 11.5 Vector Bitwise Logical Instructions 3339 case RISCV::VAND_VX: 3340 case RISCV::VOR_VX: 3341 case RISCV::VXOR_VX: 3342 // 11.8. Vector Integer Compare Instructions 3343 case RISCV::VMSEQ_VX: 3344 case RISCV::VMSNE_VX: 3345 case RISCV::VMSLTU_VX: 3346 case RISCV::VMSLT_VX: 3347 case RISCV::VMSLEU_VX: 3348 case RISCV::VMSLE_VX: 3349 case RISCV::VMSGTU_VX: 3350 case RISCV::VMSGT_VX: 3351 // 11.9. Vector Integer Min/Max Instructions 3352 case RISCV::VMINU_VX: 3353 case RISCV::VMIN_VX: 3354 case RISCV::VMAXU_VX: 3355 case RISCV::VMAX_VX: 3356 // 11.10. Vector Single-Width Integer Multiply Instructions 3357 case RISCV::VMUL_VX: 3358 case RISCV::VMULH_VX: 3359 case RISCV::VMULHU_VX: 3360 case RISCV::VMULHSU_VX: 3361 // 11.11. Vector Integer Divide Instructions 3362 case RISCV::VDIVU_VX: 3363 case RISCV::VDIV_VX: 3364 case RISCV::VREMU_VX: 3365 case RISCV::VREM_VX: 3366 // 11.12. Vector Widening Integer Multiply Instructions 3367 case RISCV::VWMUL_VX: 3368 case RISCV::VWMULU_VX: 3369 case RISCV::VWMULSU_VX: 3370 // 11.13. Vector Single-Width Integer Multiply-Add Instructions 3371 case RISCV::VMACC_VX: 3372 case RISCV::VNMSAC_VX: 3373 case RISCV::VMADD_VX: 3374 case RISCV::VNMSUB_VX: 3375 // 11.14. Vector Widening Integer Multiply-Add Instructions 3376 case RISCV::VWMACCU_VX: 3377 case RISCV::VWMACC_VX: 3378 case RISCV::VWMACCSU_VX: 3379 case RISCV::VWMACCUS_VX: 3380 // 11.15. Vector Integer Merge Instructions 3381 case RISCV::VMERGE_VXM: 3382 // 11.16. Vector Integer Move Instructions 3383 case RISCV::VMV_V_X: 3384 // 12.1. Vector Single-Width Saturating Add and Subtract 3385 case RISCV::VSADDU_VX: 3386 case RISCV::VSADD_VX: 3387 case RISCV::VSSUBU_VX: 3388 case RISCV::VSSUB_VX: 3389 // 12.2. Vector Single-Width Averaging Add and Subtract 3390 case RISCV::VAADDU_VX: 3391 case RISCV::VAADD_VX: 3392 case RISCV::VASUBU_VX: 3393 case RISCV::VASUB_VX: 3394 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation 3395 case RISCV::VSMUL_VX: 3396 // 16.1. Integer Scalar Move Instructions 3397 case RISCV::VMV_S_X: 3398 return 1U << Log2SEW; 3399 } 3400 } 3401 3402 unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) { 3403 const RISCVVPseudosTable::PseudoInfo *RVV = 3404 RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode); 3405 if (!RVV) 3406 return 0; 3407 return RVV->BaseInstr; 3408 } 3409