//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the RISC-V implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "RISCVInstrInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

#define GEN_CHECK_COMPRESS_INSTR
#include "RISCVGenCompressInstEmitter.inc"

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#include "RISCVGenInstrInfo.inc"

static cl::opt<bool> PreferWholeRegisterMove(
    "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
    cl::desc("Prefer whole register move for vector registers."));

static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
    "riscv-force-machine-combiner-strategy", cl::Hidden,
    cl::desc("Force machine combiner to use a specific strategy for machine "
             "trace metrics evaluation."),
    cl::init(MachineTraceStrategy::TS_NumStrategies),
    cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
                          "Local strategy."),
               clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
                          "MinInstrCount strategy.")));

namespace llvm::RISCVVPseudosTable {

using namespace RISCV;

#define GET_RISCVVPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVPseudosTable

RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
    : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
      STI(STI) {}

MCInst RISCVInstrInfo::getNop() const {
  if (STI.hasStdExtCOrZca())
    return MCInstBuilder(RISCV::C_NOP);
  return MCInstBuilder(RISCV::ADDI)
      .addReg(RISCV::X0)
      .addReg(RISCV::X0)
      .addImm(0);
}

unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex) const {
  unsigned Dummy;
  return isLoadFromStackSlot(MI, FrameIndex, Dummy);
}

unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex,
                                             unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;
  case RISCV::LB:
  case RISCV::LBU:
    MemBytes = 1;
    break;
  case RISCV::LH:
  case RISCV::LHU:
  case RISCV::FLH:
    MemBytes = 2;
    break;
  case RISCV::LW:
  case RISCV::FLW:
  case RISCV::LWU:
    MemBytes = 4;
    break;
  case RISCV::LD:
  case RISCV::FLD:
    MemBytes = 8;
    break;
  }

  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
      MI.getOperand(2).getImm() == 0) {
    FrameIndex = MI.getOperand(1).getIndex();
    return MI.getOperand(0).getReg();
  }

  return 0;
}

unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex) const {
  unsigned Dummy;
  return isStoreToStackSlot(MI, FrameIndex, Dummy);
}

unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex,
                                            unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;
  case RISCV::SB:
    MemBytes = 1;
    break;
  case RISCV::SH:
  case RISCV::FSH:
    MemBytes = 2;
    break;
  case RISCV::SW:
  case RISCV::FSW:
    MemBytes = 4;
    break;
  case RISCV::SD:
  case RISCV::FSD:
    MemBytes = 8;
    break;
  }

  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
      MI.getOperand(2).getImm() == 0) {
    FrameIndex = MI.getOperand(1).getIndex();
    return MI.getOperand(0).getReg();
  }

  return 0;
}

static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
}

static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
                                   const MachineBasicBlock &MBB,
                                   MachineBasicBlock::const_iterator MBBI,
                                   MachineBasicBlock::const_iterator &DefMBBI,
                                   RISCVII::VLMUL LMul) {
  if (PreferWholeRegisterMove)
    return false;

  assert(MBBI->getOpcode() == TargetOpcode::COPY &&
         "Unexpected COPY instruction.");
  Register SrcReg = MBBI->getOperand(1).getReg();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  bool FoundDef = false;
  bool FirstVSetVLI = false;
  unsigned FirstSEW = 0;
  while (MBBI != MBB.begin()) {
    --MBBI;
    if (MBBI->isMetaInstruction())
      continue;

    if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
        MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
        MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
      // There is a vsetvli between the COPY and the source defining
      // instruction:
      //   vy = def_vop ...  (producing instruction)
      //   ...
      //   vsetvli
      //   ...
      //   vx = COPY vy
      if (!FoundDef) {
        if (!FirstVSetVLI) {
          FirstVSetVLI = true;
          unsigned FirstVType = MBBI->getOperand(2).getImm();
          RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
          FirstSEW = RISCVVType::getSEW(FirstVType);
          // The first encountered vsetvli must have the same LMUL as the
          // register class of the COPY.
          if (FirstLMul != LMul)
            return false;
        }
        // Only permit `vsetvli x0, x0, vtype` between the COPY and the source
        // defining instruction.
        if (MBBI->getOperand(0).getReg() != RISCV::X0)
          return false;
        if (MBBI->getOperand(1).isImm())
          return false;
        if (MBBI->getOperand(1).getReg() != RISCV::X0)
          return false;
        continue;
      }

      // MBBI is the first vsetvli before the producing instruction.
      unsigned VType = MBBI->getOperand(2).getImm();
      // If there is a vsetvli between the COPY and the producing instruction.
      if (FirstVSetVLI) {
        // If SEW is different, return false.
        if (RISCVVType::getSEW(VType) != FirstSEW)
          return false;
      }

      // If the vsetvli is tail undisturbed, keep the whole register move.
      if (!RISCVVType::isTailAgnostic(VType))
        return false;

      // The checking is conservative.
      // We only have register classes for LMUL = 1/2/4/8. We should be able
      // to convert vmv1r.v to vmv.v.v for fractional LMUL operations.
      // However, we cannot use the vsetvli LMUL for widening operations,
      // because the result of a widening operation is 2 x LMUL.
      return LMul == RISCVVType::getVLMUL(VType);
    } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
      return false;
    } else if (MBBI->getNumDefs()) {
      // Check all the instructions which will change VL.
      // For example, vleff has implicit def VL.
      if (MBBI->modifiesRegister(RISCV::VL))
        return false;

      // Only convert whole register copies to vmv.v.v when the defining
      // value appears in the explicit operands.
      for (const MachineOperand &MO : MBBI->explicit_operands()) {
        if (!MO.isReg() || !MO.isDef())
          continue;
        if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
          // We only permit the source of the COPY to have the same LMUL as
          // the defined operand. There are cases where we need to keep the
          // whole register copy if the LMUL is different.
          // For example,
          //   $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,m
          //   $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
          //   # The COPY may be created by a vlmul_trunc intrinsic.
          //   $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
          //
          // After widening, the valid value will be 4 x e32 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
          // FIXME: The COPY of a subregister of a Zvlsseg register will not
          // be able to convert to vmv.v.[v|i] under this constraint.
          if (MO.getReg() != SrcReg)
            return false;

          // In the case of widening reduction instructions with an LMUL_1
          // input vector, only checking the LMUL is insufficient because the
          // reduction result is always LMUL_1.
          // For example,
          //   $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
          //   $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
          //   $v26 = COPY killed renamable $v8
          // After widening, the valid value will be 1 x e16 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
          uint64_t TSFlags = MBBI->getDesc().TSFlags;
          if (RISCVII::isRVVWideningReduction(TSFlags))
            return false;

          // If the producing instruction does not depend on vsetvli, do not
          // convert the COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
          if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
            return false;

          // Found the definition.
          FoundDef = true;
          DefMBBI = MBBI;
          break;
        }
      }
    }
  }

  return false;
}

void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       const DebugLoc &DL, MCRegister DstReg,
                                       MCRegister SrcReg, bool KillSrc,
                                       unsigned Opc, unsigned NF) const {
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  RISCVII::VLMUL LMul;
  unsigned SubRegIdx;
  unsigned VVOpc, VIOpc;
  switch (Opc) {
  default:
    llvm_unreachable("Impossible LMUL for vector register copy.");
  case RISCV::VMV1R_V:
    LMul = RISCVII::LMUL_1;
    SubRegIdx = RISCV::sub_vrm1_0;
    VVOpc = RISCV::PseudoVMV_V_V_M1;
    VIOpc = RISCV::PseudoVMV_V_I_M1;
    break;
  case RISCV::VMV2R_V:
    LMul = RISCVII::LMUL_2;
    SubRegIdx = RISCV::sub_vrm2_0;
    VVOpc = RISCV::PseudoVMV_V_V_M2;
    VIOpc = RISCV::PseudoVMV_V_I_M2;
    break;
  case RISCV::VMV4R_V:
    LMul = RISCVII::LMUL_4;
    SubRegIdx = RISCV::sub_vrm4_0;
    VVOpc = RISCV::PseudoVMV_V_V_M4;
    VIOpc = RISCV::PseudoVMV_V_I_M4;
    break;
  case RISCV::VMV8R_V:
    assert(NF == 1);
    LMul = RISCVII::LMUL_8;
    SubRegIdx = RISCV::sub_vrm1_0; // There is no sub_vrm8_0.
    VVOpc = RISCV::PseudoVMV_V_V_M8;
    VIOpc = RISCV::PseudoVMV_V_I_M8;
    break;
  }

  bool UseVMV_V_V = false;
  bool UseVMV_V_I = false;
  MachineBasicBlock::const_iterator DefMBBI;
  if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
    UseVMV_V_V = true;
    Opc = VVOpc;

    if (DefMBBI->getOpcode() == VIOpc) {
      UseVMV_V_I = true;
      Opc = VIOpc;
    }
  }

  if (NF == 1) {
    auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
    if (UseVMV_V_V)
      MIB.addReg(DstReg, RegState::Undef);
    if (UseVMV_V_I)
      MIB = MIB.add(DefMBBI->getOperand(2));
    else
      MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (UseVMV_V_V) {
      const MCInstrDesc &Desc = DefMBBI->getDesc();
      MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
      MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
      MIB.addImm(0);                                            // tu, mu
      MIB.addReg(RISCV::VL, RegState::Implicit);
      MIB.addReg(RISCV::VTYPE, RegState::Implicit);
    }
    return;
  }

  int I = 0, End = NF, Incr = 1;
  unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned DstEncoding = TRI->getEncodingValue(DstReg);
  unsigned LMulVal;
  bool Fractional;
  std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul);
  assert(!Fractional && "It is impossible to be fractional lmul here.");
  if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
    I = NF - 1;
    End = -1;
    Incr = -1;
  }

  for (; I != End; I += Incr) {
    auto MIB =
        BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I));
    if (UseVMV_V_V)
      MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I), RegState::Undef);
    if (UseVMV_V_I)
      MIB = MIB.add(DefMBBI->getOperand(2));
    else
      MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
                       getKillRegState(KillSrc));
    if (UseVMV_V_V) {
      const MCInstrDesc &Desc = DefMBBI->getDesc();
      MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
      MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
      MIB.addImm(0);                                            // tu, mu
      MIB.addReg(RISCV::VL, RegState::Implicit);
      MIB.addReg(RISCV::VTYPE, RegState::Implicit);
    }
  }
}

void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 const DebugLoc &DL, MCRegister DstReg,
                                 MCRegister SrcReg, bool KillSrc) const {
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addImm(0);
    return;
  }

  if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
    // Emit an ADDI for both parts of GPRPair.
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
            TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
        .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
                getKillRegState(KillSrc))
        .addImm(0);
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
            TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
        .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
                getKillRegState(KillSrc))
        .addImm(0);
    return;
  }

  // Handle copy from csr.
  if (RISCV::VCSRRegClass.contains(SrcReg) &&
      RISCV::GPRRegClass.contains(DstReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
        .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
        .addReg(RISCV::X0);
    return;
  }

  if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
    unsigned Opc;
    if (STI.hasStdExtZfh()) {
      Opc = RISCV::FSGNJ_H;
    } else {
      assert(STI.hasStdExtF() &&
             (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
             "Unexpected extensions");
      // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S.
      DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
                                        &RISCV::FPR32RegClass);
      SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
                                        &RISCV::FPR32RegClass);
      Opc = RISCV::FSGNJ_S;
    }
    BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR32RegClass.contains(DstReg) &&
      RISCV::GPRRegClass.contains(SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::GPRRegClass.contains(DstReg) &&
      RISCV::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR64RegClass.contains(DstReg) &&
      RISCV::GPRRegClass.contains(SrcReg)) {
    assert(STI.getXLen() == 64 && "Unexpected GPR size");
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::GPRRegClass.contains(DstReg) &&
      RISCV::FPR64RegClass.contains(SrcReg)) {
    assert(STI.getXLen() == 64 && "Unexpected GPR size");
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // VR->VR copies.
  if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V);
    return;
  }

  if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V);
    return;
  }

  if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V);
    return;
  }

  if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV8R_V);
    return;
  }

  if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/2);
    return;
  }

  if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
                      /*NF=*/2);
    return;
  }

  if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V,
                      /*NF=*/2);
    return;
  }

  if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/3);
    return;
  }

  if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
                      /*NF=*/3);
    return;
  }

  if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/4);
    return;
  }

  if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
                      /*NF=*/4);
    return;
  }

  if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/5);
    return;
  }

  if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/6);
    return;
  }

  if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/7);
    return;
  }

  if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/8);
    return;
  }

  llvm_unreachable("Impossible reg-to-reg copy");
}

void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I,
                                         Register SrcReg, bool IsKill, int FI,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI,
                                         Register VReg) const {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  unsigned Opcode;
  bool IsScalableVector = true;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
                 RISCV::SW : RISCV::SD;
    IsScalableVector = false;
  } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::PseudoRV32ZdinxSD;
    IsScalableVector = false;
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSH;
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSW;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSD;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS1R_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS2R_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS4R_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS8R_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL8_M1;
  else
    llvm_unreachable("Can't store this register to stack slot");

  if (IsScalableVector) {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
        MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));

    MFI.setStackID(FI, TargetStackID::ScalableVector);
    BuildMI(MBB, I, DebugLoc(), get(Opcode))
        .addReg(SrcReg, getKillRegState(IsKill))
        .addFrameIndex(FI)
        .addMemOperand(MMO);
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    BuildMI(MBB, I, DebugLoc(), get(Opcode))
        .addReg(SrcReg, getKillRegState(IsKill))
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO);
  }
}

void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          Register DstReg, int FI,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI,
                                          Register VReg) const {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  unsigned Opcode;
  bool IsScalableVector = true;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
                 RISCV::LW : RISCV::LD;
    IsScalableVector = false;
  } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::PseudoRV32ZdinxLD;
    IsScalableVector = false;
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLH;
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLW;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLD;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL1RE8_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL2RE8_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL4RE8_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL8RE8_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD8_M1;
  else
    llvm_unreachable("Can't load this register from stack slot");

  if (IsScalableVector) {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
        MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));

    MFI.setStackID(FI, TargetStackID::ScalableVector);
    BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
        .addFrameIndex(FI)
        .addMemOperand(MMO);
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO);
  }
}

MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
    VirtRegMap *VRM) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // The below optimizations narrow the load so they are only valid for little
  // endian.
  // TODO: Support big endian by adding an offset into the frame object?
  if (MF.getDataLayout().isBigEndian())
    return nullptr;

  // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
  if (Ops.size() != 1 || Ops[0] != 1)
    return nullptr;

  unsigned LoadOpc;
  switch (MI.getOpcode()) {
  default:
    if (RISCV::isSEXT_W(MI)) {
      LoadOpc = RISCV::LW;
      break;
    }
    if (RISCV::isZEXT_W(MI)) {
      LoadOpc = RISCV::LWU;
      break;
    }
    if (RISCV::isZEXT_B(MI)) {
      LoadOpc = RISCV::LBU;
      break;
    }
    return nullptr;
  case RISCV::SEXT_H:
    LoadOpc = RISCV::LH;
    break;
  case RISCV::SEXT_B:
    LoadOpc = RISCV::LB;
    break;
  case RISCV::ZEXT_H_RV32:
  case RISCV::ZEXT_H_RV64:
    LoadOpc = RISCV::LHU;
    break;
  }

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIndex),
      MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
      MFI.getObjectAlign(FrameIndex));

  Register DstReg = MI.getOperand(0).getReg();
  return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
                 DstReg)
      .addFrameIndex(FrameIndex)
      .addImm(0)
      .addMemOperand(MMO);
}

void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            const DebugLoc &DL, Register DstReg, uint64_t Val,
                            MachineInstr::MIFlag Flag, bool DstRenamable,
                            bool DstIsDead) const {
  Register SrcReg = RISCV::X0;

  if (!STI.is64Bit() && !isInt<32>(Val))
    report_fatal_error("Should only materialize 32-bit constants for RV32");

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
  assert(!Seq.empty());

  bool SrcRenamable = false;
  unsigned Num = 0;

  for (const RISCVMatInt::Inst &Inst : Seq) {
    bool LastItem = ++Num == Seq.size();
    unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
                           getRenamableRegState(DstRenamable);
    unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
                           getRenamableRegState(SrcRenamable);
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addImm(Inst.getImm())
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegX0:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addReg(RISCV::X0)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegReg:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addReg(SrcReg, SrcRegState)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegImm:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addImm(Inst.getImm())
          .setMIFlag(Flag);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = DstReg;
    SrcRenamable = DstRenamable;
  }
}

static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
  switch (Opc) {
  default:
    return RISCVCC::COND_INVALID;
  case RISCV::BEQ:
    return RISCVCC::COND_EQ;
  case RISCV::BNE:
    return RISCVCC::COND_NE;
  case RISCV::BLT:
    return RISCVCC::COND_LT;
  case RISCV::BGE:
    return RISCVCC::COND_GE;
  case RISCV::BLTU:
    return RISCVCC::COND_LTU;
  case RISCV::BGEU:
    return RISCVCC::COND_GEU;
  }
}

// The contents of values added to Cond are not examined outside of
// RISCVInstrInfo, giving us flexibility in what to push to it. For RISC-V, we
// push the condition code and the two compare registers.
static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  assert(LastInst.getDesc().isConditionalBranch() &&
         "Unknown conditional branch");
  Target = LastInst.getOperand(2).getMBB();
  unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
  Cond.push_back(MachineOperand::CreateImm(CC));
  Cond.push_back(LastInst.getOperand(0));
  Cond.push_back(LastInst.getOperand(1));
}

unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unknown condition code!");
  case RISCVCC::COND_EQ:
    return RISCV::BEQ;
  case RISCVCC::COND_NE:
    return RISCV::BNE;
  case RISCVCC::COND_LT:
    return RISCV::BLT;
  case RISCVCC::COND_GE:
    return RISCV::BGE;
  case RISCVCC::COND_LTU:
    return RISCV::BLTU;
  case RISCVCC::COND_GEU:
    return RISCV::BGEU;
  }
}

const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
  return get(RISCVCC::getBrCond(CC));
}

RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unrecognized conditional branch");
  case RISCVCC::COND_EQ:
    return RISCVCC::COND_NE;
  case RISCVCC::COND_NE:
    return RISCVCC::COND_EQ;
  case RISCVCC::COND_LT:
    return RISCVCC::COND_GE;
  case RISCVCC::COND_GE:
    return RISCVCC::COND_LT;
  case RISCVCC::COND_LTU:
    return RISCVCC::COND_GEU;
  case RISCVCC::COND_GEU:
    return RISCVCC::COND_LTU;
  }
}

bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  TBB = FBB = nullptr;
  Cond.clear();

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end() || !isUnpredicatedTerminator(*I))
    return false;

  // Count the number of terminators and find the first unconditional or
  // indirect branch.
  MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
  int NumTerminators = 0;
  for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
       J++) {
    NumTerminators++;
    if (J->getDesc().isUnconditionalBranch() ||
        J->getDesc().isIndirectBranch()) {
      FirstUncondOrIndirectBr = J.getReverse();
    }
  }

  // If AllowModify is true, we can erase any terminators after
  // FirstUncondOrIndirectBr.
  if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
    while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
      std::next(FirstUncondOrIndirectBr)->eraseFromParent();
      NumTerminators--;
    }
    I = FirstUncondOrIndirectBr;
  }

  // We can't handle blocks that end in an indirect branch.
  if (I->getDesc().isIndirectBranch())
    return true;

  // We can't handle Generic branch opcodes from Global ISel.
  if (I->isPreISelOpcode())
    return true;

  // We can't handle blocks with more than 2 terminators.
  if (NumTerminators > 2)
    return true;

  // Handle a single unconditional branch.
  if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
    TBB = getBranchDestBlock(*I);
    return false;
  }

  // Handle a single conditional branch.
  if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
    parseCondBranch(*I, TBB, Cond);
    return false;
  }

  // Handle a conditional branch followed by an unconditional branch.
  if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
      I->getDesc().isUnconditionalBranch()) {
    parseCondBranch(*std::prev(I), TBB, Cond);
    FBB = getBranchDestBlock(*I);
    return false;
  }

  // Otherwise, we can't handle this.
  return true;
}

unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                      int *BytesRemoved) const {
  if (BytesRemoved)
    *BytesRemoved = 0;
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!I->getDesc().isUnconditionalBranch() &&
      !I->getDesc().isConditionalBranch())
    return 0;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(*I);
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!I->getDesc().isConditionalBranch())
    return 1;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(*I);
  I->eraseFromParent();
  return 2;
}

// Inserts a branch into the end of the specified MachineBasicBlock, returning
// the number of instructions inserted.
unsigned RISCVInstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  if (BytesAdded)
    *BytesAdded = 0;

  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 3 || Cond.size() == 0) &&
         "RISC-V branch conditions have three components!");

  // Unconditional branch.
  if (Cond.empty()) {
    MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
    if (BytesAdded)
      *BytesAdded += getInstSizeInBytes(MI);
    return 1;
  }

  // Either a one or two-way conditional branch.
  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  MachineInstr &CondMI =
      *BuildMI(&MBB, DL, getBrCond(CC)).add(Cond[1]).add(Cond[2]).addMBB(TBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(CondMI);

  // One-way conditional branch.
  if (!FBB)
    return 1;

  // Two-way conditional branch.
  MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(MI);
  return 2;
}

void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
                                          MachineBasicBlock &DestBB,
                                          MachineBasicBlock &RestoreBB,
                                          const DebugLoc &DL, int64_t BrOffset,
                                          RegScavenger *RS) const {
  assert(RS && "RegScavenger required for long branching");
  assert(MBB.empty() &&
         "new block should be inserted for expanding unconditional branch");
  assert(MBB.pred_size() == 1);
  assert(RestoreBB.empty() &&
         "restore block should be inserted for restoring clobbered registers");

  MachineFunction *MF = MBB.getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();

  if (!isInt<32>(BrOffset))
    report_fatal_error(
        "Branch offsets outside of the signed 32-bit range not supported");

  // FIXME: A virtual register must be used initially, as the register
  // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
  // uses the same workaround).
  Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  auto II = MBB.end();
  // We may also update the jump target to RestoreBB later.
  MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
                          .addReg(ScratchReg, RegState::Define | RegState::Dead)
                          .addMBB(&DestBB, RISCVII::MO_CALL);

  RS->enterBasicBlockEnd(MBB);
  Register TmpGPR =
      RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
                                    /*RestoreAfter=*/false, /*SpAdj=*/0,
                                    /*AllowSpill=*/false);
  if (TmpGPR != RISCV::NoRegister)
    RS->setRegUsed(TmpGPR);
  else {
    // The case when there is no scavenged register needs special handling.

    // Pick s11 because it doesn't make a difference.
    TmpGPR = RISCV::X27;

    int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
    if (FrameIndex == -1)
      report_fatal_error("underestimated function size");

    storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
                        &RISCV::GPRRegClass, TRI, Register());
    TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
                             /*SpAdj=*/0, /*FIOperandNum=*/1);

    MI.getOperand(1).setMBB(&RestoreBB);

    loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
                         &RISCV::GPRRegClass, TRI, Register());
    TRI->eliminateFrameIndex(RestoreBB.back(),
                             /*SpAdj=*/0, /*FIOperandNum=*/1);
  }

  MRI.replaceRegWith(ScratchReg, TmpGPR);
  MRI.clearVirtRegs();
}

bool RISCVInstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  assert((Cond.size() == 3) && "Invalid branch condition!");
  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  Cond[0].setImm(getOppositeBranchCondition(CC));
  return false;
}

bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  MachineBasicBlock *MBB = MI.getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  MachineBasicBlock *TBB, *FBB;
  SmallVector<MachineOperand, 3> Cond;
  if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
    return false;
  (void)FBB;

  RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  assert(CC != RISCVCC::COND_INVALID);

  if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
    return false;

  // For two constants C0 and C1 from
  // ```
  // li Y, C0
  // li Z, C1
  // ```
  // 1. if C1 = C0 + 1
  //    we can turn:
  //    (a) blt Y, X -> bge X, Z
  //    (b) bge Y, X -> blt X, Z
  //
  // 2. if C1 = C0 - 1
  //    we can turn:
  //    (a) blt X, Y -> bge Z, X
  //    (b) bge X, Y -> blt Z, X
  //
  // To make sure this optimization is really beneficial, we only
  // optimize for cases where Y has only one use (i.e. it is only used by the
  // branch).

  // Right now we only care about LI (i.e. ADDI x0, imm).
  auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
    if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
        MI->getOperand(1).getReg() == RISCV::X0) {
      Imm = MI->getOperand(2).getImm();
      return true;
    }
    return false;
  };
  // Either a load-immediate instruction or X0.
  auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
    if (!Op.isReg())
      return false;
    Register Reg = Op.getReg();
    if (Reg == RISCV::X0) {
      Imm = 0;
      return true;
    }
    if (!Reg.isVirtual())
      return false;
    return isLoadImm(MRI.getVRegDef(Op.getReg()), Imm);
  };

  MachineOperand &LHS = MI.getOperand(0);
  MachineOperand &RHS = MI.getOperand(1);
  // Try to find the register for constant Z; return
  // an invalid register otherwise.
  auto searchConst = [&](int64_t C1) -> Register {
    MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
    auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
      int64_t Imm;
      return isLoadImm(&I, Imm) && Imm == C1;
    });
    if (DefC1 != E)
      return DefC1->getOperand(0).getReg();

    return Register();
  };

  bool Modify = false;
  int64_t C0;
  if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
    // Might be case 1.
    // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
    // to worry about unsigned overflow here.)
    if (C0 < INT64_MAX)
      if (Register RegZ = searchConst(C0 + 1)) {
        reverseBranchCondition(Cond);
        Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
        Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
        // We might extend the live range of Z, clear its kill flag to
        // account for this.
        MRI.clearKillFlags(RegZ);
        Modify = true;
      }
  } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
    // Might be case 2.
    // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
    // when C0 is zero.
    if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
      if (Register RegZ = searchConst(C0 - 1)) {
        reverseBranchCondition(Cond);
        Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
        Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
        // We might extend the live range of Z, clear its kill flag to
        // account for this.
        MRI.clearKillFlags(RegZ);
        Modify = true;
      }
  }

  if (!Modify)
    return false;

  // Build the new branch and remove the old one.
  BuildMI(*MBB, MI, MI.getDebugLoc(),
          getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
      .add(Cond[1])
      .add(Cond[2])
      .addMBB(TBB);
  MI.eraseFromParent();

  return true;
}

MachineBasicBlock *
RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  assert(MI.getDesc().isBranch() && "Unexpected opcode!");
  // The branch target is always the last operand.
  int NumOp = MI.getNumExplicitOperands();
  return MI.getOperand(NumOp - 1).getMBB();
}

bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                           int64_t BrOffset) const {
  unsigned XLen = STI.getXLen();
  // Ideally we could determine the supported branch offset from the
  // RISCVII::FormMask, but this can't be used for Pseudo instructions like
  // PseudoBR.
  switch (BranchOp) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case RISCV::BEQ:
  case RISCV::BNE:
  case RISCV::BLT:
  case RISCV::BGE:
  case RISCV::BLTU:
  case RISCV::BGEU:
    return isIntN(13, BrOffset);
  case RISCV::JAL:
  case RISCV::PseudoBR:
    return isIntN(21, BrOffset);
  case RISCV::PseudoJump:
    return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
  }
}

// If the operation has a predicated pseudo instruction, return the pseudo
// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
// TODO: Support more operations.
unsigned getPredicatedOpcode(unsigned Opcode) {
  switch (Opcode) {
  case RISCV::ADD:   return RISCV::PseudoCCADD;
  case RISCV::SUB:   return RISCV::PseudoCCSUB;
  case RISCV::SLL:   return RISCV::PseudoCCSLL;
  case RISCV::SRL:   return RISCV::PseudoCCSRL;
  case RISCV::SRA:   return RISCV::PseudoCCSRA;
  case RISCV::AND:   return RISCV::PseudoCCAND;
  case RISCV::OR:    return RISCV::PseudoCCOR;
  case RISCV::XOR:   return RISCV::PseudoCCXOR;

  case RISCV::ADDI:  return RISCV::PseudoCCADDI;
  case RISCV::SLLI:  return RISCV::PseudoCCSLLI;
  case RISCV::SRLI:  return RISCV::PseudoCCSRLI;
  case RISCV::SRAI:  return RISCV::PseudoCCSRAI;
  case RISCV::ANDI:  return RISCV::PseudoCCANDI;
  case RISCV::ORI:   return RISCV::PseudoCCORI;
  case RISCV::XORI:  return RISCV::PseudoCCXORI;

  case RISCV::ADDW:  return RISCV::PseudoCCADDW;
  case RISCV::SUBW:  return RISCV::PseudoCCSUBW;
  case RISCV::SLLW:  return RISCV::PseudoCCSLLW;
  case RISCV::SRLW:  return RISCV::PseudoCCSRLW;
  case RISCV::SRAW:  return RISCV::PseudoCCSRAW;

  case RISCV::ADDIW: return RISCV::PseudoCCADDIW;
  case RISCV::SLLIW: return RISCV::PseudoCCSLLIW;
  case RISCV::SRLIW: return RISCV::PseudoCCSRLIW;
  case RISCV::SRAIW: return RISCV::PseudoCCSRAIW;

  case RISCV::ANDN:  return RISCV::PseudoCCANDN;
  case RISCV::ORN:   return RISCV::PseudoCCORN;
  case RISCV::XNOR:  return RISCV::PseudoCCXNOR;
  }

  return RISCV::INSTRUCTION_LIST_END;
}

/// Identify instructions that can be folded into a CCMOV instruction, and
/// return the defining instruction.
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
                                           const MachineRegisterInfo &MRI,
                                           const TargetInstrInfo *TII) {
  if (!Reg.isVirtual())
    return nullptr;
  if (!MRI.hasOneNonDBGUse(Reg))
    return nullptr;
  MachineInstr *MI = MRI.getVRegDef(Reg);
  if (!MI)
    return nullptr;
  // Check if MI can be predicated and folded into the CCMOV.
  if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
    return nullptr;
  // Don't predicate the li idiom.
  if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
      MI->getOperand(1).getReg() == RISCV::X0)
    return nullptr;
  // Check if MI has any other defs or physreg uses.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
    // Reject frame index operands; PEI can't handle the predicated pseudos.
    if (MO.isFI() || MO.isCPI() || MO.isJTI())
      return nullptr;
    if (!MO.isReg())
      continue;
    // MI can't have any tied operands, that would conflict with predication.
    if (MO.isTied())
      return nullptr;
    if (MO.isDef())
      return nullptr;
    // Allow constant physregs.
    if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))
      return nullptr;
  }
  bool DontMoveAcrossStores = true;
  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
    return nullptr;
  return MI;
}

bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   unsigned &TrueOp, unsigned &FalseOp,
                                   bool &Optimizable) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  // CCMOV operands:
  // 0: Def.
  // 1: LHS of compare.
  // 2: RHS of compare.
  // 3: Condition code.
  // 4: False use.
  // 5: True use.
  TrueOp = 5;
  FalseOp = 4;
  Cond.push_back(MI.getOperand(1));
  Cond.push_back(MI.getOperand(2));
  Cond.push_back(MI.getOperand(3));
  // We can only fold when we support short forward branch opt.
  Optimizable = STI.hasShortForwardBranchOpt();
  return false;
}

MachineInstr *
RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
                               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                               bool PreferFalse) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  if (!STI.hasShortForwardBranchOpt())
    return nullptr;

  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  MachineInstr *DefMI =
      canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
  if (!DefMI)
    return nullptr;

  // Find the new register class to use.
  MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
  Register DestReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
  if (!MRI.constrainRegClass(DestReg, PreviousClass))
    return nullptr;

  unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
  assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");

  // Create a new predicated version of DefMI.
  MachineInstrBuilder NewMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);

  // Copy the condition portion.
  NewMI.add(MI.getOperand(1));
  NewMI.add(MI.getOperand(2));

  // Add condition code, inverting if necessary.
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
  if (Invert)
    CC = RISCVCC::getOppositeBranchCondition(CC);
  NewMI.addImm(CC);

  // Copy the false register.
  NewMI.add(FalseReg);

  // Copy all the DefMI operands.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
    NewMI.add(DefMI->getOperand(i));

  // Update SeenMIs set: register newly created MI and erase removed DefMI.
  SeenMIs.insert(NewMI);
  SeenMIs.erase(DefMI);

  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
  // DefMI would be invalid when transferred inside the loop. Checking for a
  // loop is expensive, but at least remove kill flags if they are in different
  // BBs.
  if (DefMI->getParent() != MI.getParent())
    NewMI->clearKillInfo();

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}

unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  if (MI.isMetaInstruction())
    return 0;

  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::INLINEASM ||
      Opcode == TargetOpcode::INLINEASM_BR) {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
                              *TM.getMCAsmInfo());
  }

  if (!MI.memoperands_empty()) {
    MachineMemOperand *MMO = *(MI.memoperands_begin());
    const MachineFunction &MF = *MI.getParent()->getParent();
    const auto &ST = MF.getSubtarget<RISCVSubtarget>();
    if (ST.hasStdExtZihintntl() && MMO->isNonTemporal()) {
      if (ST.hasStdExtCOrZca() && ST.enableRVCHintInstrs()) {
        if (isCompressibleInst(MI, STI))
          return 4; // c.ntl.all + c.load/c.store
        return 6;   // c.ntl.all + load/store
      }
      return 8; // ntl.all + load/store
    }
  }

  if (Opcode == TargetOpcode::BUNDLE)
    return getInstBundleLength(MI);

  if (MI.getParent() && MI.getParent()->getParent()) {
    if (isCompressibleInst(MI, STI))
      return 2;
  }

  switch (Opcode) {
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its
    // shadow.
    return StackMapOpers(&MI).getNumPatchBytes();
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested.
    return PatchPointOpers(&MI).getNumPatchBytes();
  case TargetOpcode::STATEPOINT:
    // The size of the statepoint intrinsic is the number of bytes requested.
    return StatepointOpers(&MI).getNumPatchBytes();
  default:
    return get(Opcode).getSize();
  }
}

unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
           MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
  case RISCV::ADDI:
  case RISCV::ORI:
  case RISCV::XORI:
    return (MI.getOperand(1).isReg() &&
            MI.getOperand(1).getReg() == RISCV::X0) ||
           (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
  }
  return MI.isAsCheapAsAMove();
}

std::optional<DestSourcePair>
RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  if (MI.isMoveReg())
    return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
  switch (MI.getOpcode()) {
  default:
    break;
  case RISCV::ADDI:
    // Operand 1 can be a frameindex but callers expect registers.
    if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0)
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
        MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
    break;
  }
  return std::nullopt;
}

MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
  if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
    // The option is unused. Choose the Local strategy only for in-order cores.
    // When the scheduling model is unspecified, use the MinInstrCount strategy
    // as the more generic one.
    const auto &SchedModel = STI.getSchedModel();
    return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
               ? MachineTraceStrategy::TS_MinInstrCount
               : MachineTraceStrategy::TS_Local;
  }
  // The strategy was forced by the option.
  return ForceMachineCombinerStrategy;
}

void RISCVInstrInfo::finalizeInsInstrs(
    MachineInstr &Root, MachineCombinerPattern &P,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  int16_t FrmOpIdx =
      RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
  if (FrmOpIdx < 0) {
    assert(all_of(InsInstrs,
                  [](MachineInstr *MI) {
                    return RISCV::getNamedOperandIdx(MI->getOpcode(),
                                                     RISCV::OpName::frm) < 0;
                  }) &&
           "New instructions require FRM whereas the old one does not have it");
    return;
  }

  const MachineOperand &FRM = Root.getOperand(FrmOpIdx);
  MachineFunction &MF = *Root.getMF();

  for (auto *NewMI : InsInstrs) {
    assert(static_cast<unsigned>(RISCV::getNamedOperandIdx(
               NewMI->getOpcode(), RISCV::OpName::frm)) ==
               NewMI->getNumOperands() &&
           "Instruction has unexpected number of operands");
    MachineInstrBuilder MIB(MF, NewMI);
    MIB.add(FRM);
    if (FRM.getImm() == RISCVFPRndMode::DYN)
      MIB.addUse(RISCV::FRM, RegState::Implicit);
  }
}

static bool isFADD(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case RISCV::FADD_H:
  case RISCV::FADD_S:
  case RISCV::FADD_D:
    return true;
  }
}

static bool isFSUB(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case RISCV::FSUB_H:
  case RISCV::FSUB_S:
  case RISCV::FSUB_D:
    return true;
  }
}

static bool isFMUL(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case RISCV::FMUL_H:
  case RISCV::FMUL_S:
  case RISCV::FMUL_D:
    return true;
  }
}

bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
                                            bool &Commuted) const {
  if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
    return false;

  const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
  unsigned OperandIdx = Commuted ? 2 : 1;
  const MachineInstr &Sibling =
      *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg());

  int16_t InstFrmOpIdx =
      RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);
  int16_t SiblingFrmOpIdx =
      RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);

  return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
         RISCV::hasEqualFRM(Inst, Sibling);
}

bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
                                                 bool Invert) const {
  unsigned Opc = Inst.getOpcode();
  if (Invert) {
    auto InverseOpcode = getInverseOpcode(Opc);
    if (!InverseOpcode)
      return false;
    Opc = *InverseOpcode;
  }

  if (isFADD(Opc) || isFMUL(Opc))
    return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
           Inst.getFlag(MachineInstr::MIFlag::FmNsz);

  switch (Opc) {
  default:
    return false;
  case RISCV::ADD:
  case RISCV::ADDW:
  case RISCV::AND:
  case RISCV::OR:
  case RISCV::XOR:
  // From the RISC-V ISA spec, if both the high and low bits of the same
  // product are required, then the recommended code sequence is:
  //
  //   MULH[[S]U] rdh, rs1, rs2
  //   MUL        rdl, rs1, rs2
  //   (source register specifiers must be in the same order and rdh cannot
  //   be the same as rs1 or rs2)
  //
  // Microarchitectures can then fuse these into a single multiply operation
  // instead of performing two separate multiplies.
1733 // MachineCombiner may reassociate MUL operands and lose the fusion 1734 // opportunity. 1735 case RISCV::MUL: 1736 case RISCV::MULW: 1737 case RISCV::MIN: 1738 case RISCV::MINU: 1739 case RISCV::MAX: 1740 case RISCV::MAXU: 1741 case RISCV::FMIN_H: 1742 case RISCV::FMIN_S: 1743 case RISCV::FMIN_D: 1744 case RISCV::FMAX_H: 1745 case RISCV::FMAX_S: 1746 case RISCV::FMAX_D: 1747 return true; 1748 } 1749 1750 return false; 1751 } 1752 1753 std::optional<unsigned> 1754 RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const { 1755 switch (Opcode) { 1756 default: 1757 return std::nullopt; 1758 case RISCV::FADD_H: 1759 return RISCV::FSUB_H; 1760 case RISCV::FADD_S: 1761 return RISCV::FSUB_S; 1762 case RISCV::FADD_D: 1763 return RISCV::FSUB_D; 1764 case RISCV::FSUB_H: 1765 return RISCV::FADD_H; 1766 case RISCV::FSUB_S: 1767 return RISCV::FADD_S; 1768 case RISCV::FSUB_D: 1769 return RISCV::FADD_D; 1770 case RISCV::ADD: 1771 return RISCV::SUB; 1772 case RISCV::SUB: 1773 return RISCV::ADD; 1774 case RISCV::ADDW: 1775 return RISCV::SUBW; 1776 case RISCV::SUBW: 1777 return RISCV::ADDW; 1778 } 1779 } 1780 1781 static bool canCombineFPFusedMultiply(const MachineInstr &Root, 1782 const MachineOperand &MO, 1783 bool DoRegPressureReduce) { 1784 if (!MO.isReg() || !MO.getReg().isVirtual()) 1785 return false; 1786 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); 1787 MachineInstr *MI = MRI.getVRegDef(MO.getReg()); 1788 if (!MI || !isFMUL(MI->getOpcode())) 1789 return false; 1790 1791 if (!Root.getFlag(MachineInstr::MIFlag::FmContract) || 1792 !MI->getFlag(MachineInstr::MIFlag::FmContract)) 1793 return false; 1794 1795 // Try combining even if fmul has more than one use as it eliminates 1796 // dependency between fadd(fsub) and fmul. However, it can extend liveranges 1797 // for fmul operands, so reject the transformation in register pressure 1798 // reduction mode. 1799 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 1800 return false; 1801 1802 // Do not combine instructions from different basic blocks. 1803 if (Root.getParent() != MI->getParent()) 1804 return false; 1805 return RISCV::hasEqualFRM(Root, *MI); 1806 } 1807 1808 static bool 1809 getFPFusedMultiplyPatterns(MachineInstr &Root, 1810 SmallVectorImpl<MachineCombinerPattern> &Patterns, 1811 bool DoRegPressureReduce) { 1812 unsigned Opc = Root.getOpcode(); 1813 bool IsFAdd = isFADD(Opc); 1814 if (!IsFAdd && !isFSUB(Opc)) 1815 return false; 1816 bool Added = false; 1817 if (canCombineFPFusedMultiply(Root, Root.getOperand(1), 1818 DoRegPressureReduce)) { 1819 Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_AX 1820 : MachineCombinerPattern::FMSUB); 1821 Added = true; 1822 } 1823 if (canCombineFPFusedMultiply(Root, Root.getOperand(2), 1824 DoRegPressureReduce)) { 1825 Patterns.push_back(IsFAdd ? 
MachineCombinerPattern::FMADD_XA 1826 : MachineCombinerPattern::FNMSUB); 1827 Added = true; 1828 } 1829 return Added; 1830 } 1831 1832 static bool getFPPatterns(MachineInstr &Root, 1833 SmallVectorImpl<MachineCombinerPattern> &Patterns, 1834 bool DoRegPressureReduce) { 1835 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce); 1836 } 1837 1838 bool RISCVInstrInfo::getMachineCombinerPatterns( 1839 MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, 1840 bool DoRegPressureReduce) const { 1841 1842 if (getFPPatterns(Root, Patterns, DoRegPressureReduce)) 1843 return true; 1844 1845 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, 1846 DoRegPressureReduce); 1847 } 1848 1849 static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, 1850 MachineCombinerPattern Pattern) { 1851 switch (RootOpc) { 1852 default: 1853 llvm_unreachable("Unexpected opcode"); 1854 case RISCV::FADD_H: 1855 return RISCV::FMADD_H; 1856 case RISCV::FADD_S: 1857 return RISCV::FMADD_S; 1858 case RISCV::FADD_D: 1859 return RISCV::FMADD_D; 1860 case RISCV::FSUB_H: 1861 return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_H 1862 : RISCV::FNMSUB_H; 1863 case RISCV::FSUB_S: 1864 return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_S 1865 : RISCV::FNMSUB_S; 1866 case RISCV::FSUB_D: 1867 return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_D 1868 : RISCV::FNMSUB_D; 1869 } 1870 } 1871 1872 static unsigned getAddendOperandIdx(MachineCombinerPattern Pattern) { 1873 switch (Pattern) { 1874 default: 1875 llvm_unreachable("Unexpected pattern"); 1876 case MachineCombinerPattern::FMADD_AX: 1877 case MachineCombinerPattern::FMSUB: 1878 return 2; 1879 case MachineCombinerPattern::FMADD_XA: 1880 case MachineCombinerPattern::FNMSUB: 1881 return 1; 1882 } 1883 } 1884 1885 static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev, 1886 MachineCombinerPattern Pattern, 1887 SmallVectorImpl<MachineInstr *> &InsInstrs, 1888 SmallVectorImpl<MachineInstr *> &DelInstrs) { 1889 MachineFunction *MF = Root.getMF(); 1890 MachineRegisterInfo &MRI = MF->getRegInfo(); 1891 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 1892 1893 MachineOperand &Mul1 = Prev.getOperand(1); 1894 MachineOperand &Mul2 = Prev.getOperand(2); 1895 MachineOperand &Dst = Root.getOperand(0); 1896 MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern)); 1897 1898 Register DstReg = Dst.getReg(); 1899 unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern); 1900 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags(); 1901 DebugLoc MergedLoc = 1902 DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc()); 1903 1904 bool Mul1IsKill = Mul1.isKill(); 1905 bool Mul2IsKill = Mul2.isKill(); 1906 bool AddendIsKill = Addend.isKill(); 1907 1908 // We need to clear kill flags since we may be extending the live range past 1909 // a kill. If the mul had kill flags, we can preserve those since we know 1910 // where the previous range stopped. 
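// For illustration of the rewrite built below (virtual register names are
// invented here and rounding-mode operands are omitted), the FMADD_AX
// pattern turns
//   %mul = FMUL_S %a, %b      <- Prev
//   %res = FADD_S %mul, %c    <- Root
// into a single fused instruction:
//   %res = FMADD_S %a, %b, %c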
1911 MRI.clearKillFlags(Mul1.getReg()); 1912 MRI.clearKillFlags(Mul2.getReg()); 1913 1914 MachineInstrBuilder MIB = 1915 BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg) 1916 .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill)) 1917 .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill)) 1918 .addReg(Addend.getReg(), getKillRegState(AddendIsKill)) 1919 .setMIFlags(IntersectedFlags); 1920 1921 InsInstrs.push_back(MIB); 1922 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) 1923 DelInstrs.push_back(&Prev); 1924 DelInstrs.push_back(&Root); 1925 } 1926 1927 void RISCVInstrInfo::genAlternativeCodeSequence( 1928 MachineInstr &Root, MachineCombinerPattern Pattern, 1929 SmallVectorImpl<MachineInstr *> &InsInstrs, 1930 SmallVectorImpl<MachineInstr *> &DelInstrs, 1931 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { 1932 MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); 1933 switch (Pattern) { 1934 default: 1935 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 1936 DelInstrs, InstrIdxForVirtReg); 1937 return; 1938 case MachineCombinerPattern::FMADD_AX: 1939 case MachineCombinerPattern::FMSUB: { 1940 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg()); 1941 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); 1942 return; 1943 } 1944 case MachineCombinerPattern::FMADD_XA: 1945 case MachineCombinerPattern::FNMSUB: { 1946 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg()); 1947 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); 1948 return; 1949 } 1950 } 1951 } 1952 1953 bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, 1954 StringRef &ErrInfo) const { 1955 MCInstrDesc const &Desc = MI.getDesc(); 1956 1957 for (const auto &[Index, Operand] : enumerate(Desc.operands())) { 1958 unsigned OpType = Operand.OperandType; 1959 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM && 1960 OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) { 1961 const MachineOperand &MO = MI.getOperand(Index); 1962 if (MO.isImm()) { 1963 int64_t Imm = MO.getImm(); 1964 bool Ok; 1965 switch (OpType) { 1966 default: 1967 llvm_unreachable("Unexpected operand type"); 1968 1969 // clang-format off 1970 #define CASE_OPERAND_UIMM(NUM) \ 1971 case RISCVOp::OPERAND_UIMM##NUM: \ 1972 Ok = isUInt<NUM>(Imm); \ 1973 break; 1974 CASE_OPERAND_UIMM(1) 1975 CASE_OPERAND_UIMM(2) 1976 CASE_OPERAND_UIMM(3) 1977 CASE_OPERAND_UIMM(4) 1978 CASE_OPERAND_UIMM(5) 1979 CASE_OPERAND_UIMM(6) 1980 CASE_OPERAND_UIMM(7) 1981 CASE_OPERAND_UIMM(8) 1982 CASE_OPERAND_UIMM(12) 1983 CASE_OPERAND_UIMM(20) 1984 // clang-format on 1985 case RISCVOp::OPERAND_UIMM2_LSB0: 1986 Ok = isShiftedUInt<1, 1>(Imm); 1987 break; 1988 case RISCVOp::OPERAND_UIMM7_LSB00: 1989 Ok = isShiftedUInt<5, 2>(Imm); 1990 break; 1991 case RISCVOp::OPERAND_UIMM8_LSB00: 1992 Ok = isShiftedUInt<6, 2>(Imm); 1993 break; 1994 case RISCVOp::OPERAND_UIMM8_LSB000: 1995 Ok = isShiftedUInt<5, 3>(Imm); 1996 break; 1997 case RISCVOp::OPERAND_UIMM8_GE32: 1998 Ok = isUInt<8>(Imm) && Imm >= 32; 1999 break; 2000 case RISCVOp::OPERAND_UIMM9_LSB000: 2001 Ok = isShiftedUInt<6, 3>(Imm); 2002 break; 2003 case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO: 2004 Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0); 2005 break; 2006 case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO: 2007 Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0); 2008 break; 2009 case RISCVOp::OPERAND_ZERO: 2010 Ok = Imm == 0; 2011 break; 2012 case RISCVOp::OPERAND_SIMM5: 2013 Ok = isInt<5>(Imm); 2014 break; 2015 case RISCVOp::OPERAND_SIMM5_PLUS1: 2016 Ok = (isInt<5>(Imm) && Imm 
!= -16) || Imm == 16; 2017 break; 2018 case RISCVOp::OPERAND_SIMM6: 2019 Ok = isInt<6>(Imm); 2020 break; 2021 case RISCVOp::OPERAND_SIMM6_NONZERO: 2022 Ok = Imm != 0 && isInt<6>(Imm); 2023 break; 2024 case RISCVOp::OPERAND_VTYPEI10: 2025 Ok = isUInt<10>(Imm); 2026 break; 2027 case RISCVOp::OPERAND_VTYPEI11: 2028 Ok = isUInt<11>(Imm); 2029 break; 2030 case RISCVOp::OPERAND_SIMM12: 2031 Ok = isInt<12>(Imm); 2032 break; 2033 case RISCVOp::OPERAND_SIMM12_LSB00000: 2034 Ok = isShiftedInt<7, 5>(Imm); 2035 break; 2036 case RISCVOp::OPERAND_UIMMLOG2XLEN: 2037 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm); 2038 break; 2039 case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO: 2040 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm); 2041 Ok = Ok && Imm != 0; 2042 break; 2043 case RISCVOp::OPERAND_CLUI_IMM: 2044 Ok = (isUInt<5>(Imm) && Imm != 0) || 2045 (Imm >= 0xfffe0 && Imm <= 0xfffff); 2046 break; 2047 case RISCVOp::OPERAND_RVKRNUM: 2048 Ok = Imm >= 0 && Imm <= 10; 2049 break; 2050 case RISCVOp::OPERAND_RVKRNUM_0_7: 2051 Ok = Imm >= 0 && Imm <= 7; 2052 break; 2053 case RISCVOp::OPERAND_RVKRNUM_1_10: 2054 Ok = Imm >= 1 && Imm <= 10; 2055 break; 2056 case RISCVOp::OPERAND_RVKRNUM_2_14: 2057 Ok = Imm >= 2 && Imm <= 14; 2058 break; 2059 } 2060 if (!Ok) { 2061 ErrInfo = "Invalid immediate"; 2062 return false; 2063 } 2064 } 2065 } 2066 } 2067 2068 const uint64_t TSFlags = Desc.TSFlags; 2069 if (RISCVII::hasVLOp(TSFlags)) { 2070 const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc)); 2071 if (!Op.isImm() && !Op.isReg()) { 2072 ErrInfo = "Invalid operand type for VL operand"; 2073 return false; 2074 } 2075 if (Op.isReg() && Op.getReg() != RISCV::NoRegister) { 2076 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 2077 auto *RC = MRI.getRegClass(Op.getReg()); 2078 if (!RISCV::GPRRegClass.hasSubClassEq(RC)) { 2079 ErrInfo = "Invalid register class for VL operand"; 2080 return false; 2081 } 2082 } 2083 if (!RISCVII::hasSEWOp(TSFlags)) { 2084 ErrInfo = "VL operand w/o SEW operand?"; 2085 return false; 2086 } 2087 } 2088 if (RISCVII::hasSEWOp(TSFlags)) { 2089 unsigned OpIdx = RISCVII::getSEWOpNum(Desc); 2090 if (!MI.getOperand(OpIdx).isImm()) { 2091 ErrInfo = "SEW value expected to be an immediate"; 2092 return false; 2093 } 2094 uint64_t Log2SEW = MI.getOperand(OpIdx).getImm(); 2095 if (Log2SEW > 31) { 2096 ErrInfo = "Unexpected SEW value"; 2097 return false; 2098 } 2099 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; 2100 if (!RISCVVType::isValidSEW(SEW)) { 2101 ErrInfo = "Unexpected SEW value"; 2102 return false; 2103 } 2104 } 2105 if (RISCVII::hasVecPolicyOp(TSFlags)) { 2106 unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc); 2107 if (!MI.getOperand(OpIdx).isImm()) { 2108 ErrInfo = "Policy operand expected to be an immediate"; 2109 return false; 2110 } 2111 uint64_t Policy = MI.getOperand(OpIdx).getImm(); 2112 if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) { 2113 ErrInfo = "Invalid Policy Value"; 2114 return false; 2115 } 2116 if (!RISCVII::hasVLOp(TSFlags)) { 2117 ErrInfo = "policy operand w/o VL operand?"; 2118 return false; 2119 } 2120 2121 // VecPolicy operands can only exist on instructions with passthru/merge 2122 // arguments. Note that not all arguments with passthru have vec policy 2123 // operands- some instructions have implicit policies. 
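// In other words, the check below enforces that a policy operand only
// appears on instructions whose def operand 0 has a tied use (the
// passthru/merge source); if no use is tied to the def, the policy has
// nothing to act on and the instruction is reported as malformed.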
2124 unsigned UseOpIdx; 2125 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 2126 ErrInfo = "policy operand w/o tied operand?"; 2127 return false; 2128 } 2129 } 2130 2131 return true; 2132 } 2133 2134 bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, 2135 const MachineInstr &AddrI, 2136 ExtAddrMode &AM) const { 2137 switch (MemI.getOpcode()) { 2138 default: 2139 return false; 2140 case RISCV::LB: 2141 case RISCV::LBU: 2142 case RISCV::LH: 2143 case RISCV::LHU: 2144 case RISCV::LW: 2145 case RISCV::LWU: 2146 case RISCV::LD: 2147 case RISCV::FLH: 2148 case RISCV::FLW: 2149 case RISCV::FLD: 2150 case RISCV::SB: 2151 case RISCV::SH: 2152 case RISCV::SW: 2153 case RISCV::SD: 2154 case RISCV::FSH: 2155 case RISCV::FSW: 2156 case RISCV::FSD: 2157 break; 2158 } 2159 2160 if (MemI.getOperand(0).getReg() == Reg) 2161 return false; 2162 2163 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() || 2164 !AddrI.getOperand(2).isImm()) 2165 return false; 2166 2167 int64_t OldOffset = MemI.getOperand(2).getImm(); 2168 int64_t Disp = AddrI.getOperand(2).getImm(); 2169 int64_t NewOffset = OldOffset + Disp; 2170 if (!STI.is64Bit()) 2171 NewOffset = SignExtend64<32>(NewOffset); 2172 2173 if (!isInt<12>(NewOffset)) 2174 return false; 2175 2176 AM.BaseReg = AddrI.getOperand(1).getReg(); 2177 AM.ScaledReg = 0; 2178 AM.Scale = 0; 2179 AM.Displacement = NewOffset; 2180 AM.Form = ExtAddrMode::Formula::Basic; 2181 return true; 2182 } 2183 2184 MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI, 2185 const ExtAddrMode &AM) const { 2186 2187 const DebugLoc &DL = MemI.getDebugLoc(); 2188 MachineBasicBlock &MBB = *MemI.getParent(); 2189 2190 assert(AM.ScaledReg == 0 && AM.Scale == 0 && 2191 "Addressing mode not supported for folding"); 2192 2193 return BuildMI(MBB, MemI, DL, get(MemI.getOpcode())) 2194 .addReg(MemI.getOperand(0).getReg(), 2195 MemI.mayLoad() ? RegState::Define : 0) 2196 .addReg(AM.BaseReg) 2197 .addImm(AM.Displacement) 2198 .setMemRefs(MemI.memoperands()) 2199 .setMIFlags(MemI.getFlags()); 2200 } 2201 2202 bool RISCVInstrInfo::getMemOperandsWithOffsetWidth( 2203 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, 2204 int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, 2205 const TargetRegisterInfo *TRI) const { 2206 if (!LdSt.mayLoadOrStore()) 2207 return false; 2208 2209 // Conservatively, only handle scalar loads/stores for now. 2210 switch (LdSt.getOpcode()) { 2211 case RISCV::LB: 2212 case RISCV::LBU: 2213 case RISCV::SB: 2214 case RISCV::LH: 2215 case RISCV::LHU: 2216 case RISCV::FLH: 2217 case RISCV::SH: 2218 case RISCV::FSH: 2219 case RISCV::LW: 2220 case RISCV::LWU: 2221 case RISCV::FLW: 2222 case RISCV::SW: 2223 case RISCV::FSW: 2224 case RISCV::LD: 2225 case RISCV::FLD: 2226 case RISCV::SD: 2227 case RISCV::FSD: 2228 break; 2229 default: 2230 return false; 2231 } 2232 const MachineOperand *BaseOp; 2233 OffsetIsScalable = false; 2234 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) 2235 return false; 2236 BaseOps.push_back(BaseOp); 2237 return true; 2238 } 2239 2240 // TODO: This was copied from SIInstrInfo. Could it be lifted to a common 2241 // helper? 
2242 static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, 2243 ArrayRef<const MachineOperand *> BaseOps1, 2244 const MachineInstr &MI2, 2245 ArrayRef<const MachineOperand *> BaseOps2) { 2246 // Only examine the first "base" operand of each instruction, on the 2247 // assumption that it represents the real base address of the memory access. 2248 // Other operands are typically offsets or indices from this base address. 2249 if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front())) 2250 return true; 2251 2252 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand()) 2253 return false; 2254 2255 auto MO1 = *MI1.memoperands_begin(); 2256 auto MO2 = *MI2.memoperands_begin(); 2257 if (MO1->getAddrSpace() != MO2->getAddrSpace()) 2258 return false; 2259 2260 auto Base1 = MO1->getValue(); 2261 auto Base2 = MO2->getValue(); 2262 if (!Base1 || !Base2) 2263 return false; 2264 Base1 = getUnderlyingObject(Base1); 2265 Base2 = getUnderlyingObject(Base2); 2266 2267 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2)) 2268 return false; 2269 2270 return Base1 == Base2; 2271 } 2272 2273 bool RISCVInstrInfo::shouldClusterMemOps( 2274 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1, 2275 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2, 2276 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, 2277 unsigned NumBytes) const { 2278 // If the mem ops (to be clustered) do not have the same base ptr, then they 2279 // should not be clustered 2280 if (!BaseOps1.empty() && !BaseOps2.empty()) { 2281 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); 2282 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); 2283 if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) 2284 return false; 2285 } else if (!BaseOps1.empty() || !BaseOps2.empty()) { 2286 // If only one base op is empty, they do not have the same base ptr 2287 return false; 2288 } 2289 2290 unsigned CacheLineSize = 2291 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize(); 2292 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget. 2293 CacheLineSize = CacheLineSize ? CacheLineSize : 64; 2294 // Cluster if the memory operations are on the same or a neighbouring cache 2295 // line, but limit the maximum ClusterSize to avoid creating too much 2296 // additional register pressure. 2297 return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize; 2298 } 2299 2300 // Set BaseReg (the base register operand), Offset (the byte offset being 2301 // accessed) and the access Width of the passed instruction that reads/writes 2302 // memory. Returns false if the instruction does not read/write memory or the 2303 // BaseReg/Offset/Width can't be determined. Is not guaranteed to always 2304 // recognise base operands and offsets in all cases. 2305 // TODO: Add an IsScalable bool ref argument (like the equivalent AArch64 2306 // function) and set it as appropriate. 2307 bool RISCVInstrInfo::getMemOperandWithOffsetWidth( 2308 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, 2309 unsigned &Width, const TargetRegisterInfo *TRI) const { 2310 if (!LdSt.mayLoadOrStore()) 2311 return false; 2312 2313 // Here we assume the standard RISC-V ISA, which uses a base+offset 2314 // addressing mode. You'll need to relax these conditions to support custom 2315 // load/store instructions. 
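// For example, a scalar access such as "lw a0, 12(a1)" has exactly three
// explicit operands: the value register, the base register (or a frame
// index before frame-index elimination), and the signed 12-bit immediate
// offset, which is what gets extracted below.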
2316 if (LdSt.getNumExplicitOperands() != 3) 2317 return false; 2318 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || 2319 !LdSt.getOperand(2).isImm()) 2320 return false; 2321 2322 if (!LdSt.hasOneMemOperand()) 2323 return false; 2324 2325 Width = (*LdSt.memoperands_begin())->getSize(); 2326 BaseReg = &LdSt.getOperand(1); 2327 Offset = LdSt.getOperand(2).getImm(); 2328 return true; 2329 } 2330 2331 bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint( 2332 const MachineInstr &MIa, const MachineInstr &MIb) const { 2333 assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); 2334 assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); 2335 2336 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || 2337 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 2338 return false; 2339 2340 // Retrieve the base register, offset from the base register and width. Width 2341 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If 2342 // base registers are identical, and the offset of a lower memory access + 2343 // the width doesn't overlap the offset of a higher memory access, 2344 // then the memory accesses are different. 2345 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 2346 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; 2347 int64_t OffsetA = 0, OffsetB = 0; 2348 unsigned int WidthA = 0, WidthB = 0; 2349 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && 2350 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { 2351 if (BaseOpA->isIdenticalTo(*BaseOpB)) { 2352 int LowOffset = std::min(OffsetA, OffsetB); 2353 int HighOffset = std::max(OffsetA, OffsetB); 2354 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 2355 if (LowOffset + LowWidth <= HighOffset) 2356 return true; 2357 } 2358 } 2359 return false; 2360 } 2361 2362 std::pair<unsigned, unsigned> 2363 RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 2364 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK; 2365 return std::make_pair(TF & Mask, TF & ~Mask); 2366 } 2367 2368 ArrayRef<std::pair<unsigned, const char *>> 2369 RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 2370 using namespace RISCVII; 2371 static const std::pair<unsigned, const char *> TargetFlags[] = { 2372 {MO_CALL, "riscv-call"}, 2373 {MO_LO, "riscv-lo"}, 2374 {MO_HI, "riscv-hi"}, 2375 {MO_PCREL_LO, "riscv-pcrel-lo"}, 2376 {MO_PCREL_HI, "riscv-pcrel-hi"}, 2377 {MO_GOT_HI, "riscv-got-hi"}, 2378 {MO_TPREL_LO, "riscv-tprel-lo"}, 2379 {MO_TPREL_HI, "riscv-tprel-hi"}, 2380 {MO_TPREL_ADD, "riscv-tprel-add"}, 2381 {MO_TLS_GOT_HI, "riscv-tls-got-hi"}, 2382 {MO_TLS_GD_HI, "riscv-tls-gd-hi"}}; 2383 return ArrayRef(TargetFlags); 2384 } 2385 bool RISCVInstrInfo::isFunctionSafeToOutlineFrom( 2386 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { 2387 const Function &F = MF.getFunction(); 2388 2389 // Can F be deduplicated by the linker? If it can, don't outline from it. 2390 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 2391 return false; 2392 2393 // Don't outline from functions with section markings; the program could 2394 // expect that all the code is in the named section. 2395 if (F.hasSection()) 2396 return false; 2397 2398 // It's safe to outline from MF. 2399 return true; 2400 } 2401 2402 bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, 2403 unsigned &Flags) const { 2404 // More accurate safety checking is done in getOutliningCandidateInfo. 
2405 return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
2406 }
2407
2408 // Enum values indicating how an outlined call should be constructed.
2409 enum MachineOutlinerConstructionID {
2410 MachineOutlinerDefault
2411 };
2412
2413 bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
2414 MachineFunction &MF) const {
2415 return MF.getFunction().hasMinSize();
2416 }
2417
2418 std::optional<outliner::OutlinedFunction>
2419 RISCVInstrInfo::getOutliningCandidateInfo(
2420 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
2421
2422 // First we need to filter out candidates where the X5 register (i.e., t0)
2423 // can't be used to set up the function call.
2424 auto CannotInsertCall = [](outliner::Candidate &C) {
2425 const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
2426 return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
2427 };
2428
2429 llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
2430
2431 // If the sequence doesn't have enough candidates left, then we're done.
2432 if (RepeatedSequenceLocs.size() < 2)
2433 return std::nullopt;
2434
2435 unsigned SequenceSize = 0;
2436
2437 auto I = RepeatedSequenceLocs[0].front();
2438 auto E = std::next(RepeatedSequenceLocs[0].back());
2439 for (; I != E; ++I)
2440 SequenceSize += getInstSizeInBytes(*I);
2441
2442 // call t0, function = 8 bytes.
2443 unsigned CallOverhead = 8;
2444 for (auto &C : RepeatedSequenceLocs)
2445 C.setCallInfo(MachineOutlinerDefault, CallOverhead);
2446
2447 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
2448 unsigned FrameOverhead = 4;
2449 if (RepeatedSequenceLocs[0]
2450 .getMF()
2451 ->getSubtarget<RISCVSubtarget>()
2452 .hasStdExtCOrZca())
2453 FrameOverhead = 2;
2454
2455 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
2456 FrameOverhead, MachineOutlinerDefault);
2457 }
2458
2459 outliner::InstrType
2460 RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
2461 unsigned Flags) const {
2462 MachineInstr &MI = *MBBI;
2463 MachineBasicBlock *MBB = MI.getParent();
2464 const TargetRegisterInfo *TRI =
2465 MBB->getParent()->getSubtarget().getRegisterInfo();
2466 const auto &F = MI.getMF()->getFunction();
2467
2468 // We can manually strip out CFI instructions later.
2469 if (MI.isCFIInstruction())
2470 // If the current function has exception handling code, we can't outline
2471 // and strip these CFI instructions, since that may break the .eh_frame
2472 // section needed for unwinding.
2473 return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
2474 : outliner::InstrType::Invisible;
2475
2476 // We need support for tail calls to outlined functions before return
2477 // statements can be allowed.
2478 if (MI.isReturn())
2479 return outliner::InstrType::Illegal;
2480
2481 // Don't allow modifying the X5 register, which we use for return addresses
2482 // in these outlined functions.
2483 if (MI.modifiesRegister(RISCV::X5, TRI) ||
2484 MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
2485 return outliner::InstrType::Illegal;
2486
2487 // Make sure the operands don't reference something unsafe.
2488 for (const auto &MO : MI.operands()) {
2489
2490 // pcrel-hi and pcrel-lo can't be put in separate sections; filter that out
2491 // if at all possible.
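// A %pcrel_lo operand refers back to the label of a matching %pcrel_hi
// (AUIPC) instruction; if the outlined body could end up in a different
// section than that AUIPC, the pair can no longer be resolved, so such
// instructions are rejected below.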
2492 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO && 2493 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() || 2494 F.hasSection())) 2495 return outliner::InstrType::Illegal; 2496 } 2497 2498 return outliner::InstrType::Legal; 2499 } 2500 2501 void RISCVInstrInfo::buildOutlinedFrame( 2502 MachineBasicBlock &MBB, MachineFunction &MF, 2503 const outliner::OutlinedFunction &OF) const { 2504 2505 // Strip out any CFI instructions 2506 bool Changed = true; 2507 while (Changed) { 2508 Changed = false; 2509 auto I = MBB.begin(); 2510 auto E = MBB.end(); 2511 for (; I != E; ++I) { 2512 if (I->isCFIInstruction()) { 2513 I->removeFromParent(); 2514 Changed = true; 2515 break; 2516 } 2517 } 2518 } 2519 2520 MBB.addLiveIn(RISCV::X5); 2521 2522 // Add in a return instruction to the end of the outlined frame. 2523 MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR)) 2524 .addReg(RISCV::X0, RegState::Define) 2525 .addReg(RISCV::X5) 2526 .addImm(0)); 2527 } 2528 2529 MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( 2530 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, 2531 MachineFunction &MF, outliner::Candidate &C) const { 2532 2533 // Add in a call instruction to the outlined function at the given location. 2534 It = MBB.insert(It, 2535 BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5) 2536 .addGlobalAddress(M.getNamedValue(MF.getName()), 0, 2537 RISCVII::MO_CALL)); 2538 return It; 2539 } 2540 2541 std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI, 2542 Register Reg) const { 2543 // TODO: Handle cases where Reg is a super- or sub-register of the 2544 // destination register. 2545 const MachineOperand &Op0 = MI.getOperand(0); 2546 if (!Op0.isReg() || Reg != Op0.getReg()) 2547 return std::nullopt; 2548 2549 // Don't consider ADDIW as a candidate because the caller may not be aware 2550 // of its sign extension behaviour. 2551 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() && 2552 MI.getOperand(2).isImm()) 2553 return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()}; 2554 2555 return std::nullopt; 2556 } 2557 2558 // MIR printer helper function to annotate Operands with a comment. 2559 std::string RISCVInstrInfo::createMIROperandComment( 2560 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, 2561 const TargetRegisterInfo *TRI) const { 2562 // Print a generic comment for this operand if there is one. 2563 std::string GenericComment = 2564 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); 2565 if (!GenericComment.empty()) 2566 return GenericComment; 2567 2568 // If not, we must have an immediate operand. 2569 if (!Op.isImm()) 2570 return std::string(); 2571 2572 std::string Comment; 2573 raw_string_ostream OS(Comment); 2574 2575 uint64_t TSFlags = MI.getDesc().TSFlags; 2576 2577 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW 2578 // operand of vector codegen pseudos. 2579 if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI || 2580 MI.getOpcode() == RISCV::PseudoVSETVLI || 2581 MI.getOpcode() == RISCV::PseudoVSETIVLI || 2582 MI.getOpcode() == RISCV::PseudoVSETVLIX0) && 2583 OpIdx == 2) { 2584 unsigned Imm = MI.getOperand(OpIdx).getImm(); 2585 RISCVVType::printVType(Imm, OS); 2586 } else if (RISCVII::hasSEWOp(TSFlags) && 2587 OpIdx == RISCVII::getSEWOpNum(MI.getDesc())) { 2588 unsigned Log2SEW = MI.getOperand(OpIdx).getImm(); 2589 unsigned SEW = Log2SEW ? 
1 << Log2SEW : 8; 2590 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 2591 OS << "e" << SEW; 2592 } else if (RISCVII::hasVecPolicyOp(TSFlags) && 2593 OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) { 2594 unsigned Policy = MI.getOperand(OpIdx).getImm(); 2595 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && 2596 "Invalid Policy Value"); 2597 OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", " 2598 << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu"); 2599 } 2600 2601 OS.flush(); 2602 return Comment; 2603 } 2604 2605 // clang-format off 2606 #define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \ 2607 RISCV::PseudoV##OP##_##TYPE##_##LMUL 2608 2609 #define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE) \ 2610 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1): \ 2611 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2): \ 2612 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4): \ 2613 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8) 2614 2615 #define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE) \ 2616 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2): \ 2617 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE) 2618 2619 #define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) \ 2620 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4): \ 2621 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE) 2622 2623 #define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \ 2624 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \ 2625 case CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) 2626 2627 #define CASE_VFMA_SPLATS(OP) \ 2628 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16): \ 2629 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32): \ 2630 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64) 2631 // clang-format on 2632 2633 bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, 2634 unsigned &SrcOpIdx1, 2635 unsigned &SrcOpIdx2) const { 2636 const MCInstrDesc &Desc = MI.getDesc(); 2637 if (!Desc.isCommutable()) 2638 return false; 2639 2640 switch (MI.getOpcode()) { 2641 case RISCV::TH_MVEQZ: 2642 case RISCV::TH_MVNEZ: 2643 // We can't commute operands if operand 2 (i.e., rs1 in 2644 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is 2645 // not valid as the in/out-operand 1). 2646 if (MI.getOperand(2).getReg() == RISCV::X0) 2647 return false; 2648 // Operands 1 and 2 are commutable, if we switch the opcode. 2649 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2); 2650 case RISCV::TH_MULA: 2651 case RISCV::TH_MULAW: 2652 case RISCV::TH_MULAH: 2653 case RISCV::TH_MULS: 2654 case RISCV::TH_MULSW: 2655 case RISCV::TH_MULSH: 2656 // Operands 2 and 3 are commutable. 2657 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); 2658 case RISCV::PseudoCCMOVGPRNoX0: 2659 case RISCV::PseudoCCMOVGPR: 2660 // Operands 4 and 5 are commutable. 2661 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5); 2662 case CASE_VFMA_SPLATS(FMADD): 2663 case CASE_VFMA_SPLATS(FMSUB): 2664 case CASE_VFMA_SPLATS(FMACC): 2665 case CASE_VFMA_SPLATS(FMSAC): 2666 case CASE_VFMA_SPLATS(FNMADD): 2667 case CASE_VFMA_SPLATS(FNMSUB): 2668 case CASE_VFMA_SPLATS(FNMACC): 2669 case CASE_VFMA_SPLATS(FNMSAC): 2670 case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV): 2671 case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV): 2672 case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV): 2673 case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV): 2674 case CASE_VFMA_OPCODE_LMULS(MADD, VX): 2675 case CASE_VFMA_OPCODE_LMULS(NMSUB, VX): 2676 case CASE_VFMA_OPCODE_LMULS(MACC, VX): 2677 case CASE_VFMA_OPCODE_LMULS(NMSAC, VX): 2678 case CASE_VFMA_OPCODE_LMULS(MACC, VV): 2679 case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): { 2680 // If the tail policy is undisturbed we can't commute. 
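// A rough intuition for the check below (assuming the usual VFMA pseudo
// operand layout): operand 1 is tied to the destination, so with a
// tail-undisturbed policy the tail elements come from that tied source;
// swapping it with another source would change the tail contents, hence
// commuting is only attempted when the tail-agnostic bit (bit 0 of the
// policy immediate) is set.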
2681 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2682 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2683 return false;
2684
2685 // For these instructions we can only swap operand 1 and operand 3 by
2686 // changing the opcode.
2687 unsigned CommutableOpIdx1 = 1;
2688 unsigned CommutableOpIdx2 = 3;
2689 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2690 CommutableOpIdx2))
2691 return false;
2692 return true;
2693 }
2694 case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
2695 case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
2696 case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
2697 case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
2698 case CASE_VFMA_OPCODE_LMULS(MADD, VV):
2699 case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
2700 // If the tail policy is undisturbed we can't commute.
2701 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2702 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2703 return false;
2704
2705 // For these instructions we have more freedom. We can commute with the
2706 // other multiplicand or with the addend/subtrahend/minuend.
2707
2708 // Any fixed operand must be from source 1, 2 or 3.
2709 if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
2710 return false;
2711 if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
2712 return false;
2713
2714 // If both ops are fixed, one must be the tied source.
2715 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
2716 SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
2717 return false;
2718
2719 // Look for two different register operands assumed to be commutable
2720 // regardless of the FMA opcode. The FMA opcode is adjusted later if
2721 // needed.
2722 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
2723 SrcOpIdx2 == CommuteAnyOperandIndex) {
2724 // At least one of the operands to be commuted is not specified, so this
2725 // method is free to choose appropriate commutable operands.
2726 unsigned CommutableOpIdx1 = SrcOpIdx1;
2727 if (SrcOpIdx1 == SrcOpIdx2) {
2728 // Neither operand is fixed. Set one of the commutable operands to the
2729 // tied source.
2730 CommutableOpIdx1 = 1;
2731 } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
2732 // Only one of the operands is not fixed.
2733 CommutableOpIdx1 = SrcOpIdx2;
2734 }
2735
2736 // CommutableOpIdx1 is well defined now. Let's choose another commutable
2737 // operand and assign its index to CommutableOpIdx2.
2738 unsigned CommutableOpIdx2;
2739 if (CommutableOpIdx1 != 1) {
2740 // If we haven't already used the tied source, we must use it now.
2741 CommutableOpIdx2 = 1;
2742 } else {
2743 Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();
2744
2745 // The commuted operands should have different registers.
2746 // Otherwise, the commute transformation does not change anything and
2747 // is useless. We use this as a hint to make our decision.
2748 if (Op1Reg != MI.getOperand(2).getReg())
2749 CommutableOpIdx2 = 2;
2750 else
2751 CommutableOpIdx2 = 3;
2752 }
2753
2754 // Assign the found pair of commutable indices to SrcOpIdx1 and
2755 // SrcOpIdx2 to return those values.
2756 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1, 2757 CommutableOpIdx2)) 2758 return false; 2759 } 2760 2761 return true; 2762 } 2763 } 2764 2765 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); 2766 } 2767 2768 #define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \ 2769 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \ 2770 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \ 2771 break; 2772 2773 #define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \ 2774 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \ 2775 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \ 2776 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \ 2777 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8) 2778 2779 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \ 2780 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \ 2781 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) 2782 2783 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \ 2784 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \ 2785 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) 2786 2787 #define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \ 2788 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \ 2789 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) 2790 2791 #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \ 2792 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \ 2793 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \ 2794 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64) 2795 2796 MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, 2797 bool NewMI, 2798 unsigned OpIdx1, 2799 unsigned OpIdx2) const { 2800 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { 2801 if (NewMI) 2802 return *MI.getParent()->getParent()->CloneMachineInstr(&MI); 2803 return MI; 2804 }; 2805 2806 switch (MI.getOpcode()) { 2807 case RISCV::TH_MVEQZ: 2808 case RISCV::TH_MVNEZ: { 2809 auto &WorkingMI = cloneIfNew(MI); 2810 WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ 2811 : RISCV::TH_MVEQZ)); 2812 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1, 2813 OpIdx2); 2814 } 2815 case RISCV::PseudoCCMOVGPRNoX0: 2816 case RISCV::PseudoCCMOVGPR: { 2817 // CCMOV can be commuted by inverting the condition. 
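// Illustrative sketch (made-up operands): "rd = (a0 < a1) ? t : f" commutes
// to "rd = (a0 >= a1) ? f : t". The condition immediate is inverted here,
// and the generic commute below swaps the two value operands.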
2818 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
2819 CC = RISCVCC::getOppositeBranchCondition(CC);
2820 auto &WorkingMI = cloneIfNew(MI);
2821 WorkingMI.getOperand(3).setImm(CC);
2822 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
2823 OpIdx1, OpIdx2);
2824 }
2825 case CASE_VFMA_SPLATS(FMACC):
2826 case CASE_VFMA_SPLATS(FMADD):
2827 case CASE_VFMA_SPLATS(FMSAC):
2828 case CASE_VFMA_SPLATS(FMSUB):
2829 case CASE_VFMA_SPLATS(FNMACC):
2830 case CASE_VFMA_SPLATS(FNMADD):
2831 case CASE_VFMA_SPLATS(FNMSAC):
2832 case CASE_VFMA_SPLATS(FNMSUB):
2833 case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV):
2834 case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV):
2835 case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV):
2836 case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV):
2837 case CASE_VFMA_OPCODE_LMULS(MADD, VX):
2838 case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
2839 case CASE_VFMA_OPCODE_LMULS(MACC, VX):
2840 case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
2841 case CASE_VFMA_OPCODE_LMULS(MACC, VV):
2842 case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
2843 // It only makes sense to toggle these between clobbering the
2844 // addend/subtrahend/minuend or one of the multiplicands.
2845 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
2846 assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
2847 unsigned Opc;
2848 switch (MI.getOpcode()) {
2849 default:
2850 llvm_unreachable("Unexpected opcode");
2851 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
2852 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
2853 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
2854 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
2855 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
2856 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
2857 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
2858 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
2859 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMACC, FMADD, VV)
2860 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSAC, FMSUB, VV)
2861 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMACC, FNMADD, VV)
2862 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSAC, FNMSUB, VV)
2863 CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
2864 CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
2865 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
2866 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
2867 CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
2868 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
2869 }
2870
2871 auto &WorkingMI = cloneIfNew(MI);
2872 WorkingMI.setDesc(get(Opc));
2873 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
2874 OpIdx1, OpIdx2);
2875 }
2876 case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
2877 case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
2878 case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
2879 case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
2880 case CASE_VFMA_OPCODE_LMULS(MADD, VV):
2881 case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
2882 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
2883 // If one of the operands is the addend, we need to change the opcode.
2884 // Otherwise we're just swapping two of the multiplicands.
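// For reference, using the VV forms from the vector spec:
//   vfmadd.vv vd, vs1, vs2  computes  vd = vd*vs1 + vs2
//   vfmacc.vv vd, vs1, vs2  computes  vd = vs1*vs2 + vd
// so swapping the tied operand (1) with the addend (3) requires switching
// between the *MADD and *MACC opcode families, while swapping the two
// multiplicands needs no opcode change.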
2885 if (OpIdx1 == 3 || OpIdx2 == 3) { 2886 unsigned Opc; 2887 switch (MI.getOpcode()) { 2888 default: 2889 llvm_unreachable("Unexpected opcode"); 2890 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMADD, FMACC, VV) 2891 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSUB, FMSAC, VV) 2892 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMADD, FNMACC, VV) 2893 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSUB, FNMSAC, VV) 2894 CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV) 2895 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV) 2896 } 2897 2898 auto &WorkingMI = cloneIfNew(MI); 2899 WorkingMI.setDesc(get(Opc)); 2900 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, 2901 OpIdx1, OpIdx2); 2902 } 2903 // Let the default code handle it. 2904 break; 2905 } 2906 } 2907 2908 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); 2909 } 2910 2911 #undef CASE_VFMA_CHANGE_OPCODE_SPLATS 2912 #undef CASE_VFMA_CHANGE_OPCODE_LMULS 2913 #undef CASE_VFMA_CHANGE_OPCODE_COMMON 2914 #undef CASE_VFMA_SPLATS 2915 #undef CASE_VFMA_OPCODE_LMULS 2916 #undef CASE_VFMA_OPCODE_COMMON 2917 2918 // clang-format off 2919 #define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \ 2920 RISCV::PseudoV##OP##_##LMUL##_TIED 2921 2922 #define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \ 2923 CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \ 2924 case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \ 2925 case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \ 2926 case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \ 2927 case CASE_WIDEOP_OPCODE_COMMON(OP, M4) 2928 2929 #define CASE_WIDEOP_OPCODE_LMULS(OP) \ 2930 CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \ 2931 case CASE_WIDEOP_OPCODE_LMULS_MF4(OP) 2932 // clang-format on 2933 2934 #define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \ 2935 case RISCV::PseudoV##OP##_##LMUL##_TIED: \ 2936 NewOpc = RISCV::PseudoV##OP##_##LMUL; \ 2937 break; 2938 2939 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \ 2940 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \ 2941 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \ 2942 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \ 2943 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \ 2944 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4) 2945 2946 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \ 2947 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \ 2948 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) 2949 2950 MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, 2951 LiveVariables *LV, 2952 LiveIntervals *LIS) const { 2953 MachineInstrBuilder MIB; 2954 switch (MI.getOpcode()) { 2955 default: 2956 return nullptr; 2957 case CASE_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV): 2958 case CASE_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): { 2959 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && 2960 MI.getNumExplicitOperands() == 7 && 2961 "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy"); 2962 // If the tail policy is undisturbed we can't convert. 
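// Reasoning sketch: the untied form built below supplies the destination
// register as an undef passthru operand (RegState::Undef), so tail elements
// are no longer preserved; that is only legal under a tail-agnostic policy,
// hence the check on the low policy bit.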
2963 if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() & 2964 1) == 0) 2965 return nullptr; 2966 // clang-format off 2967 unsigned NewOpc; 2968 switch (MI.getOpcode()) { 2969 default: 2970 llvm_unreachable("Unexpected opcode"); 2971 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV) 2972 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV) 2973 } 2974 // clang-format on 2975 2976 MachineBasicBlock &MBB = *MI.getParent(); 2977 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 2978 .add(MI.getOperand(0)) 2979 .addReg(MI.getOperand(0).getReg(), RegState::Undef) 2980 .add(MI.getOperand(1)) 2981 .add(MI.getOperand(2)) 2982 .add(MI.getOperand(3)) 2983 .add(MI.getOperand(4)) 2984 .add(MI.getOperand(5)) 2985 .add(MI.getOperand(6)); 2986 break; 2987 } 2988 case CASE_WIDEOP_OPCODE_LMULS(WADD_WV): 2989 case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV): 2990 case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV): 2991 case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): { 2992 // If the tail policy is undisturbed we can't convert. 2993 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && 2994 MI.getNumExplicitOperands() == 6); 2995 if ((MI.getOperand(5).getImm() & 1) == 0) 2996 return nullptr; 2997 2998 // clang-format off 2999 unsigned NewOpc; 3000 switch (MI.getOpcode()) { 3001 default: 3002 llvm_unreachable("Unexpected opcode"); 3003 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV) 3004 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV) 3005 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV) 3006 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV) 3007 } 3008 // clang-format on 3009 3010 MachineBasicBlock &MBB = *MI.getParent(); 3011 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 3012 .add(MI.getOperand(0)) 3013 .addReg(MI.getOperand(0).getReg(), RegState::Undef) 3014 .add(MI.getOperand(1)) 3015 .add(MI.getOperand(2)) 3016 .add(MI.getOperand(3)) 3017 .add(MI.getOperand(4)) 3018 .add(MI.getOperand(5)); 3019 break; 3020 } 3021 } 3022 MIB.copyImplicitOps(MI); 3023 3024 if (LV) { 3025 unsigned NumOps = MI.getNumOperands(); 3026 for (unsigned I = 1; I < NumOps; ++I) { 3027 MachineOperand &Op = MI.getOperand(I); 3028 if (Op.isReg() && Op.isKill()) 3029 LV->replaceKillInstruction(Op.getReg(), MI, *MIB); 3030 } 3031 } 3032 3033 if (LIS) { 3034 SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB); 3035 3036 if (MI.getOperand(0).isEarlyClobber()) { 3037 // Use operand 1 was tied to early-clobber def operand 0, so its live 3038 // interval could have ended at an early-clobber slot. Now they are not 3039 // tied we need to update it to the normal register slot. 
3040 LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg()); 3041 LiveRange::Segment *S = LI.getSegmentContaining(Idx); 3042 if (S->end == Idx.getRegSlot(true)) 3043 S->end = Idx.getRegSlot(); 3044 } 3045 } 3046 3047 return MIB; 3048 } 3049 3050 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS 3051 #undef CASE_WIDEOP_CHANGE_OPCODE_COMMON 3052 #undef CASE_WIDEOP_OPCODE_LMULS 3053 #undef CASE_WIDEOP_OPCODE_COMMON 3054 3055 void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, 3056 MachineBasicBlock &MBB, 3057 MachineBasicBlock::iterator II, 3058 const DebugLoc &DL, Register DestReg, 3059 int64_t Amount, 3060 MachineInstr::MIFlag Flag) const { 3061 assert(Amount > 0 && "There is no need to get VLEN scaled value."); 3062 assert(Amount % 8 == 0 && 3063 "Reserve the stack by the multiple of one vector size."); 3064 3065 MachineRegisterInfo &MRI = MF.getRegInfo(); 3066 int64_t NumOfVReg = Amount / 8; 3067 3068 BuildMI(MBB, II, DL, get(RISCV::PseudoReadVLENB), DestReg).setMIFlag(Flag); 3069 assert(isInt<32>(NumOfVReg) && 3070 "Expect the number of vector registers within 32-bits."); 3071 if (llvm::has_single_bit<uint32_t>(NumOfVReg)) { 3072 uint32_t ShiftAmount = Log2_32(NumOfVReg); 3073 if (ShiftAmount == 0) 3074 return; 3075 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3076 .addReg(DestReg, RegState::Kill) 3077 .addImm(ShiftAmount) 3078 .setMIFlag(Flag); 3079 } else if (STI.hasStdExtZba() && 3080 ((NumOfVReg % 3 == 0 && isPowerOf2_64(NumOfVReg / 3)) || 3081 (NumOfVReg % 5 == 0 && isPowerOf2_64(NumOfVReg / 5)) || 3082 (NumOfVReg % 9 == 0 && isPowerOf2_64(NumOfVReg / 9)))) { 3083 // We can use Zba SHXADD+SLLI instructions for multiply in some cases. 3084 unsigned Opc; 3085 uint32_t ShiftAmount; 3086 if (NumOfVReg % 9 == 0) { 3087 Opc = RISCV::SH3ADD; 3088 ShiftAmount = Log2_64(NumOfVReg / 9); 3089 } else if (NumOfVReg % 5 == 0) { 3090 Opc = RISCV::SH2ADD; 3091 ShiftAmount = Log2_64(NumOfVReg / 5); 3092 } else if (NumOfVReg % 3 == 0) { 3093 Opc = RISCV::SH1ADD; 3094 ShiftAmount = Log2_64(NumOfVReg / 3); 3095 } else { 3096 llvm_unreachable("Unexpected number of vregs"); 3097 } 3098 if (ShiftAmount) 3099 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3100 .addReg(DestReg, RegState::Kill) 3101 .addImm(ShiftAmount) 3102 .setMIFlag(Flag); 3103 BuildMI(MBB, II, DL, get(Opc), DestReg) 3104 .addReg(DestReg, RegState::Kill) 3105 .addReg(DestReg) 3106 .setMIFlag(Flag); 3107 } else if (llvm::has_single_bit<uint32_t>(NumOfVReg - 1)) { 3108 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3109 uint32_t ShiftAmount = Log2_32(NumOfVReg - 1); 3110 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) 3111 .addReg(DestReg) 3112 .addImm(ShiftAmount) 3113 .setMIFlag(Flag); 3114 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg) 3115 .addReg(ScaledRegister, RegState::Kill) 3116 .addReg(DestReg, RegState::Kill) 3117 .setMIFlag(Flag); 3118 } else if (llvm::has_single_bit<uint32_t>(NumOfVReg + 1)) { 3119 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3120 uint32_t ShiftAmount = Log2_32(NumOfVReg + 1); 3121 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) 3122 .addReg(DestReg) 3123 .addImm(ShiftAmount) 3124 .setMIFlag(Flag); 3125 BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg) 3126 .addReg(ScaledRegister, RegState::Kill) 3127 .addReg(DestReg, RegState::Kill) 3128 .setMIFlag(Flag); 3129 } else { 3130 Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3131 movImm(MBB, II, DL, N, NumOfVReg, Flag); 3132 if (!STI.hasStdExtM() && !STI.hasStdExtZmmul()) 
3133 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 3134 MF.getFunction(), 3135 "M- or Zmmul-extension must be enabled to calculate the vscaled size/" 3136 "offset."}); 3137 BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg) 3138 .addReg(DestReg, RegState::Kill) 3139 .addReg(N, RegState::Kill) 3140 .setMIFlag(Flag); 3141 } 3142 } 3143 3144 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 3145 RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const { 3146 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 3147 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"}, 3148 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}}; 3149 return ArrayRef(TargetFlags); 3150 } 3151 3152 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0. 3153 bool RISCV::isSEXT_W(const MachineInstr &MI) { 3154 return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() && 3155 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0; 3156 } 3157 3158 // Returns true if this is the zext.w pattern, adduw rd, rs1, x0. 3159 bool RISCV::isZEXT_W(const MachineInstr &MI) { 3160 return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() && 3161 MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0; 3162 } 3163 3164 // Returns true if this is the zext.b pattern, andi rd, rs1, 255. 3165 bool RISCV::isZEXT_B(const MachineInstr &MI) { 3166 return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() && 3167 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255; 3168 } 3169 3170 static bool isRVVWholeLoadStore(unsigned Opcode) { 3171 switch (Opcode) { 3172 default: 3173 return false; 3174 case RISCV::VS1R_V: 3175 case RISCV::VS2R_V: 3176 case RISCV::VS4R_V: 3177 case RISCV::VS8R_V: 3178 case RISCV::VL1RE8_V: 3179 case RISCV::VL2RE8_V: 3180 case RISCV::VL4RE8_V: 3181 case RISCV::VL8RE8_V: 3182 case RISCV::VL1RE16_V: 3183 case RISCV::VL2RE16_V: 3184 case RISCV::VL4RE16_V: 3185 case RISCV::VL8RE16_V: 3186 case RISCV::VL1RE32_V: 3187 case RISCV::VL2RE32_V: 3188 case RISCV::VL4RE32_V: 3189 case RISCV::VL8RE32_V: 3190 case RISCV::VL1RE64_V: 3191 case RISCV::VL2RE64_V: 3192 case RISCV::VL4RE64_V: 3193 case RISCV::VL8RE64_V: 3194 return true; 3195 } 3196 } 3197 3198 bool RISCV::isRVVSpill(const MachineInstr &MI) { 3199 // RVV lacks any support for immediate addressing for stack addresses, so be 3200 // conservative. 
3201 unsigned Opcode = MI.getOpcode(); 3202 if (!RISCVVPseudosTable::getPseudoInfo(Opcode) && 3203 !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode)) 3204 return false; 3205 return true; 3206 } 3207 3208 std::optional<std::pair<unsigned, unsigned>> 3209 RISCV::isRVVSpillForZvlsseg(unsigned Opcode) { 3210 switch (Opcode) { 3211 default: 3212 return std::nullopt; 3213 case RISCV::PseudoVSPILL2_M1: 3214 case RISCV::PseudoVRELOAD2_M1: 3215 return std::make_pair(2u, 1u); 3216 case RISCV::PseudoVSPILL2_M2: 3217 case RISCV::PseudoVRELOAD2_M2: 3218 return std::make_pair(2u, 2u); 3219 case RISCV::PseudoVSPILL2_M4: 3220 case RISCV::PseudoVRELOAD2_M4: 3221 return std::make_pair(2u, 4u); 3222 case RISCV::PseudoVSPILL3_M1: 3223 case RISCV::PseudoVRELOAD3_M1: 3224 return std::make_pair(3u, 1u); 3225 case RISCV::PseudoVSPILL3_M2: 3226 case RISCV::PseudoVRELOAD3_M2: 3227 return std::make_pair(3u, 2u); 3228 case RISCV::PseudoVSPILL4_M1: 3229 case RISCV::PseudoVRELOAD4_M1: 3230 return std::make_pair(4u, 1u); 3231 case RISCV::PseudoVSPILL4_M2: 3232 case RISCV::PseudoVRELOAD4_M2: 3233 return std::make_pair(4u, 2u); 3234 case RISCV::PseudoVSPILL5_M1: 3235 case RISCV::PseudoVRELOAD5_M1: 3236 return std::make_pair(5u, 1u); 3237 case RISCV::PseudoVSPILL6_M1: 3238 case RISCV::PseudoVRELOAD6_M1: 3239 return std::make_pair(6u, 1u); 3240 case RISCV::PseudoVSPILL7_M1: 3241 case RISCV::PseudoVRELOAD7_M1: 3242 return std::make_pair(7u, 1u); 3243 case RISCV::PseudoVSPILL8_M1: 3244 case RISCV::PseudoVRELOAD8_M1: 3245 return std::make_pair(8u, 1u); 3246 } 3247 } 3248 3249 bool RISCV::isFaultFirstLoad(const MachineInstr &MI) { 3250 return MI.getNumExplicitDefs() == 2 && MI.modifiesRegister(RISCV::VL) && 3251 !MI.isInlineAsm(); 3252 } 3253 3254 bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) { 3255 int16_t MI1FrmOpIdx = 3256 RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm); 3257 int16_t MI2FrmOpIdx = 3258 RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm); 3259 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0) 3260 return false; 3261 MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx); 3262 MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx); 3263 return FrmOp1.getImm() == FrmOp2.getImm(); 3264 } 3265 3266 std::optional<unsigned> 3267 RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) { 3268 // TODO: Handle Zvbb instructions 3269 switch (Opcode) { 3270 default: 3271 return std::nullopt; 3272 3273 // 11.6. Vector Single-Width Shift Instructions 3274 case RISCV::VSLL_VX: 3275 case RISCV::VSRL_VX: 3276 case RISCV::VSRA_VX: 3277 // 12.4. Vector Single-Width Scaling Shift Instructions 3278 case RISCV::VSSRL_VX: 3279 case RISCV::VSSRA_VX: 3280 // Only the low lg2(SEW) bits of the shift-amount value are used. 3281 return Log2SEW; 3282 3283 // 11.7 Vector Narrowing Integer Right Shift Instructions 3284 case RISCV::VNSRL_WX: 3285 case RISCV::VNSRA_WX: 3286 // 12.5. Vector Narrowing Fixed-Point Clip Instructions 3287 case RISCV::VNCLIPU_WX: 3288 case RISCV::VNCLIP_WX: 3289 // Only the low lg2(2*SEW) bits of the shift-amount value are used. 3290 return Log2SEW + 1; 3291 3292 // 11.1. Vector Single-Width Integer Add and Subtract 3293 case RISCV::VADD_VX: 3294 case RISCV::VSUB_VX: 3295 case RISCV::VRSUB_VX: 3296 // 11.2. 
Vector Widening Integer Add/Subtract 3297 case RISCV::VWADDU_VX: 3298 case RISCV::VWSUBU_VX: 3299 case RISCV::VWADD_VX: 3300 case RISCV::VWSUB_VX: 3301 case RISCV::VWADDU_WX: 3302 case RISCV::VWSUBU_WX: 3303 case RISCV::VWADD_WX: 3304 case RISCV::VWSUB_WX: 3305 // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions 3306 case RISCV::VADC_VXM: 3307 case RISCV::VADC_VIM: 3308 case RISCV::VMADC_VXM: 3309 case RISCV::VMADC_VIM: 3310 case RISCV::VMADC_VX: 3311 case RISCV::VSBC_VXM: 3312 case RISCV::VMSBC_VXM: 3313 case RISCV::VMSBC_VX: 3314 // 11.5 Vector Bitwise Logical Instructions 3315 case RISCV::VAND_VX: 3316 case RISCV::VOR_VX: 3317 case RISCV::VXOR_VX: 3318 // 11.8. Vector Integer Compare Instructions 3319 case RISCV::VMSEQ_VX: 3320 case RISCV::VMSNE_VX: 3321 case RISCV::VMSLTU_VX: 3322 case RISCV::VMSLT_VX: 3323 case RISCV::VMSLEU_VX: 3324 case RISCV::VMSLE_VX: 3325 case RISCV::VMSGTU_VX: 3326 case RISCV::VMSGT_VX: 3327 // 11.9. Vector Integer Min/Max Instructions 3328 case RISCV::VMINU_VX: 3329 case RISCV::VMIN_VX: 3330 case RISCV::VMAXU_VX: 3331 case RISCV::VMAX_VX: 3332 // 11.10. Vector Single-Width Integer Multiply Instructions 3333 case RISCV::VMUL_VX: 3334 case RISCV::VMULH_VX: 3335 case RISCV::VMULHU_VX: 3336 case RISCV::VMULHSU_VX: 3337 // 11.11. Vector Integer Divide Instructions 3338 case RISCV::VDIVU_VX: 3339 case RISCV::VDIV_VX: 3340 case RISCV::VREMU_VX: 3341 case RISCV::VREM_VX: 3342 // 11.12. Vector Widening Integer Multiply Instructions 3343 case RISCV::VWMUL_VX: 3344 case RISCV::VWMULU_VX: 3345 case RISCV::VWMULSU_VX: 3346 // 11.13. Vector Single-Width Integer Multiply-Add Instructions 3347 case RISCV::VMACC_VX: 3348 case RISCV::VNMSAC_VX: 3349 case RISCV::VMADD_VX: 3350 case RISCV::VNMSUB_VX: 3351 // 11.14. Vector Widening Integer Multiply-Add Instructions 3352 case RISCV::VWMACCU_VX: 3353 case RISCV::VWMACC_VX: 3354 case RISCV::VWMACCSU_VX: 3355 case RISCV::VWMACCUS_VX: 3356 // 11.15. Vector Integer Merge Instructions 3357 case RISCV::VMERGE_VXM: 3358 // 11.16. Vector Integer Move Instructions 3359 case RISCV::VMV_V_X: 3360 // 12.1. Vector Single-Width Saturating Add and Subtract 3361 case RISCV::VSADDU_VX: 3362 case RISCV::VSADD_VX: 3363 case RISCV::VSSUBU_VX: 3364 case RISCV::VSSUB_VX: 3365 // 12.2. Vector Single-Width Averaging Add and Subtract 3366 case RISCV::VAADDU_VX: 3367 case RISCV::VAADD_VX: 3368 case RISCV::VASUBU_VX: 3369 case RISCV::VASUB_VX: 3370 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation 3371 case RISCV::VSMUL_VX: 3372 // 16.1. Integer Scalar Move Instructions 3373 case RISCV::VMV_S_X: 3374 return 1U << Log2SEW; 3375 } 3376 } 3377 3378 unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) { 3379 const RISCVVPseudosTable::PseudoInfo *RVV = 3380 RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode); 3381 if (!RVV) 3382 return 0; 3383 return RVV->BaseInstr; 3384 } 3385