1 //===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the RISC-V implementation of the TargetInstrInfo class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVInstrInfo.h" 14 #include "MCTargetDesc/RISCVMatInt.h" 15 #include "RISCV.h" 16 #include "RISCVMachineFunctionInfo.h" 17 #include "RISCVSubtarget.h" 18 #include "RISCVTargetMachine.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/Analysis/MemoryLocation.h" 22 #include "llvm/Analysis/ValueTracking.h" 23 #include "llvm/CodeGen/LiveIntervals.h" 24 #include "llvm/CodeGen/LiveVariables.h" 25 #include "llvm/CodeGen/MachineCombinerPattern.h" 26 #include "llvm/CodeGen/MachineFunctionPass.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/MachineTraceMetrics.h" 30 #include "llvm/CodeGen/RegisterScavenging.h" 31 #include "llvm/CodeGen/StackMaps.h" 32 #include "llvm/IR/DebugInfoMetadata.h" 33 #include "llvm/MC/MCInstBuilder.h" 34 #include "llvm/MC/TargetRegistry.h" 35 #include "llvm/Support/ErrorHandling.h" 36 37 using namespace llvm; 38 39 #define GEN_CHECK_COMPRESS_INSTR 40 #include "RISCVGenCompressInstEmitter.inc" 41 42 #define GET_INSTRINFO_CTOR_DTOR 43 #define GET_INSTRINFO_NAMED_OPS 44 #include "RISCVGenInstrInfo.inc" 45 46 static cl::opt<bool> PreferWholeRegisterMove( 47 "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden, 48 cl::desc("Prefer whole register move for vector registers.")); 49 50 static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy( 51 "riscv-force-machine-combiner-strategy", cl::Hidden, 52 cl::desc("Force machine combiner to use a specific strategy for machine " 53 "trace metrics evaluation."), 54 cl::init(MachineTraceStrategy::TS_NumStrategies), 55 cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local", 56 "Local strategy."), 57 clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr", 58 "MinInstrCount strategy."))); 59 60 namespace llvm::RISCVVPseudosTable { 61 62 using namespace RISCV; 63 64 #define GET_RISCVVPseudosTable_IMPL 65 #include "RISCVGenSearchableTables.inc" 66 67 } // namespace llvm::RISCVVPseudosTable 68 69 RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI) 70 : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP), 71 STI(STI) {} 72 73 MCInst RISCVInstrInfo::getNop() const { 74 if (STI.hasStdExtCOrZca()) 75 return MCInstBuilder(RISCV::C_NOP); 76 return MCInstBuilder(RISCV::ADDI) 77 .addReg(RISCV::X0) 78 .addReg(RISCV::X0) 79 .addImm(0); 80 } 81 82 unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 83 int &FrameIndex) const { 84 unsigned Dummy; 85 return isLoadFromStackSlot(MI, FrameIndex, Dummy); 86 } 87 88 unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, 89 int &FrameIndex, 90 unsigned &MemBytes) const { 91 switch (MI.getOpcode()) { 92 default: 93 return 0; 94 case RISCV::LB: 95 case RISCV::LBU: 96 MemBytes = 1; 97 break; 98 case RISCV::LH: 99 case RISCV::LHU: 100 case RISCV::FLH: 101 MemBytes = 2; 102 break; 103 case RISCV::LW: 104 case RISCV::FLW: 105 case RISCV::LWU: 106 MemBytes = 4; 
107 break; 108 case RISCV::LD: 109 case RISCV::FLD: 110 MemBytes = 8; 111 break; 112 } 113 114 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && 115 MI.getOperand(2).getImm() == 0) { 116 FrameIndex = MI.getOperand(1).getIndex(); 117 return MI.getOperand(0).getReg(); 118 } 119 120 return 0; 121 } 122 123 unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI, 124 int &FrameIndex) const { 125 unsigned Dummy; 126 return isStoreToStackSlot(MI, FrameIndex, Dummy); 127 } 128 129 unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI, 130 int &FrameIndex, 131 unsigned &MemBytes) const { 132 switch (MI.getOpcode()) { 133 default: 134 return 0; 135 case RISCV::SB: 136 MemBytes = 1; 137 break; 138 case RISCV::SH: 139 case RISCV::FSH: 140 MemBytes = 2; 141 break; 142 case RISCV::SW: 143 case RISCV::FSW: 144 MemBytes = 4; 145 break; 146 case RISCV::SD: 147 case RISCV::FSD: 148 MemBytes = 8; 149 break; 150 } 151 152 if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && 153 MI.getOperand(2).getImm() == 0) { 154 FrameIndex = MI.getOperand(1).getIndex(); 155 return MI.getOperand(0).getReg(); 156 } 157 158 return 0; 159 } 160 161 static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg, 162 unsigned NumRegs) { 163 return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs; 164 } 165 166 static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI, 167 const MachineBasicBlock &MBB, 168 MachineBasicBlock::const_iterator MBBI, 169 MachineBasicBlock::const_iterator &DefMBBI, 170 RISCVII::VLMUL LMul) { 171 if (PreferWholeRegisterMove) 172 return false; 173 174 assert(MBBI->getOpcode() == TargetOpcode::COPY && 175 "Unexpected COPY instruction."); 176 Register SrcReg = MBBI->getOperand(1).getReg(); 177 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 178 179 bool FoundDef = false; 180 bool FirstVSetVLI = false; 181 unsigned FirstSEW = 0; 182 while (MBBI != MBB.begin()) { 183 --MBBI; 184 if (MBBI->isMetaInstruction()) 185 continue; 186 187 if (MBBI->getOpcode() == RISCV::PseudoVSETVLI || 188 MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 || 189 MBBI->getOpcode() == RISCV::PseudoVSETIVLI) { 190 // There is a vsetvli between COPY and source define instruction. 191 // vy = def_vop ... (producing instruction) 192 // ... 193 // vsetvli 194 // ... 195 // vx = COPY vy 196 if (!FoundDef) { 197 if (!FirstVSetVLI) { 198 FirstVSetVLI = true; 199 unsigned FirstVType = MBBI->getOperand(2).getImm(); 200 RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType); 201 FirstSEW = RISCVVType::getSEW(FirstVType); 202 // The first encountered vsetvli must have the same lmul as the 203 // register class of COPY. 204 if (FirstLMul != LMul) 205 return false; 206 } 207 // Only permit `vsetvli x0, x0, vtype` between COPY and the source 208 // define instruction. 209 if (MBBI->getOperand(0).getReg() != RISCV::X0) 210 return false; 211 if (MBBI->getOperand(1).isImm()) 212 return false; 213 if (MBBI->getOperand(1).getReg() != RISCV::X0) 214 return false; 215 continue; 216 } 217 218 // MBBI is the first vsetvli before the producing instruction. 219 unsigned VType = MBBI->getOperand(2).getImm(); 220 // If there is a vsetvli between COPY and the producing instruction. 221 if (FirstVSetVLI) { 222 // If SEW is different, return false. 223 if (RISCVVType::getSEW(VType) != FirstSEW) 224 return false; 225 } 226 227 // If the vsetvli is tail undisturbed, keep the whole register move. 228 if (!RISCVVType::isTailAgnostic(VType)) 229 return false; 230 231 // The checking is conservative. 
We only have register classes for 232 // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v 233 // for fractional LMUL operations. However, we cannot use the vsetvli 234 // LMUL for widening operations, since the result of a widening operation 235 // is 2 x LMUL. 236 return LMul == RISCVVType::getVLMUL(VType); 237 } else if (MBBI->isInlineAsm() || MBBI->isCall()) { 238 return false; 239 } else if (MBBI->getNumDefs()) { 240 // Check all the instructions that may change VL. 241 // For example, vleff has an implicit def of VL. 242 if (MBBI->modifiesRegister(RISCV::VL)) 243 return false; 244 245 // Only convert whole-register copies to vmv.v.v when the defining 246 // value appears in the explicit operands. 247 for (const MachineOperand &MO : MBBI->explicit_operands()) { 248 if (!MO.isReg() || !MO.isDef()) 249 continue; 250 if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) { 251 // We only permit the COPY source to have the same LMUL as the defined 252 // operand. 253 // There are cases where we must keep the whole-register copy when the 254 // LMUL differs. 255 // For example, 256 // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,mu 257 // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2 258 // # The COPY may be created by the vlmul_trunc intrinsic. 259 // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4 260 // 261 // After widening, the valid value will be 4 x e32 elements. If we 262 // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements. 263 // FIXME: A COPY of a subregister of a Zvlsseg register cannot be 264 // converted to vmv.v.[v|i] under this constraint. 265 if (MO.getReg() != SrcReg) 266 return false; 267 268 // For widening reduction instructions with an LMUL_1 input vector, 269 // checking only the LMUL is insufficient because the reduction result 270 // is always LMUL_1. 271 // For example, 272 // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu 273 // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27 274 // $v26 = COPY killed renamable $v8 275 // After widening, the valid value will be 1 x e16 elements. If we 276 // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements. 277 uint64_t TSFlags = MBBI->getDesc().TSFlags; 278 if (RISCVII::isRVVWideningReduction(TSFlags)) 279 return false; 280 281 // If the producing instruction does not depend on vsetvli, do not 282 // convert the COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD. 283 if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags)) 284 return false; 285 286 // Found the definition.
287 FoundDef = true; 288 DefMBBI = MBBI; 289 break; 290 } 291 } 292 } 293 } 294 295 return false; 296 } 297 298 void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB, 299 MachineBasicBlock::iterator MBBI, 300 const DebugLoc &DL, MCRegister DstReg, 301 MCRegister SrcReg, bool KillSrc, 302 unsigned Opc, unsigned NF) const { 303 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 304 305 RISCVII::VLMUL LMul; 306 unsigned SubRegIdx; 307 unsigned VVOpc, VIOpc; 308 switch (Opc) { 309 default: 310 llvm_unreachable("Impossible LMUL for vector register copy."); 311 case RISCV::VMV1R_V: 312 LMul = RISCVII::LMUL_1; 313 SubRegIdx = RISCV::sub_vrm1_0; 314 VVOpc = RISCV::PseudoVMV_V_V_M1; 315 VIOpc = RISCV::PseudoVMV_V_I_M1; 316 break; 317 case RISCV::VMV2R_V: 318 LMul = RISCVII::LMUL_2; 319 SubRegIdx = RISCV::sub_vrm2_0; 320 VVOpc = RISCV::PseudoVMV_V_V_M2; 321 VIOpc = RISCV::PseudoVMV_V_I_M2; 322 break; 323 case RISCV::VMV4R_V: 324 LMul = RISCVII::LMUL_4; 325 SubRegIdx = RISCV::sub_vrm4_0; 326 VVOpc = RISCV::PseudoVMV_V_V_M4; 327 VIOpc = RISCV::PseudoVMV_V_I_M4; 328 break; 329 case RISCV::VMV8R_V: 330 assert(NF == 1); 331 LMul = RISCVII::LMUL_8; 332 SubRegIdx = RISCV::sub_vrm1_0; // There is no sub_vrm8_0. 333 VVOpc = RISCV::PseudoVMV_V_V_M8; 334 VIOpc = RISCV::PseudoVMV_V_I_M8; 335 break; 336 } 337 338 bool UseVMV_V_V = false; 339 bool UseVMV_V_I = false; 340 MachineBasicBlock::const_iterator DefMBBI; 341 if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) { 342 UseVMV_V_V = true; 343 Opc = VVOpc; 344 345 if (DefMBBI->getOpcode() == VIOpc) { 346 UseVMV_V_I = true; 347 Opc = VIOpc; 348 } 349 } 350 351 if (NF == 1) { 352 auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg); 353 if (UseVMV_V_V) 354 MIB.addReg(DstReg, RegState::Undef); 355 if (UseVMV_V_I) 356 MIB = MIB.add(DefMBBI->getOperand(2)); 357 else 358 MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc)); 359 if (UseVMV_V_V) { 360 const MCInstrDesc &Desc = DefMBBI->getDesc(); 361 MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL 362 MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW 363 MIB.addImm(0); // tu, mu 364 MIB.addReg(RISCV::VL, RegState::Implicit); 365 MIB.addReg(RISCV::VTYPE, RegState::Implicit); 366 } 367 return; 368 } 369 370 int I = 0, End = NF, Incr = 1; 371 unsigned SrcEncoding = TRI->getEncodingValue(SrcReg); 372 unsigned DstEncoding = TRI->getEncodingValue(DstReg); 373 unsigned LMulVal; 374 bool Fractional; 375 std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul); 376 assert(!Fractional && "It is impossible be fractional lmul here."); 377 if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) { 378 I = NF - 1; 379 End = -1; 380 Incr = -1; 381 } 382 383 for (; I != End; I += Incr) { 384 auto MIB = 385 BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I)); 386 if (UseVMV_V_V) 387 MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I), RegState::Undef); 388 if (UseVMV_V_I) 389 MIB = MIB.add(DefMBBI->getOperand(2)); 390 else 391 MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I), 392 getKillRegState(KillSrc)); 393 if (UseVMV_V_V) { 394 const MCInstrDesc &Desc = DefMBBI->getDesc(); 395 MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL 396 MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW 397 MIB.addImm(0); // tu, mu 398 MIB.addReg(RISCV::VL, RegState::Implicit); 399 MIB.addReg(RISCV::VTYPE, RegState::Implicit); 400 } 401 } 402 } 403 404 void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 405 
MachineBasicBlock::iterator MBBI, 406 const DebugLoc &DL, MCRegister DstReg, 407 MCRegister SrcReg, bool KillSrc) const { 408 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 409 410 if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) { 411 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg) 412 .addReg(SrcReg, getKillRegState(KillSrc)) 413 .addImm(0); 414 return; 415 } 416 417 if (RISCV::GPRPF64RegClass.contains(DstReg, SrcReg)) { 418 // Emit an ADDI for both parts of GPRPF64. 419 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), 420 TRI->getSubReg(DstReg, RISCV::sub_32)) 421 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32), getKillRegState(KillSrc)) 422 .addImm(0); 423 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), 424 TRI->getSubReg(DstReg, RISCV::sub_32_hi)) 425 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32_hi), 426 getKillRegState(KillSrc)) 427 .addImm(0); 428 return; 429 } 430 431 // Handle copy from csr 432 if (RISCV::VCSRRegClass.contains(SrcReg) && 433 RISCV::GPRRegClass.contains(DstReg)) { 434 BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg) 435 .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding) 436 .addReg(RISCV::X0); 437 return; 438 } 439 440 if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) { 441 unsigned Opc; 442 if (STI.hasStdExtZfh()) { 443 Opc = RISCV::FSGNJ_H; 444 } else { 445 assert(STI.hasStdExtF() && 446 (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) && 447 "Unexpected extensions"); 448 // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S. 449 DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16, 450 &RISCV::FPR32RegClass); 451 SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16, 452 &RISCV::FPR32RegClass); 453 Opc = RISCV::FSGNJ_S; 454 } 455 BuildMI(MBB, MBBI, DL, get(Opc), DstReg) 456 .addReg(SrcReg, getKillRegState(KillSrc)) 457 .addReg(SrcReg, getKillRegState(KillSrc)); 458 return; 459 } 460 461 if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) { 462 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg) 463 .addReg(SrcReg, getKillRegState(KillSrc)) 464 .addReg(SrcReg, getKillRegState(KillSrc)); 465 return; 466 } 467 468 if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) { 469 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg) 470 .addReg(SrcReg, getKillRegState(KillSrc)) 471 .addReg(SrcReg, getKillRegState(KillSrc)); 472 return; 473 } 474 475 if (RISCV::FPR32RegClass.contains(DstReg) && 476 RISCV::GPRRegClass.contains(SrcReg)) { 477 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg) 478 .addReg(SrcReg, getKillRegState(KillSrc)); 479 return; 480 } 481 482 if (RISCV::GPRRegClass.contains(DstReg) && 483 RISCV::FPR32RegClass.contains(SrcReg)) { 484 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg) 485 .addReg(SrcReg, getKillRegState(KillSrc)); 486 return; 487 } 488 489 if (RISCV::FPR64RegClass.contains(DstReg) && 490 RISCV::GPRRegClass.contains(SrcReg)) { 491 assert(STI.getXLen() == 64 && "Unexpected GPR size"); 492 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg) 493 .addReg(SrcReg, getKillRegState(KillSrc)); 494 return; 495 } 496 497 if (RISCV::GPRRegClass.contains(DstReg) && 498 RISCV::FPR64RegClass.contains(SrcReg)) { 499 assert(STI.getXLen() == 64 && "Unexpected GPR size"); 500 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg) 501 .addReg(SrcReg, getKillRegState(KillSrc)); 502 return; 503 } 504 505 // VR->VR copies. 
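// The cases below dispatch on the vector register class: the VMV<n>R_V opcode
// encodes how many vector registers one field occupies (the LMUL), and NF > 1
// selects a segment-register tuple that copyPhysRegVector copies field by field.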
506 if (RISCV::VRRegClass.contains(DstReg, SrcReg)) { 507 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V); 508 return; 509 } 510 511 if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) { 512 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V); 513 return; 514 } 515 516 if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) { 517 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V); 518 return; 519 } 520 521 if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) { 522 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV8R_V); 523 return; 524 } 525 526 if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) { 527 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 528 /*NF=*/2); 529 return; 530 } 531 532 if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) { 533 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V, 534 /*NF=*/2); 535 return; 536 } 537 538 if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) { 539 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V, 540 /*NF=*/2); 541 return; 542 } 543 544 if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) { 545 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 546 /*NF=*/3); 547 return; 548 } 549 550 if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) { 551 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V, 552 /*NF=*/3); 553 return; 554 } 555 556 if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) { 557 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 558 /*NF=*/4); 559 return; 560 } 561 562 if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) { 563 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V, 564 /*NF=*/4); 565 return; 566 } 567 568 if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) { 569 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 570 /*NF=*/5); 571 return; 572 } 573 574 if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) { 575 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 576 /*NF=*/6); 577 return; 578 } 579 580 if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) { 581 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 582 /*NF=*/7); 583 return; 584 } 585 586 if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) { 587 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V, 588 /*NF=*/8); 589 return; 590 } 591 592 llvm_unreachable("Impossible reg-to-reg copy"); 593 } 594 595 void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 596 MachineBasicBlock::iterator I, 597 Register SrcReg, bool IsKill, int FI, 598 const TargetRegisterClass *RC, 599 const TargetRegisterInfo *TRI, 600 Register VReg) const { 601 MachineFunction *MF = MBB.getParent(); 602 MachineFrameInfo &MFI = MF->getFrameInfo(); 603 604 unsigned Opcode; 605 bool IsScalableVector = true; 606 if (RISCV::GPRRegClass.hasSubClassEq(RC)) { 607 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? 
608 RISCV::SW : RISCV::SD; 609 IsScalableVector = false; 610 } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) { 611 Opcode = RISCV::PseudoRV32ZdinxSD; 612 IsScalableVector = false; 613 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { 614 Opcode = RISCV::FSH; 615 IsScalableVector = false; 616 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) { 617 Opcode = RISCV::FSW; 618 IsScalableVector = false; 619 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) { 620 Opcode = RISCV::FSD; 621 IsScalableVector = false; 622 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) { 623 Opcode = RISCV::VS1R_V; 624 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) { 625 Opcode = RISCV::VS2R_V; 626 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) { 627 Opcode = RISCV::VS4R_V; 628 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) { 629 Opcode = RISCV::VS8R_V; 630 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC)) 631 Opcode = RISCV::PseudoVSPILL2_M1; 632 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC)) 633 Opcode = RISCV::PseudoVSPILL2_M2; 634 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC)) 635 Opcode = RISCV::PseudoVSPILL2_M4; 636 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC)) 637 Opcode = RISCV::PseudoVSPILL3_M1; 638 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC)) 639 Opcode = RISCV::PseudoVSPILL3_M2; 640 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC)) 641 Opcode = RISCV::PseudoVSPILL4_M1; 642 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC)) 643 Opcode = RISCV::PseudoVSPILL4_M2; 644 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC)) 645 Opcode = RISCV::PseudoVSPILL5_M1; 646 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC)) 647 Opcode = RISCV::PseudoVSPILL6_M1; 648 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC)) 649 Opcode = RISCV::PseudoVSPILL7_M1; 650 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC)) 651 Opcode = RISCV::PseudoVSPILL8_M1; 652 else 653 llvm_unreachable("Can't store this register to stack slot"); 654 655 if (IsScalableVector) { 656 MachineMemOperand *MMO = MF->getMachineMemOperand( 657 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 658 MemoryLocation::UnknownSize, MFI.getObjectAlign(FI)); 659 660 MFI.setStackID(FI, TargetStackID::ScalableVector); 661 BuildMI(MBB, I, DebugLoc(), get(Opcode)) 662 .addReg(SrcReg, getKillRegState(IsKill)) 663 .addFrameIndex(FI) 664 .addMemOperand(MMO); 665 } else { 666 MachineMemOperand *MMO = MF->getMachineMemOperand( 667 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 668 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 669 670 BuildMI(MBB, I, DebugLoc(), get(Opcode)) 671 .addReg(SrcReg, getKillRegState(IsKill)) 672 .addFrameIndex(FI) 673 .addImm(0) 674 .addMemOperand(MMO); 675 } 676 } 677 678 void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 679 MachineBasicBlock::iterator I, 680 Register DstReg, int FI, 681 const TargetRegisterClass *RC, 682 const TargetRegisterInfo *TRI, 683 Register VReg) const { 684 MachineFunction *MF = MBB.getParent(); 685 MachineFrameInfo &MFI = MF->getFrameInfo(); 686 687 unsigned Opcode; 688 bool IsScalableVector = true; 689 if (RISCV::GPRRegClass.hasSubClassEq(RC)) { 690 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? 
691 RISCV::LW : RISCV::LD; 692 IsScalableVector = false; 693 } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) { 694 Opcode = RISCV::PseudoRV32ZdinxLD; 695 IsScalableVector = false; 696 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { 697 Opcode = RISCV::FLH; 698 IsScalableVector = false; 699 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) { 700 Opcode = RISCV::FLW; 701 IsScalableVector = false; 702 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) { 703 Opcode = RISCV::FLD; 704 IsScalableVector = false; 705 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) { 706 Opcode = RISCV::VL1RE8_V; 707 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) { 708 Opcode = RISCV::VL2RE8_V; 709 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) { 710 Opcode = RISCV::VL4RE8_V; 711 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) { 712 Opcode = RISCV::VL8RE8_V; 713 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC)) 714 Opcode = RISCV::PseudoVRELOAD2_M1; 715 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC)) 716 Opcode = RISCV::PseudoVRELOAD2_M2; 717 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC)) 718 Opcode = RISCV::PseudoVRELOAD2_M4; 719 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC)) 720 Opcode = RISCV::PseudoVRELOAD3_M1; 721 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC)) 722 Opcode = RISCV::PseudoVRELOAD3_M2; 723 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC)) 724 Opcode = RISCV::PseudoVRELOAD4_M1; 725 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC)) 726 Opcode = RISCV::PseudoVRELOAD4_M2; 727 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC)) 728 Opcode = RISCV::PseudoVRELOAD5_M1; 729 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC)) 730 Opcode = RISCV::PseudoVRELOAD6_M1; 731 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC)) 732 Opcode = RISCV::PseudoVRELOAD7_M1; 733 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC)) 734 Opcode = RISCV::PseudoVRELOAD8_M1; 735 else 736 llvm_unreachable("Can't load this register from stack slot"); 737 738 if (IsScalableVector) { 739 MachineMemOperand *MMO = MF->getMachineMemOperand( 740 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 741 MemoryLocation::UnknownSize, MFI.getObjectAlign(FI)); 742 743 MFI.setStackID(FI, TargetStackID::ScalableVector); 744 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg) 745 .addFrameIndex(FI) 746 .addMemOperand(MMO); 747 } else { 748 MachineMemOperand *MMO = MF->getMachineMemOperand( 749 MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 750 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 751 752 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg) 753 .addFrameIndex(FI) 754 .addImm(0) 755 .addMemOperand(MMO); 756 } 757 } 758 759 MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl( 760 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, 761 MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, 762 VirtRegMap *VRM) const { 763 const MachineFrameInfo &MFI = MF.getFrameInfo(); 764 765 // The below optimizations narrow the load so they are only valid for little 766 // endian. 767 // TODO: Support big endian by adding an offset into the frame object? 768 if (MF.getDataLayout().isBigEndian()) 769 return nullptr; 770 771 // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w. 
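// For example, a reload from the spill slot that feeds a zext.b can be folded
// into a single LBU from that slot, and a reload feeding sext.w becomes an LW.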
772 if (Ops.size() != 1 || Ops[0] != 1) 773 return nullptr; 774 775 unsigned LoadOpc; 776 switch (MI.getOpcode()) { 777 default: 778 if (RISCV::isSEXT_W(MI)) { 779 LoadOpc = RISCV::LW; 780 break; 781 } 782 if (RISCV::isZEXT_W(MI)) { 783 LoadOpc = RISCV::LWU; 784 break; 785 } 786 if (RISCV::isZEXT_B(MI)) { 787 LoadOpc = RISCV::LBU; 788 break; 789 } 790 return nullptr; 791 case RISCV::SEXT_H: 792 LoadOpc = RISCV::LH; 793 break; 794 case RISCV::SEXT_B: 795 LoadOpc = RISCV::LB; 796 break; 797 case RISCV::ZEXT_H_RV32: 798 case RISCV::ZEXT_H_RV64: 799 LoadOpc = RISCV::LHU; 800 break; 801 } 802 803 MachineMemOperand *MMO = MF.getMachineMemOperand( 804 MachinePointerInfo::getFixedStack(MF, FrameIndex), 805 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex), 806 MFI.getObjectAlign(FrameIndex)); 807 808 Register DstReg = MI.getOperand(0).getReg(); 809 return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc), 810 DstReg) 811 .addFrameIndex(FrameIndex) 812 .addImm(0) 813 .addMemOperand(MMO); 814 } 815 816 void RISCVInstrInfo::movImm(MachineBasicBlock &MBB, 817 MachineBasicBlock::iterator MBBI, 818 const DebugLoc &DL, Register DstReg, uint64_t Val, 819 MachineInstr::MIFlag Flag, bool DstRenamable, 820 bool DstIsDead) const { 821 Register SrcReg = RISCV::X0; 822 823 if (!STI.is64Bit() && !isInt<32>(Val)) 824 report_fatal_error("Should only materialize 32-bit constants for RV32"); 825 826 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI); 827 assert(!Seq.empty()); 828 829 bool SrcRenamable = false; 830 unsigned Num = 0; 831 832 for (const RISCVMatInt::Inst &Inst : Seq) { 833 bool LastItem = ++Num == Seq.size(); 834 unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) | 835 getRenamableRegState(DstRenamable); 836 unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) | 837 getRenamableRegState(SrcRenamable); 838 switch (Inst.getOpndKind()) { 839 case RISCVMatInt::Imm: 840 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode())) 841 .addReg(DstReg, RegState::Define | DstRegState) 842 .addImm(Inst.getImm()) 843 .setMIFlag(Flag); 844 break; 845 case RISCVMatInt::RegX0: 846 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode())) 847 .addReg(DstReg, RegState::Define | DstRegState) 848 .addReg(SrcReg, SrcRegState) 849 .addReg(RISCV::X0) 850 .setMIFlag(Flag); 851 break; 852 case RISCVMatInt::RegReg: 853 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode())) 854 .addReg(DstReg, RegState::Define | DstRegState) 855 .addReg(SrcReg, SrcRegState) 856 .addReg(SrcReg, SrcRegState) 857 .setMIFlag(Flag); 858 break; 859 case RISCVMatInt::RegImm: 860 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode())) 861 .addReg(DstReg, RegState::Define | DstRegState) 862 .addReg(SrcReg, SrcRegState) 863 .addImm(Inst.getImm()) 864 .setMIFlag(Flag); 865 break; 866 } 867 868 // Only the first instruction has X0 as its source. 869 SrcReg = DstReg; 870 SrcRenamable = DstRenamable; 871 } 872 } 873 874 static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) { 875 switch (Opc) { 876 default: 877 return RISCVCC::COND_INVALID; 878 case RISCV::BEQ: 879 return RISCVCC::COND_EQ; 880 case RISCV::BNE: 881 return RISCVCC::COND_NE; 882 case RISCV::BLT: 883 return RISCVCC::COND_LT; 884 case RISCV::BGE: 885 return RISCVCC::COND_GE; 886 case RISCV::BLTU: 887 return RISCVCC::COND_LTU; 888 case RISCV::BGEU: 889 return RISCVCC::COND_GEU; 890 } 891 } 892 893 // The contents of values added to Cond are not examined outside of 894 // RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we 895 // push BranchOpcode, Reg1, Reg2. 
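// In practice the condition code (rather than the raw branch opcode) is what
// gets pushed: e.g. `bne a0, a1, .LBB` yields Cond = {COND_NE, a0, a1}, and the
// target block is returned separately through the Target argument.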
896 static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target, 897 SmallVectorImpl<MachineOperand> &Cond) { 898 // Block ends with fall-through condbranch. 899 assert(LastInst.getDesc().isConditionalBranch() && 900 "Unknown conditional branch"); 901 Target = LastInst.getOperand(2).getMBB(); 902 unsigned CC = getCondFromBranchOpc(LastInst.getOpcode()); 903 Cond.push_back(MachineOperand::CreateImm(CC)); 904 Cond.push_back(LastInst.getOperand(0)); 905 Cond.push_back(LastInst.getOperand(1)); 906 } 907 908 unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) { 909 switch (CC) { 910 default: 911 llvm_unreachable("Unknown condition code!"); 912 case RISCVCC::COND_EQ: 913 return RISCV::BEQ; 914 case RISCVCC::COND_NE: 915 return RISCV::BNE; 916 case RISCVCC::COND_LT: 917 return RISCV::BLT; 918 case RISCVCC::COND_GE: 919 return RISCV::BGE; 920 case RISCVCC::COND_LTU: 921 return RISCV::BLTU; 922 case RISCVCC::COND_GEU: 923 return RISCV::BGEU; 924 } 925 } 926 927 const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const { 928 return get(RISCVCC::getBrCond(CC)); 929 } 930 931 RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) { 932 switch (CC) { 933 default: 934 llvm_unreachable("Unrecognized conditional branch"); 935 case RISCVCC::COND_EQ: 936 return RISCVCC::COND_NE; 937 case RISCVCC::COND_NE: 938 return RISCVCC::COND_EQ; 939 case RISCVCC::COND_LT: 940 return RISCVCC::COND_GE; 941 case RISCVCC::COND_GE: 942 return RISCVCC::COND_LT; 943 case RISCVCC::COND_LTU: 944 return RISCVCC::COND_GEU; 945 case RISCVCC::COND_GEU: 946 return RISCVCC::COND_LTU; 947 } 948 } 949 950 bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB, 951 MachineBasicBlock *&TBB, 952 MachineBasicBlock *&FBB, 953 SmallVectorImpl<MachineOperand> &Cond, 954 bool AllowModify) const { 955 TBB = FBB = nullptr; 956 Cond.clear(); 957 958 // If the block has no terminators, it just falls into the block after it. 959 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 960 if (I == MBB.end() || !isUnpredicatedTerminator(*I)) 961 return false; 962 963 // Count the number of terminators and find the first unconditional or 964 // indirect branch. 965 MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end(); 966 int NumTerminators = 0; 967 for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J); 968 J++) { 969 NumTerminators++; 970 if (J->getDesc().isUnconditionalBranch() || 971 J->getDesc().isIndirectBranch()) { 972 FirstUncondOrIndirectBr = J.getReverse(); 973 } 974 } 975 976 // If AllowModify is true, we can erase any terminators after 977 // FirstUncondOrIndirectBR. 978 if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) { 979 while (std::next(FirstUncondOrIndirectBr) != MBB.end()) { 980 std::next(FirstUncondOrIndirectBr)->eraseFromParent(); 981 NumTerminators--; 982 } 983 I = FirstUncondOrIndirectBr; 984 } 985 986 // We can't handle blocks that end in an indirect branch. 987 if (I->getDesc().isIndirectBranch()) 988 return true; 989 990 // We can't handle Generic branch opcodes from Global ISel. 991 if (I->isPreISelOpcode()) 992 return true; 993 994 // We can't handle blocks with more than 2 terminators. 995 if (NumTerminators > 2) 996 return true; 997 998 // Handle a single unconditional branch. 999 if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) { 1000 TBB = getBranchDestBlock(*I); 1001 return false; 1002 } 1003 1004 // Handle a single conditional branch. 
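// (FBB stays null in this case: with a single conditional branch the false
// path simply falls through to the next block.)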
1005 if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) { 1006 parseCondBranch(*I, TBB, Cond); 1007 return false; 1008 } 1009 1010 // Handle a conditional branch followed by an unconditional branch. 1011 if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() && 1012 I->getDesc().isUnconditionalBranch()) { 1013 parseCondBranch(*std::prev(I), TBB, Cond); 1014 FBB = getBranchDestBlock(*I); 1015 return false; 1016 } 1017 1018 // Otherwise, we can't handle this. 1019 return true; 1020 } 1021 1022 unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB, 1023 int *BytesRemoved) const { 1024 if (BytesRemoved) 1025 *BytesRemoved = 0; 1026 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); 1027 if (I == MBB.end()) 1028 return 0; 1029 1030 if (!I->getDesc().isUnconditionalBranch() && 1031 !I->getDesc().isConditionalBranch()) 1032 return 0; 1033 1034 // Remove the branch. 1035 if (BytesRemoved) 1036 *BytesRemoved += getInstSizeInBytes(*I); 1037 I->eraseFromParent(); 1038 1039 I = MBB.end(); 1040 1041 if (I == MBB.begin()) 1042 return 1; 1043 --I; 1044 if (!I->getDesc().isConditionalBranch()) 1045 return 1; 1046 1047 // Remove the branch. 1048 if (BytesRemoved) 1049 *BytesRemoved += getInstSizeInBytes(*I); 1050 I->eraseFromParent(); 1051 return 2; 1052 } 1053 1054 // Inserts a branch into the end of the specific MachineBasicBlock, returning 1055 // the number of instructions inserted. 1056 unsigned RISCVInstrInfo::insertBranch( 1057 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, 1058 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const { 1059 if (BytesAdded) 1060 *BytesAdded = 0; 1061 1062 // Shouldn't be a fall through. 1063 assert(TBB && "insertBranch must not be told to insert a fallthrough"); 1064 assert((Cond.size() == 3 || Cond.size() == 0) && 1065 "RISC-V branch conditions have two components!"); 1066 1067 // Unconditional branch. 1068 if (Cond.empty()) { 1069 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB); 1070 if (BytesAdded) 1071 *BytesAdded += getInstSizeInBytes(MI); 1072 return 1; 1073 } 1074 1075 // Either a one or two-way conditional branch. 1076 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm()); 1077 MachineInstr &CondMI = 1078 *BuildMI(&MBB, DL, getBrCond(CC)).add(Cond[1]).add(Cond[2]).addMBB(TBB); 1079 if (BytesAdded) 1080 *BytesAdded += getInstSizeInBytes(CondMI); 1081 1082 // One-way conditional branch. 1083 if (!FBB) 1084 return 1; 1085 1086 // Two-way conditional branch. 
1087 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB); 1088 if (BytesAdded) 1089 *BytesAdded += getInstSizeInBytes(MI); 1090 return 2; 1091 } 1092 1093 void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, 1094 MachineBasicBlock &DestBB, 1095 MachineBasicBlock &RestoreBB, 1096 const DebugLoc &DL, int64_t BrOffset, 1097 RegScavenger *RS) const { 1098 assert(RS && "RegScavenger required for long branching"); 1099 assert(MBB.empty() && 1100 "new block should be inserted for expanding unconditional branch"); 1101 assert(MBB.pred_size() == 1); 1102 assert(RestoreBB.empty() && 1103 "restore block should be inserted for restoring clobbered registers"); 1104 1105 MachineFunction *MF = MBB.getParent(); 1106 MachineRegisterInfo &MRI = MF->getRegInfo(); 1107 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>(); 1108 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 1109 1110 if (!isInt<32>(BrOffset)) 1111 report_fatal_error( 1112 "Branch offsets outside of the signed 32-bit range not supported"); 1113 1114 // FIXME: A virtual register must be used initially, as the register 1115 // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch 1116 // uses the same workaround). 1117 Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); 1118 auto II = MBB.end(); 1119 // We may also update the jump target to RestoreBB later. 1120 MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump)) 1121 .addReg(ScratchReg, RegState::Define | RegState::Dead) 1122 .addMBB(&DestBB, RISCVII::MO_CALL); 1123 1124 RS->enterBasicBlockEnd(MBB); 1125 Register TmpGPR = 1126 RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(), 1127 /*RestoreAfter=*/false, /*SpAdj=*/0, 1128 /*AllowSpill=*/false); 1129 if (TmpGPR != RISCV::NoRegister) 1130 RS->setRegUsed(TmpGPR); 1131 else { 1132 // The case when there is no scavenged register needs special handling. 1133 1134 // Pick s11 because it doesn't make a difference. 
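// (s11, i.e. x27, is callee-saved, so it is safe to clobber it here: it is
// spilled to the scratch frame index below and reloaded in RestoreBB.)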
1135 TmpGPR = RISCV::X27; 1136 1137 int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex(); 1138 if (FrameIndex == -1) 1139 report_fatal_error("underestimated function size"); 1140 1141 storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex, 1142 &RISCV::GPRRegClass, TRI, Register()); 1143 TRI->eliminateFrameIndex(std::prev(MI.getIterator()), 1144 /*SpAdj=*/0, /*FIOperandNum=*/1); 1145 1146 MI.getOperand(1).setMBB(&RestoreBB); 1147 1148 loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex, 1149 &RISCV::GPRRegClass, TRI, Register()); 1150 TRI->eliminateFrameIndex(RestoreBB.back(), 1151 /*SpAdj=*/0, /*FIOperandNum=*/1); 1152 } 1153 1154 MRI.replaceRegWith(ScratchReg, TmpGPR); 1155 MRI.clearVirtRegs(); 1156 } 1157 1158 bool RISCVInstrInfo::reverseBranchCondition( 1159 SmallVectorImpl<MachineOperand> &Cond) const { 1160 assert((Cond.size() == 3) && "Invalid branch condition!"); 1161 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm()); 1162 Cond[0].setImm(getOppositeBranchCondition(CC)); 1163 return false; 1164 } 1165 1166 bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const { 1167 MachineBasicBlock *MBB = MI.getParent(); 1168 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1169 1170 MachineBasicBlock *TBB, *FBB; 1171 SmallVector<MachineOperand, 3> Cond; 1172 if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false)) 1173 return false; 1174 (void)FBB; 1175 1176 RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm()); 1177 assert(CC != RISCVCC::COND_INVALID); 1178 1179 if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE) 1180 return false; 1181 1182 // For two constants C0 and C1 from 1183 // ``` 1184 // li Y, C0 1185 // li Z, C1 1186 // ``` 1187 // 1. if C1 = C0 + 1 1188 // we can turn: 1189 // (a) blt Y, X -> bge X, Z 1190 // (b) bge Y, X -> blt X, Z 1191 // 1192 // 2. if C1 = C0 - 1 1193 // we can turn: 1194 // (a) blt X, Y -> bge Z, X 1195 // (b) bge X, Y -> blt Z, X 1196 // 1197 // To make sure this optimization is really beneficial, we only 1198 // optimize for cases where Y had only one use (i.e. only used by the branch). 1199 1200 // Right now we only care about LI (i.e. ADDI x0, imm) 1201 auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool { 1202 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() && 1203 MI->getOperand(1).getReg() == RISCV::X0) { 1204 Imm = MI->getOperand(2).getImm(); 1205 return true; 1206 } 1207 return false; 1208 }; 1209 // Either a load from immediate instruction or X0. 1210 auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool { 1211 if (!Op.isReg()) 1212 return false; 1213 Register Reg = Op.getReg(); 1214 if (Reg == RISCV::X0) { 1215 Imm = 0; 1216 return true; 1217 } 1218 if (!Reg.isVirtual()) 1219 return false; 1220 return isLoadImm(MRI.getVRegDef(Op.getReg()), Imm); 1221 }; 1222 1223 MachineOperand &LHS = MI.getOperand(0); 1224 MachineOperand &RHS = MI.getOperand(1); 1225 // Try to find the register for constant Z; return 1226 // invalid register otherwise. 
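// (searchConst below scans backwards from MI within this basic block for an
// `addi rd, x0, C1` that materializes C1.)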
1227 auto searchConst = [&](int64_t C1) -> Register { 1228 MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend(); 1229 auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool { 1230 int64_t Imm; 1231 return isLoadImm(&I, Imm) && Imm == C1; 1232 }); 1233 if (DefC1 != E) 1234 return DefC1->getOperand(0).getReg(); 1235 1236 return Register(); 1237 }; 1238 1239 bool Modify = false; 1240 int64_t C0; 1241 if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) { 1242 // Might be case 1. 1243 // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need 1244 // to worry about unsigned overflow here) 1245 if (C0 < INT64_MAX) 1246 if (Register RegZ = searchConst(C0 + 1)) { 1247 reverseBranchCondition(Cond); 1248 Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false); 1249 Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false); 1250 // We might extend the live range of Z, clear its kill flag to 1251 // account for this. 1252 MRI.clearKillFlags(RegZ); 1253 Modify = true; 1254 } 1255 } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) { 1256 // Might be case 2. 1257 // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX 1258 // when C0 is zero. 1259 if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0) 1260 if (Register RegZ = searchConst(C0 - 1)) { 1261 reverseBranchCondition(Cond); 1262 Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false); 1263 Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false); 1264 // We might extend the live range of Z, clear its kill flag to 1265 // account for this. 1266 MRI.clearKillFlags(RegZ); 1267 Modify = true; 1268 } 1269 } 1270 1271 if (!Modify) 1272 return false; 1273 1274 // Build the new branch and remove the old one. 1275 BuildMI(*MBB, MI, MI.getDebugLoc(), 1276 getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm()))) 1277 .add(Cond[1]) 1278 .add(Cond[2]) 1279 .addMBB(TBB); 1280 MI.eraseFromParent(); 1281 1282 return true; 1283 } 1284 1285 MachineBasicBlock * 1286 RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { 1287 assert(MI.getDesc().isBranch() && "Unexpected opcode!"); 1288 // The branch target is always the last operand. 1289 int NumOp = MI.getNumExplicitOperands(); 1290 return MI.getOperand(NumOp - 1).getMBB(); 1291 } 1292 1293 bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp, 1294 int64_t BrOffset) const { 1295 unsigned XLen = STI.getXLen(); 1296 // Ideally we could determine the supported branch offset from the 1297 // RISCVII::FormMask, but this can't be used for Pseudo instructions like 1298 // PseudoBR. 1299 switch (BranchOp) { 1300 default: 1301 llvm_unreachable("Unexpected opcode!"); 1302 case RISCV::BEQ: 1303 case RISCV::BNE: 1304 case RISCV::BLT: 1305 case RISCV::BGE: 1306 case RISCV::BLTU: 1307 case RISCV::BGEU: 1308 return isIntN(13, BrOffset); 1309 case RISCV::JAL: 1310 case RISCV::PseudoBR: 1311 return isIntN(21, BrOffset); 1312 case RISCV::PseudoJump: 1313 return isIntN(32, SignExtend64(BrOffset + 0x800, XLen)); 1314 } 1315 } 1316 1317 // If the operation has a predicated pseudo instruction, return the pseudo 1318 // instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END. 1319 // TODO: Support more operations. 
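// Note: each PseudoCC* opcode returned below is later expanded into a short
// forward branch over the underlying ALU instruction; optimizeSelect uses this
// mapping when folding a defining instruction into PseudoCCMOVGPR.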
1320 unsigned getPredicatedOpcode(unsigned Opcode) { 1321 switch (Opcode) { 1322 case RISCV::ADD: return RISCV::PseudoCCADD; break; 1323 case RISCV::SUB: return RISCV::PseudoCCSUB; break; 1324 case RISCV::SLL: return RISCV::PseudoCCSLL; break; 1325 case RISCV::SRL: return RISCV::PseudoCCSRL; break; 1326 case RISCV::SRA: return RISCV::PseudoCCSRA; break; 1327 case RISCV::AND: return RISCV::PseudoCCAND; break; 1328 case RISCV::OR: return RISCV::PseudoCCOR; break; 1329 case RISCV::XOR: return RISCV::PseudoCCXOR; break; 1330 1331 case RISCV::ADDI: return RISCV::PseudoCCADDI; break; 1332 case RISCV::SLLI: return RISCV::PseudoCCSLLI; break; 1333 case RISCV::SRLI: return RISCV::PseudoCCSRLI; break; 1334 case RISCV::SRAI: return RISCV::PseudoCCSRAI; break; 1335 case RISCV::ANDI: return RISCV::PseudoCCANDI; break; 1336 case RISCV::ORI: return RISCV::PseudoCCORI; break; 1337 case RISCV::XORI: return RISCV::PseudoCCXORI; break; 1338 1339 case RISCV::ADDW: return RISCV::PseudoCCADDW; break; 1340 case RISCV::SUBW: return RISCV::PseudoCCSUBW; break; 1341 case RISCV::SLLW: return RISCV::PseudoCCSLLW; break; 1342 case RISCV::SRLW: return RISCV::PseudoCCSRLW; break; 1343 case RISCV::SRAW: return RISCV::PseudoCCSRAW; break; 1344 1345 case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break; 1346 case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break; 1347 case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break; 1348 case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break; 1349 1350 case RISCV::ANDN: return RISCV::PseudoCCANDN; break; 1351 case RISCV::ORN: return RISCV::PseudoCCORN; break; 1352 case RISCV::XNOR: return RISCV::PseudoCCXNOR; break; 1353 } 1354 1355 return RISCV::INSTRUCTION_LIST_END; 1356 } 1357 1358 /// Identify instructions that can be folded into a CCMOV instruction, and 1359 /// return the defining instruction. 1360 static MachineInstr *canFoldAsPredicatedOp(Register Reg, 1361 const MachineRegisterInfo &MRI, 1362 const TargetInstrInfo *TII) { 1363 if (!Reg.isVirtual()) 1364 return nullptr; 1365 if (!MRI.hasOneNonDBGUse(Reg)) 1366 return nullptr; 1367 MachineInstr *MI = MRI.getVRegDef(Reg); 1368 if (!MI) 1369 return nullptr; 1370 // Check if MI can be predicated and folded into the CCMOV. 1371 if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END) 1372 return nullptr; 1373 // Don't predicate li idiom. 1374 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() && 1375 MI->getOperand(1).getReg() == RISCV::X0) 1376 return nullptr; 1377 // Check if MI has any other defs or physreg uses. 1378 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) { 1379 // Reject frame index operands, PEI can't handle the predicated pseudos. 1380 if (MO.isFI() || MO.isCPI() || MO.isJTI()) 1381 return nullptr; 1382 if (!MO.isReg()) 1383 continue; 1384 // MI can't have any tied operands, that would conflict with predication. 1385 if (MO.isTied()) 1386 return nullptr; 1387 if (MO.isDef()) 1388 return nullptr; 1389 // Allow constant physregs. 
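// (X0 is the common case here; it is modeled as a constant physreg since it
// always reads as zero.)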
1390 if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg())) 1391 return nullptr; 1392 } 1393 bool DontMoveAcrossStores = true; 1394 if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores)) 1395 return nullptr; 1396 return MI; 1397 } 1398 1399 bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI, 1400 SmallVectorImpl<MachineOperand> &Cond, 1401 unsigned &TrueOp, unsigned &FalseOp, 1402 bool &Optimizable) const { 1403 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR && 1404 "Unknown select instruction"); 1405 // CCMOV operands: 1406 // 0: Def. 1407 // 1: LHS of compare. 1408 // 2: RHS of compare. 1409 // 3: Condition code. 1410 // 4: False use. 1411 // 5: True use. 1412 TrueOp = 5; 1413 FalseOp = 4; 1414 Cond.push_back(MI.getOperand(1)); 1415 Cond.push_back(MI.getOperand(2)); 1416 Cond.push_back(MI.getOperand(3)); 1417 // We can only fold when we support short forward branch opt. 1418 Optimizable = STI.hasShortForwardBranchOpt(); 1419 return false; 1420 } 1421 1422 MachineInstr * 1423 RISCVInstrInfo::optimizeSelect(MachineInstr &MI, 1424 SmallPtrSetImpl<MachineInstr *> &SeenMIs, 1425 bool PreferFalse) const { 1426 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR && 1427 "Unknown select instruction"); 1428 if (!STI.hasShortForwardBranchOpt()) 1429 return nullptr; 1430 1431 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 1432 MachineInstr *DefMI = 1433 canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this); 1434 bool Invert = !DefMI; 1435 if (!DefMI) 1436 DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this); 1437 if (!DefMI) 1438 return nullptr; 1439 1440 // Find new register class to use. 1441 MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4); 1442 Register DestReg = MI.getOperand(0).getReg(); 1443 const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); 1444 if (!MRI.constrainRegClass(DestReg, PreviousClass)) 1445 return nullptr; 1446 1447 unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode()); 1448 assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!"); 1449 1450 // Create a new predicated version of DefMI. 1451 MachineInstrBuilder NewMI = 1452 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg); 1453 1454 // Copy the condition portion. 1455 NewMI.add(MI.getOperand(1)); 1456 NewMI.add(MI.getOperand(2)); 1457 1458 // Add condition code, inverting if necessary. 1459 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); 1460 if (Invert) 1461 CC = RISCVCC::getOppositeBranchCondition(CC); 1462 NewMI.addImm(CC); 1463 1464 // Copy the false register. 1465 NewMI.add(FalseReg); 1466 1467 // Copy all the DefMI operands. 1468 const MCInstrDesc &DefDesc = DefMI->getDesc(); 1469 for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i) 1470 NewMI.add(DefMI->getOperand(i)); 1471 1472 // Update SeenMIs set: register newly created MI and erase removed DefMI. 1473 SeenMIs.insert(NewMI); 1474 SeenMIs.erase(DefMI); 1475 1476 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on 1477 // DefMI would be invalid when tranferred inside the loop. Checking for a 1478 // loop is expensive, but at least remove kill flags if they are in different 1479 // BBs. 1480 if (DefMI->getParent() != MI.getParent()) 1481 NewMI->clearKillInfo(); 1482 1483 // The caller will erase MI, but not DefMI. 
1484 DefMI->eraseFromParent(); 1485 return NewMI; 1486 } 1487 1488 unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { 1489 if (MI.isMetaInstruction()) 1490 return 0; 1491 1492 unsigned Opcode = MI.getOpcode(); 1493 1494 if (Opcode == TargetOpcode::INLINEASM || 1495 Opcode == TargetOpcode::INLINEASM_BR) { 1496 const MachineFunction &MF = *MI.getParent()->getParent(); 1497 const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget()); 1498 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), 1499 *TM.getMCAsmInfo()); 1500 } 1501 1502 if (!MI.memoperands_empty()) { 1503 MachineMemOperand *MMO = *(MI.memoperands_begin()); 1504 const MachineFunction &MF = *MI.getParent()->getParent(); 1505 const auto &ST = MF.getSubtarget<RISCVSubtarget>(); 1506 if (ST.hasStdExtZihintntl() && MMO->isNonTemporal()) { 1507 if (ST.hasStdExtCOrZca() && ST.enableRVCHintInstrs()) { 1508 if (isCompressibleInst(MI, STI)) 1509 return 4; // c.ntl.all + c.load/c.store 1510 return 6; // c.ntl.all + load/store 1511 } 1512 return 8; // ntl.all + load/store 1513 } 1514 } 1515 1516 if (Opcode == TargetOpcode::BUNDLE) 1517 return getInstBundleLength(MI); 1518 1519 if (MI.getParent() && MI.getParent()->getParent()) { 1520 if (isCompressibleInst(MI, STI)) 1521 return 2; 1522 } 1523 1524 switch (Opcode) { 1525 case TargetOpcode::STACKMAP: 1526 // The upper bound for a stackmap intrinsic is the full length of its shadow 1527 return StackMapOpers(&MI).getNumPatchBytes(); 1528 case TargetOpcode::PATCHPOINT: 1529 // The size of the patchpoint intrinsic is the number of bytes requested 1530 return PatchPointOpers(&MI).getNumPatchBytes(); 1531 case TargetOpcode::STATEPOINT: 1532 // The size of the statepoint intrinsic is the number of bytes requested 1533 return StatepointOpers(&MI).getNumPatchBytes(); 1534 default: 1535 return get(Opcode).getSize(); 1536 } 1537 } 1538 1539 unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const { 1540 unsigned Size = 0; 1541 MachineBasicBlock::const_instr_iterator I = MI.getIterator(); 1542 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); 1543 while (++I != E && I->isInsideBundle()) { 1544 assert(!I->isBundle() && "No nested bundle!"); 1545 Size += getInstSizeInBytes(*I); 1546 } 1547 return Size; 1548 } 1549 1550 bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { 1551 const unsigned Opcode = MI.getOpcode(); 1552 switch (Opcode) { 1553 default: 1554 break; 1555 case RISCV::FSGNJ_D: 1556 case RISCV::FSGNJ_S: 1557 case RISCV::FSGNJ_H: 1558 case RISCV::FSGNJ_D_INX: 1559 case RISCV::FSGNJ_D_IN32X: 1560 case RISCV::FSGNJ_S_INX: 1561 case RISCV::FSGNJ_H_INX: 1562 // The canonical floating-point move is fsgnj rd, rs, rs. 
1563 return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() && 1564 MI.getOperand(1).getReg() == MI.getOperand(2).getReg(); 1565 case RISCV::ADDI: 1566 case RISCV::ORI: 1567 case RISCV::XORI: 1568 return (MI.getOperand(1).isReg() && 1569 MI.getOperand(1).getReg() == RISCV::X0) || 1570 (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0); 1571 } 1572 return MI.isAsCheapAsAMove(); 1573 } 1574 1575 std::optional<DestSourcePair> 1576 RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { 1577 if (MI.isMoveReg()) 1578 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; 1579 switch (MI.getOpcode()) { 1580 default: 1581 break; 1582 case RISCV::ADDI: 1583 // Operand 1 can be a frameindex but callers expect registers 1584 if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() && 1585 MI.getOperand(2).getImm() == 0) 1586 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; 1587 break; 1588 case RISCV::FSGNJ_D: 1589 case RISCV::FSGNJ_S: 1590 case RISCV::FSGNJ_H: 1591 case RISCV::FSGNJ_D_INX: 1592 case RISCV::FSGNJ_D_IN32X: 1593 case RISCV::FSGNJ_S_INX: 1594 case RISCV::FSGNJ_H_INX: 1595 // The canonical floating-point move is fsgnj rd, rs, rs. 1596 if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() && 1597 MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) 1598 return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; 1599 break; 1600 } 1601 return std::nullopt; 1602 } 1603 1604 MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const { 1605 if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) { 1606 // The option is unused. Choose Local strategy only for in-order cores. When 1607 // scheduling model is unspecified, use MinInstrCount strategy as more 1608 // generic one. 1609 const auto &SchedModel = STI.getSchedModel(); 1610 return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder()) 1611 ? MachineTraceStrategy::TS_MinInstrCount 1612 : MachineTraceStrategy::TS_Local; 1613 } 1614 // The strategy was forced by the option. 
1615 return ForceMachineCombinerStrategy; 1616 } 1617 1618 void RISCVInstrInfo::finalizeInsInstrs( 1619 MachineInstr &Root, MachineCombinerPattern &P, 1620 SmallVectorImpl<MachineInstr *> &InsInstrs) const { 1621 int16_t FrmOpIdx = 1622 RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm); 1623 if (FrmOpIdx < 0) { 1624 assert(all_of(InsInstrs, 1625 [](MachineInstr *MI) { 1626 return RISCV::getNamedOperandIdx(MI->getOpcode(), 1627 RISCV::OpName::frm) < 0; 1628 }) && 1629 "New instructions require FRM whereas the old one does not have it"); 1630 return; 1631 } 1632 1633 const MachineOperand &FRM = Root.getOperand(FrmOpIdx); 1634 MachineFunction &MF = *Root.getMF(); 1635 1636 for (auto *NewMI : InsInstrs) { 1637 assert(static_cast<unsigned>(RISCV::getNamedOperandIdx( 1638 NewMI->getOpcode(), RISCV::OpName::frm)) == 1639 NewMI->getNumOperands() && 1640 "Instruction has unexpected number of operands"); 1641 MachineInstrBuilder MIB(MF, NewMI); 1642 MIB.add(FRM); 1643 if (FRM.getImm() == RISCVFPRndMode::DYN) 1644 MIB.addUse(RISCV::FRM, RegState::Implicit); 1645 } 1646 } 1647 1648 static bool isFADD(unsigned Opc) { 1649 switch (Opc) { 1650 default: 1651 return false; 1652 case RISCV::FADD_H: 1653 case RISCV::FADD_S: 1654 case RISCV::FADD_D: 1655 return true; 1656 } 1657 } 1658 1659 static bool isFSUB(unsigned Opc) { 1660 switch (Opc) { 1661 default: 1662 return false; 1663 case RISCV::FSUB_H: 1664 case RISCV::FSUB_S: 1665 case RISCV::FSUB_D: 1666 return true; 1667 } 1668 } 1669 1670 static bool isFMUL(unsigned Opc) { 1671 switch (Opc) { 1672 default: 1673 return false; 1674 case RISCV::FMUL_H: 1675 case RISCV::FMUL_S: 1676 case RISCV::FMUL_D: 1677 return true; 1678 } 1679 } 1680 1681 bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst, 1682 bool &Commuted) const { 1683 if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted)) 1684 return false; 1685 1686 const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo(); 1687 unsigned OperandIdx = Commuted ? 2 : 1; 1688 const MachineInstr &Sibling = 1689 *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg()); 1690 1691 int16_t InstFrmOpIdx = 1692 RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm); 1693 int16_t SiblingFrmOpIdx = 1694 RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm); 1695 1696 return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) || 1697 RISCV::hasEqualFRM(Inst, Sibling); 1698 } 1699 1700 bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, 1701 bool Invert) const { 1702 unsigned Opc = Inst.getOpcode(); 1703 if (Invert) { 1704 auto InverseOpcode = getInverseOpcode(Opc); 1705 if (!InverseOpcode) 1706 return false; 1707 Opc = *InverseOpcode; 1708 } 1709 1710 if (isFADD(Opc) || isFMUL(Opc)) 1711 return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) && 1712 Inst.getFlag(MachineInstr::MIFlag::FmNsz); 1713 1714 switch (Opc) { 1715 default: 1716 return false; 1717 case RISCV::ADD: 1718 case RISCV::ADDW: 1719 case RISCV::AND: 1720 case RISCV::OR: 1721 case RISCV::XOR: 1722 // From RISC-V ISA spec, if both the high and low bits of the same product 1723 // are required, then the recommended code sequence is: 1724 // 1725 // MULH[[S]U] rdh, rs1, rs2 1726 // MUL rdl, rs1, rs2 1727 // (source register specifiers must be in same order and rdh cannot be the 1728 // same as rs1 or rs2) 1729 // 1730 // Microarchitectures can then fuse these into a single multiply operation 1731 // instead of performing two separate multiplies. 
1732 // MachineCombiner may reassociate MUL operands and lose the fusion 1733 // opportunity. 1734 case RISCV::MUL: 1735 case RISCV::MULW: 1736 case RISCV::MIN: 1737 case RISCV::MINU: 1738 case RISCV::MAX: 1739 case RISCV::MAXU: 1740 case RISCV::FMIN_H: 1741 case RISCV::FMIN_S: 1742 case RISCV::FMIN_D: 1743 case RISCV::FMAX_H: 1744 case RISCV::FMAX_S: 1745 case RISCV::FMAX_D: 1746 return true; 1747 } 1748 1749 return false; 1750 } 1751 1752 std::optional<unsigned> 1753 RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const { 1754 switch (Opcode) { 1755 default: 1756 return std::nullopt; 1757 case RISCV::FADD_H: 1758 return RISCV::FSUB_H; 1759 case RISCV::FADD_S: 1760 return RISCV::FSUB_S; 1761 case RISCV::FADD_D: 1762 return RISCV::FSUB_D; 1763 case RISCV::FSUB_H: 1764 return RISCV::FADD_H; 1765 case RISCV::FSUB_S: 1766 return RISCV::FADD_S; 1767 case RISCV::FSUB_D: 1768 return RISCV::FADD_D; 1769 case RISCV::ADD: 1770 return RISCV::SUB; 1771 case RISCV::SUB: 1772 return RISCV::ADD; 1773 case RISCV::ADDW: 1774 return RISCV::SUBW; 1775 case RISCV::SUBW: 1776 return RISCV::ADDW; 1777 } 1778 } 1779 1780 static bool canCombineFPFusedMultiply(const MachineInstr &Root, 1781 const MachineOperand &MO, 1782 bool DoRegPressureReduce) { 1783 if (!MO.isReg() || !MO.getReg().isVirtual()) 1784 return false; 1785 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); 1786 MachineInstr *MI = MRI.getVRegDef(MO.getReg()); 1787 if (!MI || !isFMUL(MI->getOpcode())) 1788 return false; 1789 1790 if (!Root.getFlag(MachineInstr::MIFlag::FmContract) || 1791 !MI->getFlag(MachineInstr::MIFlag::FmContract)) 1792 return false; 1793 1794 // Try combining even if fmul has more than one use as it eliminates 1795 // dependency between fadd(fsub) and fmul. However, it can extend liveranges 1796 // for fmul operands, so reject the transformation in register pressure 1797 // reduction mode. 1798 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 1799 return false; 1800 1801 // Do not combine instructions from different basic blocks. 1802 if (Root.getParent() != MI->getParent()) 1803 return false; 1804 return RISCV::hasEqualFRM(Root, *MI); 1805 } 1806 1807 static bool 1808 getFPFusedMultiplyPatterns(MachineInstr &Root, 1809 SmallVectorImpl<MachineCombinerPattern> &Patterns, 1810 bool DoRegPressureReduce) { 1811 unsigned Opc = Root.getOpcode(); 1812 bool IsFAdd = isFADD(Opc); 1813 if (!IsFAdd && !isFSUB(Opc)) 1814 return false; 1815 bool Added = false; 1816 if (canCombineFPFusedMultiply(Root, Root.getOperand(1), 1817 DoRegPressureReduce)) { 1818 Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_AX 1819 : MachineCombinerPattern::FMSUB); 1820 Added = true; 1821 } 1822 if (canCombineFPFusedMultiply(Root, Root.getOperand(2), 1823 DoRegPressureReduce)) { 1824 Patterns.push_back(IsFAdd ? 
MachineCombinerPattern::FMADD_XA 1825 : MachineCombinerPattern::FNMSUB); 1826 Added = true; 1827 } 1828 return Added; 1829 } 1830 1831 static bool getFPPatterns(MachineInstr &Root, 1832 SmallVectorImpl<MachineCombinerPattern> &Patterns, 1833 bool DoRegPressureReduce) { 1834 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce); 1835 } 1836 1837 bool RISCVInstrInfo::getMachineCombinerPatterns( 1838 MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, 1839 bool DoRegPressureReduce) const { 1840 1841 if (getFPPatterns(Root, Patterns, DoRegPressureReduce)) 1842 return true; 1843 1844 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, 1845 DoRegPressureReduce); 1846 } 1847 1848 static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, 1849 MachineCombinerPattern Pattern) { 1850 switch (RootOpc) { 1851 default: 1852 llvm_unreachable("Unexpected opcode"); 1853 case RISCV::FADD_H: 1854 return RISCV::FMADD_H; 1855 case RISCV::FADD_S: 1856 return RISCV::FMADD_S; 1857 case RISCV::FADD_D: 1858 return RISCV::FMADD_D; 1859 case RISCV::FSUB_H: 1860 return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_H 1861 : RISCV::FNMSUB_H; 1862 case RISCV::FSUB_S: 1863 return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_S 1864 : RISCV::FNMSUB_S; 1865 case RISCV::FSUB_D: 1866 return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_D 1867 : RISCV::FNMSUB_D; 1868 } 1869 } 1870 1871 static unsigned getAddendOperandIdx(MachineCombinerPattern Pattern) { 1872 switch (Pattern) { 1873 default: 1874 llvm_unreachable("Unexpected pattern"); 1875 case MachineCombinerPattern::FMADD_AX: 1876 case MachineCombinerPattern::FMSUB: 1877 return 2; 1878 case MachineCombinerPattern::FMADD_XA: 1879 case MachineCombinerPattern::FNMSUB: 1880 return 1; 1881 } 1882 } 1883 1884 static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev, 1885 MachineCombinerPattern Pattern, 1886 SmallVectorImpl<MachineInstr *> &InsInstrs, 1887 SmallVectorImpl<MachineInstr *> &DelInstrs) { 1888 MachineFunction *MF = Root.getMF(); 1889 MachineRegisterInfo &MRI = MF->getRegInfo(); 1890 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 1891 1892 MachineOperand &Mul1 = Prev.getOperand(1); 1893 MachineOperand &Mul2 = Prev.getOperand(2); 1894 MachineOperand &Dst = Root.getOperand(0); 1895 MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern)); 1896 1897 Register DstReg = Dst.getReg(); 1898 unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern); 1899 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags(); 1900 DebugLoc MergedLoc = 1901 DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc()); 1902 1903 bool Mul1IsKill = Mul1.isKill(); 1904 bool Mul2IsKill = Mul2.isKill(); 1905 bool AddendIsKill = Addend.isKill(); 1906 1907 // We need to clear kill flags since we may be extending the live range past 1908 // a kill. If the mul had kill flags, we can preserve those since we know 1909 // where the previous range stopped. 
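// (For reference, the rewrite being built is, schematically and with the
// rounding-mode operand omitted:
//   Prev: %mul = FMUL_S %a, %b
//   Root: %res = FADD_S %mul, %c
//   =>    %res = FMADD_S %a, %b, %c
// so %a and %b may now stay live up to Root rather than dying at Prev.)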
1910 MRI.clearKillFlags(Mul1.getReg()); 1911 MRI.clearKillFlags(Mul2.getReg()); 1912 1913 MachineInstrBuilder MIB = 1914 BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg) 1915 .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill)) 1916 .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill)) 1917 .addReg(Addend.getReg(), getKillRegState(AddendIsKill)) 1918 .setMIFlags(IntersectedFlags); 1919 1920 InsInstrs.push_back(MIB); 1921 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) 1922 DelInstrs.push_back(&Prev); 1923 DelInstrs.push_back(&Root); 1924 } 1925 1926 void RISCVInstrInfo::genAlternativeCodeSequence( 1927 MachineInstr &Root, MachineCombinerPattern Pattern, 1928 SmallVectorImpl<MachineInstr *> &InsInstrs, 1929 SmallVectorImpl<MachineInstr *> &DelInstrs, 1930 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { 1931 MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); 1932 switch (Pattern) { 1933 default: 1934 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 1935 DelInstrs, InstrIdxForVirtReg); 1936 return; 1937 case MachineCombinerPattern::FMADD_AX: 1938 case MachineCombinerPattern::FMSUB: { 1939 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg()); 1940 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); 1941 return; 1942 } 1943 case MachineCombinerPattern::FMADD_XA: 1944 case MachineCombinerPattern::FNMSUB: { 1945 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg()); 1946 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); 1947 return; 1948 } 1949 } 1950 } 1951 1952 bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, 1953 StringRef &ErrInfo) const { 1954 MCInstrDesc const &Desc = MI.getDesc(); 1955 1956 for (const auto &[Index, Operand] : enumerate(Desc.operands())) { 1957 unsigned OpType = Operand.OperandType; 1958 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM && 1959 OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) { 1960 const MachineOperand &MO = MI.getOperand(Index); 1961 if (MO.isImm()) { 1962 int64_t Imm = MO.getImm(); 1963 bool Ok; 1964 switch (OpType) { 1965 default: 1966 llvm_unreachable("Unexpected operand type"); 1967 1968 // clang-format off 1969 #define CASE_OPERAND_UIMM(NUM) \ 1970 case RISCVOp::OPERAND_UIMM##NUM: \ 1971 Ok = isUInt<NUM>(Imm); \ 1972 break; 1973 CASE_OPERAND_UIMM(1) 1974 CASE_OPERAND_UIMM(2) 1975 CASE_OPERAND_UIMM(3) 1976 CASE_OPERAND_UIMM(4) 1977 CASE_OPERAND_UIMM(5) 1978 CASE_OPERAND_UIMM(6) 1979 CASE_OPERAND_UIMM(7) 1980 CASE_OPERAND_UIMM(8) 1981 CASE_OPERAND_UIMM(12) 1982 CASE_OPERAND_UIMM(20) 1983 // clang-format on 1984 case RISCVOp::OPERAND_UIMM2_LSB0: 1985 Ok = isShiftedUInt<1, 1>(Imm); 1986 break; 1987 case RISCVOp::OPERAND_UIMM7_LSB00: 1988 Ok = isShiftedUInt<5, 2>(Imm); 1989 break; 1990 case RISCVOp::OPERAND_UIMM8_LSB00: 1991 Ok = isShiftedUInt<6, 2>(Imm); 1992 break; 1993 case RISCVOp::OPERAND_UIMM8_LSB000: 1994 Ok = isShiftedUInt<5, 3>(Imm); 1995 break; 1996 case RISCVOp::OPERAND_UIMM8_GE32: 1997 Ok = isUInt<8>(Imm) && Imm >= 32; 1998 break; 1999 case RISCVOp::OPERAND_UIMM9_LSB000: 2000 Ok = isShiftedUInt<6, 3>(Imm); 2001 break; 2002 case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO: 2003 Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0); 2004 break; 2005 case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO: 2006 Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0); 2007 break; 2008 case RISCVOp::OPERAND_ZERO: 2009 Ok = Imm == 0; 2010 break; 2011 case RISCVOp::OPERAND_SIMM5: 2012 Ok = isInt<5>(Imm); 2013 break; 2014 case RISCVOp::OPERAND_SIMM5_PLUS1: 2015 Ok = (isInt<5>(Imm) && Imm 
!= -16) || Imm == 16; 2016 break; 2017 case RISCVOp::OPERAND_SIMM6: 2018 Ok = isInt<6>(Imm); 2019 break; 2020 case RISCVOp::OPERAND_SIMM6_NONZERO: 2021 Ok = Imm != 0 && isInt<6>(Imm); 2022 break; 2023 case RISCVOp::OPERAND_VTYPEI10: 2024 Ok = isUInt<10>(Imm); 2025 break; 2026 case RISCVOp::OPERAND_VTYPEI11: 2027 Ok = isUInt<11>(Imm); 2028 break; 2029 case RISCVOp::OPERAND_SIMM12: 2030 Ok = isInt<12>(Imm); 2031 break; 2032 case RISCVOp::OPERAND_SIMM12_LSB00000: 2033 Ok = isShiftedInt<7, 5>(Imm); 2034 break; 2035 case RISCVOp::OPERAND_UIMMLOG2XLEN: 2036 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm); 2037 break; 2038 case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO: 2039 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm); 2040 Ok = Ok && Imm != 0; 2041 break; 2042 case RISCVOp::OPERAND_CLUI_IMM: 2043 Ok = (isUInt<5>(Imm) && Imm != 0) || 2044 (Imm >= 0xfffe0 && Imm <= 0xfffff); 2045 break; 2046 case RISCVOp::OPERAND_RVKRNUM: 2047 Ok = Imm >= 0 && Imm <= 10; 2048 break; 2049 case RISCVOp::OPERAND_RVKRNUM_0_7: 2050 Ok = Imm >= 0 && Imm <= 7; 2051 break; 2052 case RISCVOp::OPERAND_RVKRNUM_1_10: 2053 Ok = Imm >= 1 && Imm <= 10; 2054 break; 2055 case RISCVOp::OPERAND_RVKRNUM_2_14: 2056 Ok = Imm >= 2 && Imm <= 14; 2057 break; 2058 } 2059 if (!Ok) { 2060 ErrInfo = "Invalid immediate"; 2061 return false; 2062 } 2063 } 2064 } 2065 } 2066 2067 const uint64_t TSFlags = Desc.TSFlags; 2068 if (RISCVII::hasVLOp(TSFlags)) { 2069 const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc)); 2070 if (!Op.isImm() && !Op.isReg()) { 2071 ErrInfo = "Invalid operand type for VL operand"; 2072 return false; 2073 } 2074 if (Op.isReg() && Op.getReg() != RISCV::NoRegister) { 2075 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 2076 auto *RC = MRI.getRegClass(Op.getReg()); 2077 if (!RISCV::GPRRegClass.hasSubClassEq(RC)) { 2078 ErrInfo = "Invalid register class for VL operand"; 2079 return false; 2080 } 2081 } 2082 if (!RISCVII::hasSEWOp(TSFlags)) { 2083 ErrInfo = "VL operand w/o SEW operand?"; 2084 return false; 2085 } 2086 } 2087 if (RISCVII::hasSEWOp(TSFlags)) { 2088 unsigned OpIdx = RISCVII::getSEWOpNum(Desc); 2089 if (!MI.getOperand(OpIdx).isImm()) { 2090 ErrInfo = "SEW value expected to be an immediate"; 2091 return false; 2092 } 2093 uint64_t Log2SEW = MI.getOperand(OpIdx).getImm(); 2094 if (Log2SEW > 31) { 2095 ErrInfo = "Unexpected SEW value"; 2096 return false; 2097 } 2098 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; 2099 if (!RISCVVType::isValidSEW(SEW)) { 2100 ErrInfo = "Unexpected SEW value"; 2101 return false; 2102 } 2103 } 2104 if (RISCVII::hasVecPolicyOp(TSFlags)) { 2105 unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc); 2106 if (!MI.getOperand(OpIdx).isImm()) { 2107 ErrInfo = "Policy operand expected to be an immediate"; 2108 return false; 2109 } 2110 uint64_t Policy = MI.getOperand(OpIdx).getImm(); 2111 if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) { 2112 ErrInfo = "Invalid Policy Value"; 2113 return false; 2114 } 2115 if (!RISCVII::hasVLOp(TSFlags)) { 2116 ErrInfo = "policy operand w/o VL operand?"; 2117 return false; 2118 } 2119 2120 // VecPolicy operands can only exist on instructions with passthru/merge 2121 // arguments. Note that not all arguments with passthru have vec policy 2122 // operands- some instructions have implicit policies. 
2123 unsigned UseOpIdx; 2124 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 2125 ErrInfo = "policy operand w/o tied operand?"; 2126 return false; 2127 } 2128 } 2129 2130 return true; 2131 } 2132 2133 bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, 2134 const MachineInstr &AddrI, 2135 ExtAddrMode &AM) const { 2136 switch (MemI.getOpcode()) { 2137 default: 2138 return false; 2139 case RISCV::LB: 2140 case RISCV::LBU: 2141 case RISCV::LH: 2142 case RISCV::LHU: 2143 case RISCV::LW: 2144 case RISCV::LWU: 2145 case RISCV::LD: 2146 case RISCV::FLH: 2147 case RISCV::FLW: 2148 case RISCV::FLD: 2149 case RISCV::SB: 2150 case RISCV::SH: 2151 case RISCV::SW: 2152 case RISCV::SD: 2153 case RISCV::FSH: 2154 case RISCV::FSW: 2155 case RISCV::FSD: 2156 break; 2157 } 2158 2159 if (MemI.getOperand(0).getReg() == Reg) 2160 return false; 2161 2162 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() || 2163 !AddrI.getOperand(2).isImm()) 2164 return false; 2165 2166 int64_t OldOffset = MemI.getOperand(2).getImm(); 2167 int64_t Disp = AddrI.getOperand(2).getImm(); 2168 int64_t NewOffset = OldOffset + Disp; 2169 if (!STI.is64Bit()) 2170 NewOffset = SignExtend64<32>(NewOffset); 2171 2172 if (!isInt<12>(NewOffset)) 2173 return false; 2174 2175 AM.BaseReg = AddrI.getOperand(1).getReg(); 2176 AM.ScaledReg = 0; 2177 AM.Scale = 0; 2178 AM.Displacement = NewOffset; 2179 AM.Form = ExtAddrMode::Formula::Basic; 2180 return true; 2181 } 2182 2183 MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI, 2184 const ExtAddrMode &AM) const { 2185 2186 const DebugLoc &DL = MemI.getDebugLoc(); 2187 MachineBasicBlock &MBB = *MemI.getParent(); 2188 2189 assert(AM.ScaledReg == 0 && AM.Scale == 0 && 2190 "Addressing mode not supported for folding"); 2191 2192 return BuildMI(MBB, MemI, DL, get(MemI.getOpcode())) 2193 .addReg(MemI.getOperand(0).getReg(), 2194 MemI.mayLoad() ? RegState::Define : 0) 2195 .addReg(AM.BaseReg) 2196 .addImm(AM.Displacement) 2197 .setMemRefs(MemI.memoperands()) 2198 .setMIFlags(MemI.getFlags()); 2199 } 2200 2201 bool RISCVInstrInfo::getMemOperandsWithOffsetWidth( 2202 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, 2203 int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, 2204 const TargetRegisterInfo *TRI) const { 2205 if (!LdSt.mayLoadOrStore()) 2206 return false; 2207 2208 // Conservatively, only handle scalar loads/stores for now. 2209 switch (LdSt.getOpcode()) { 2210 case RISCV::LB: 2211 case RISCV::LBU: 2212 case RISCV::SB: 2213 case RISCV::LH: 2214 case RISCV::LHU: 2215 case RISCV::FLH: 2216 case RISCV::SH: 2217 case RISCV::FSH: 2218 case RISCV::LW: 2219 case RISCV::LWU: 2220 case RISCV::FLW: 2221 case RISCV::SW: 2222 case RISCV::FSW: 2223 case RISCV::LD: 2224 case RISCV::FLD: 2225 case RISCV::SD: 2226 case RISCV::FSD: 2227 break; 2228 default: 2229 return false; 2230 } 2231 const MachineOperand *BaseOp; 2232 OffsetIsScalable = false; 2233 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) 2234 return false; 2235 BaseOps.push_back(BaseOp); 2236 return true; 2237 } 2238 2239 // TODO: This was copied from SIInstrInfo. Could it be lifted to a common 2240 // helper? 
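// Sketch of the intent: two accesses such as
//   lw a1, 0(a0)
//   lw a2, 8(a0)
// share their first base operand (a0) and are treated as having the same
// base pointer; failing that, we compare the underlying IR objects of
// their single memory operands.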
2241 static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, 2242 ArrayRef<const MachineOperand *> BaseOps1, 2243 const MachineInstr &MI2, 2244 ArrayRef<const MachineOperand *> BaseOps2) { 2245 // Only examine the first "base" operand of each instruction, on the 2246 // assumption that it represents the real base address of the memory access. 2247 // Other operands are typically offsets or indices from this base address. 2248 if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front())) 2249 return true; 2250 2251 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand()) 2252 return false; 2253 2254 auto MO1 = *MI1.memoperands_begin(); 2255 auto MO2 = *MI2.memoperands_begin(); 2256 if (MO1->getAddrSpace() != MO2->getAddrSpace()) 2257 return false; 2258 2259 auto Base1 = MO1->getValue(); 2260 auto Base2 = MO2->getValue(); 2261 if (!Base1 || !Base2) 2262 return false; 2263 Base1 = getUnderlyingObject(Base1); 2264 Base2 = getUnderlyingObject(Base2); 2265 2266 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2)) 2267 return false; 2268 2269 return Base1 == Base2; 2270 } 2271 2272 bool RISCVInstrInfo::shouldClusterMemOps( 2273 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1, 2274 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2, 2275 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, 2276 unsigned NumBytes) const { 2277 // If the mem ops (to be clustered) do not have the same base ptr, then they 2278 // should not be clustered 2279 if (!BaseOps1.empty() && !BaseOps2.empty()) { 2280 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); 2281 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); 2282 if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) 2283 return false; 2284 } else if (!BaseOps1.empty() || !BaseOps2.empty()) { 2285 // If only one base op is empty, they do not have the same base ptr 2286 return false; 2287 } 2288 2289 unsigned CacheLineSize = 2290 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize(); 2291 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget. 2292 CacheLineSize = CacheLineSize ? CacheLineSize : 64; 2293 // Cluster if the memory operations are on the same or a neighbouring cache 2294 // line, but limit the maximum ClusterSize to avoid creating too much 2295 // additional register pressure. 2296 return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize; 2297 } 2298 2299 // Set BaseReg (the base register operand), Offset (the byte offset being 2300 // accessed) and the access Width of the passed instruction that reads/writes 2301 // memory. Returns false if the instruction does not read/write memory or the 2302 // BaseReg/Offset/Width can't be determined. Is not guaranteed to always 2303 // recognise base operands and offsets in all cases. 2304 // TODO: Add an IsScalable bool ref argument (like the equivalent AArch64 2305 // function) and set it as appropriate. 2306 bool RISCVInstrInfo::getMemOperandWithOffsetWidth( 2307 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, 2308 unsigned &Width, const TargetRegisterInfo *TRI) const { 2309 if (!LdSt.mayLoadOrStore()) 2310 return false; 2311 2312 // Here we assume the standard RISC-V ISA, which uses a base+offset 2313 // addressing mode. You'll need to relax these conditions to support custom 2314 // load/store instructions. 
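// The scalar forms handled here have exactly three explicit operands; e.g.
// "lw rd, offset(rs1)" is (value register, base register or frame index,
// simm12 offset), which is the layout the checks below rely on.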
2315 if (LdSt.getNumExplicitOperands() != 3) 2316 return false; 2317 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || 2318 !LdSt.getOperand(2).isImm()) 2319 return false; 2320 2321 if (!LdSt.hasOneMemOperand()) 2322 return false; 2323 2324 Width = (*LdSt.memoperands_begin())->getSize(); 2325 BaseReg = &LdSt.getOperand(1); 2326 Offset = LdSt.getOperand(2).getImm(); 2327 return true; 2328 } 2329 2330 bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint( 2331 const MachineInstr &MIa, const MachineInstr &MIb) const { 2332 assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); 2333 assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); 2334 2335 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || 2336 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 2337 return false; 2338 2339 // Retrieve the base register, offset from the base register and width. Width 2340 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If 2341 // base registers are identical, and the offset of a lower memory access + 2342 // the width doesn't overlap the offset of a higher memory access, 2343 // then the memory accesses are different. 2344 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 2345 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; 2346 int64_t OffsetA = 0, OffsetB = 0; 2347 unsigned int WidthA = 0, WidthB = 0; 2348 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && 2349 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { 2350 if (BaseOpA->isIdenticalTo(*BaseOpB)) { 2351 int LowOffset = std::min(OffsetA, OffsetB); 2352 int HighOffset = std::max(OffsetA, OffsetB); 2353 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 2354 if (LowOffset + LowWidth <= HighOffset) 2355 return true; 2356 } 2357 } 2358 return false; 2359 } 2360 2361 std::pair<unsigned, unsigned> 2362 RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 2363 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK; 2364 return std::make_pair(TF & Mask, TF & ~Mask); 2365 } 2366 2367 ArrayRef<std::pair<unsigned, const char *>> 2368 RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 2369 using namespace RISCVII; 2370 static const std::pair<unsigned, const char *> TargetFlags[] = { 2371 {MO_CALL, "riscv-call"}, 2372 {MO_LO, "riscv-lo"}, 2373 {MO_HI, "riscv-hi"}, 2374 {MO_PCREL_LO, "riscv-pcrel-lo"}, 2375 {MO_PCREL_HI, "riscv-pcrel-hi"}, 2376 {MO_GOT_HI, "riscv-got-hi"}, 2377 {MO_TPREL_LO, "riscv-tprel-lo"}, 2378 {MO_TPREL_HI, "riscv-tprel-hi"}, 2379 {MO_TPREL_ADD, "riscv-tprel-add"}, 2380 {MO_TLS_GOT_HI, "riscv-tls-got-hi"}, 2381 {MO_TLS_GD_HI, "riscv-tls-gd-hi"}}; 2382 return ArrayRef(TargetFlags); 2383 } 2384 bool RISCVInstrInfo::isFunctionSafeToOutlineFrom( 2385 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { 2386 const Function &F = MF.getFunction(); 2387 2388 // Can F be deduplicated by the linker? If it can, don't outline from it. 2389 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 2390 return false; 2391 2392 // Don't outline from functions with section markings; the program could 2393 // expect that all the code is in the named section. 2394 if (F.hasSection()) 2395 return false; 2396 2397 // It's safe to outline from MF. 2398 return true; 2399 } 2400 2401 bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, 2402 unsigned &Flags) const { 2403 // More accurate safety checking is done in getOutliningCandidateInfo. 
2404 return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags); 2405 } 2406 2407 // Enum values indicating how an outlined call should be constructed. 2408 enum MachineOutlinerConstructionID { 2409 MachineOutlinerDefault 2410 }; 2411 2412 bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault( 2413 MachineFunction &MF) const { 2414 return MF.getFunction().hasMinSize(); 2415 } 2416 2417 std::optional<outliner::OutlinedFunction> 2418 RISCVInstrInfo::getOutliningCandidateInfo( 2419 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { 2420 2421 // First we need to filter out candidates where the X5 register (i.e. t0) can't 2422 // be used to set up the function call. 2423 auto CannotInsertCall = [](outliner::Candidate &C) { 2424 const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo(); 2425 return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI); 2426 }; 2427 2428 llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall); 2429 2430 // If the sequence doesn't have enough candidates left, then we're done. 2431 if (RepeatedSequenceLocs.size() < 2) 2432 return std::nullopt; 2433 2434 unsigned SequenceSize = 0; 2435 2436 auto I = RepeatedSequenceLocs[0].front(); 2437 auto E = std::next(RepeatedSequenceLocs[0].back()); 2438 for (; I != E; ++I) 2439 SequenceSize += getInstSizeInBytes(*I); 2440 2441 // call t0, function = 8 bytes. 2442 unsigned CallOverhead = 8; 2443 for (auto &C : RepeatedSequenceLocs) 2444 C.setCallInfo(MachineOutlinerDefault, CallOverhead); 2445 2446 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled. 2447 unsigned FrameOverhead = 4; 2448 if (RepeatedSequenceLocs[0] 2449 .getMF() 2450 ->getSubtarget<RISCVSubtarget>() 2451 .hasStdExtCOrZca()) 2452 FrameOverhead = 2; 2453 2454 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, 2455 FrameOverhead, MachineOutlinerDefault); 2456 } 2457 2458 outliner::InstrType 2459 RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI, 2460 unsigned Flags) const { 2461 MachineInstr &MI = *MBBI; 2462 MachineBasicBlock *MBB = MI.getParent(); 2463 const TargetRegisterInfo *TRI = 2464 MBB->getParent()->getSubtarget().getRegisterInfo(); 2465 const auto &F = MI.getMF()->getFunction(); 2466 2467 // We can manually strip out CFI instructions later. 2468 if (MI.isCFIInstruction()) 2469 // If the current function has exception handling code, we can't outline and 2470 // strip these CFI instructions since that may break the .eh_frame section 2471 // needed for unwinding. 2472 return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal 2473 : outliner::InstrType::Invisible; 2474 2475 // We need support for tail calls to outlined functions before return 2476 // statements can be allowed. 2477 if (MI.isReturn()) 2478 return outliner::InstrType::Illegal; 2479 2480 // Don't allow modifying the X5 register, which we use for return addresses in 2481 // these outlined functions. 2482 if (MI.modifiesRegister(RISCV::X5, TRI) || 2483 MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5)) 2484 return outliner::InstrType::Illegal; 2485 2486 // Make sure the operands don't reference something unsafe. 2487 for (const auto &MO : MI.operands()) { 2488 2489 // pcrel-hi and pcrel-lo can't be put in separate sections, so filter that out 2490 // if at all possible.
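// (A %pcrel_lo operand refers back to a matching %pcrel_hi label, and the
// pair must end up in the same section for the relocation to resolve, so
// outlining is unsafe when the code could be split across sections.)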
2491 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO && 2492 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() || 2493 F.hasSection())) 2494 return outliner::InstrType::Illegal; 2495 } 2496 2497 return outliner::InstrType::Legal; 2498 } 2499 2500 void RISCVInstrInfo::buildOutlinedFrame( 2501 MachineBasicBlock &MBB, MachineFunction &MF, 2502 const outliner::OutlinedFunction &OF) const { 2503 2504 // Strip out any CFI instructions 2505 bool Changed = true; 2506 while (Changed) { 2507 Changed = false; 2508 auto I = MBB.begin(); 2509 auto E = MBB.end(); 2510 for (; I != E; ++I) { 2511 if (I->isCFIInstruction()) { 2512 I->removeFromParent(); 2513 Changed = true; 2514 break; 2515 } 2516 } 2517 } 2518 2519 MBB.addLiveIn(RISCV::X5); 2520 2521 // Add in a return instruction to the end of the outlined frame. 2522 MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR)) 2523 .addReg(RISCV::X0, RegState::Define) 2524 .addReg(RISCV::X5) 2525 .addImm(0)); 2526 } 2527 2528 MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( 2529 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, 2530 MachineFunction &MF, outliner::Candidate &C) const { 2531 2532 // Add in a call instruction to the outlined function at the given location. 2533 It = MBB.insert(It, 2534 BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5) 2535 .addGlobalAddress(M.getNamedValue(MF.getName()), 0, 2536 RISCVII::MO_CALL)); 2537 return It; 2538 } 2539 2540 std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI, 2541 Register Reg) const { 2542 // TODO: Handle cases where Reg is a super- or sub-register of the 2543 // destination register. 2544 const MachineOperand &Op0 = MI.getOperand(0); 2545 if (!Op0.isReg() || Reg != Op0.getReg()) 2546 return std::nullopt; 2547 2548 // Don't consider ADDIW as a candidate because the caller may not be aware 2549 // of its sign extension behaviour. 2550 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() && 2551 MI.getOperand(2).isImm()) 2552 return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()}; 2553 2554 return std::nullopt; 2555 } 2556 2557 // MIR printer helper function to annotate Operands with a comment. 2558 std::string RISCVInstrInfo::createMIROperandComment( 2559 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, 2560 const TargetRegisterInfo *TRI) const { 2561 // Print a generic comment for this operand if there is one. 2562 std::string GenericComment = 2563 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); 2564 if (!GenericComment.empty()) 2565 return GenericComment; 2566 2567 // If not, we must have an immediate operand. 2568 if (!Op.isImm()) 2569 return std::string(); 2570 2571 std::string Comment; 2572 raw_string_ostream OS(Comment); 2573 2574 uint64_t TSFlags = MI.getDesc().TSFlags; 2575 2576 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW 2577 // operand of vector codegen pseudos. 2578 if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI || 2579 MI.getOpcode() == RISCV::PseudoVSETVLI || 2580 MI.getOpcode() == RISCV::PseudoVSETIVLI || 2581 MI.getOpcode() == RISCV::PseudoVSETVLIX0) && 2582 OpIdx == 2) { 2583 unsigned Imm = MI.getOperand(OpIdx).getImm(); 2584 RISCVVType::printVType(Imm, OS); 2585 } else if (RISCVII::hasSEWOp(TSFlags) && 2586 OpIdx == RISCVII::getSEWOpNum(MI.getDesc())) { 2587 unsigned Log2SEW = MI.getOperand(OpIdx).getImm(); 2588 unsigned SEW = Log2SEW ? 
1 << Log2SEW : 8; 2589 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 2590 OS << "e" << SEW; 2591 } else if (RISCVII::hasVecPolicyOp(TSFlags) && 2592 OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) { 2593 unsigned Policy = MI.getOperand(OpIdx).getImm(); 2594 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && 2595 "Invalid Policy Value"); 2596 OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", " 2597 << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu"); 2598 } 2599 2600 OS.flush(); 2601 return Comment; 2602 } 2603 2604 // clang-format off 2605 #define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \ 2606 RISCV::PseudoV##OP##_##TYPE##_##LMUL 2607 2608 #define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE) \ 2609 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1): \ 2610 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2): \ 2611 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4): \ 2612 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8) 2613 2614 #define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE) \ 2615 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2): \ 2616 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE) 2617 2618 #define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) \ 2619 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4): \ 2620 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE) 2621 2622 #define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \ 2623 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \ 2624 case CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) 2625 2626 #define CASE_VFMA_SPLATS(OP) \ 2627 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16): \ 2628 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32): \ 2629 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64) 2630 // clang-format on 2631 2632 bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, 2633 unsigned &SrcOpIdx1, 2634 unsigned &SrcOpIdx2) const { 2635 const MCInstrDesc &Desc = MI.getDesc(); 2636 if (!Desc.isCommutable()) 2637 return false; 2638 2639 switch (MI.getOpcode()) { 2640 case RISCV::TH_MVEQZ: 2641 case RISCV::TH_MVNEZ: 2642 // We can't commute operands if operand 2 (i.e., rs1 in 2643 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is 2644 // not valid as the in/out-operand 1). 2645 if (MI.getOperand(2).getReg() == RISCV::X0) 2646 return false; 2647 // Operands 1 and 2 are commutable, if we switch the opcode. 2648 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2); 2649 case RISCV::TH_MULA: 2650 case RISCV::TH_MULAW: 2651 case RISCV::TH_MULAH: 2652 case RISCV::TH_MULS: 2653 case RISCV::TH_MULSW: 2654 case RISCV::TH_MULSH: 2655 // Operands 2 and 3 are commutable. 2656 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); 2657 case RISCV::PseudoCCMOVGPRNoX0: 2658 case RISCV::PseudoCCMOVGPR: 2659 // Operands 4 and 5 are commutable. 2660 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5); 2661 case CASE_VFMA_SPLATS(FMADD): 2662 case CASE_VFMA_SPLATS(FMSUB): 2663 case CASE_VFMA_SPLATS(FMACC): 2664 case CASE_VFMA_SPLATS(FMSAC): 2665 case CASE_VFMA_SPLATS(FNMADD): 2666 case CASE_VFMA_SPLATS(FNMSUB): 2667 case CASE_VFMA_SPLATS(FNMACC): 2668 case CASE_VFMA_SPLATS(FNMSAC): 2669 case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV): 2670 case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV): 2671 case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV): 2672 case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV): 2673 case CASE_VFMA_OPCODE_LMULS(MADD, VX): 2674 case CASE_VFMA_OPCODE_LMULS(NMSUB, VX): 2675 case CASE_VFMA_OPCODE_LMULS(MACC, VX): 2676 case CASE_VFMA_OPCODE_LMULS(NMSAC, VX): 2677 case CASE_VFMA_OPCODE_LMULS(MACC, VV): 2678 case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): { 2679 // If the tail policy is undisturbed we can't commute. 
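// (The policy immediate is the last explicit operand and bit 0 is the tail
// policy, 1 meaning tail agnostic. With a tail-undisturbed policy the tied
// source also provides the tail elements, so swapping it with another
// source would change the result.)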
2680 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags)); 2681 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0) 2682 return false; 2683 2684 // For these instructions we can only swap operand 1 and operand 3 by 2685 // changing the opcode. 2686 unsigned CommutableOpIdx1 = 1; 2687 unsigned CommutableOpIdx2 = 3; 2688 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1, 2689 CommutableOpIdx2)) 2690 return false; 2691 return true; 2692 } 2693 case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV): 2694 case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV): 2695 case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV): 2696 case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV): 2697 case CASE_VFMA_OPCODE_LMULS(MADD, VV): 2698 case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): { 2699 // If the tail policy is undisturbed we can't commute. 2700 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags)); 2701 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0) 2702 return false; 2703 2704 // For these instructions we have more freedom. We can commute with the 2705 // other multiplicand or with the addend/subtrahend/minuend. 2706 2707 // Any fixed operand must be from source 1, 2 or 3. 2708 if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3) 2709 return false; 2710 if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3) 2711 return false; 2712 2713 // If both ops are fixed, one must be the tied source. 2714 if (SrcOpIdx1 != CommuteAnyOperandIndex && 2715 SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1) 2716 return false; 2717 2718 // Look for two different register operands assumed to be commutable 2719 // regardless of the FMA opcode. The FMA opcode is adjusted later if 2720 // needed. 2721 if (SrcOpIdx1 == CommuteAnyOperandIndex || 2722 SrcOpIdx2 == CommuteAnyOperandIndex) { 2723 // At least one of the operands to be commuted is not specified and 2724 // this method is free to choose appropriate commutable operands. 2725 unsigned CommutableOpIdx1 = SrcOpIdx1; 2726 if (SrcOpIdx1 == SrcOpIdx2) { 2727 // Neither of the operands is fixed. Set one of the commutable 2728 // operands to the tied source. 2729 CommutableOpIdx1 = 1; 2730 } else if (SrcOpIdx1 == CommuteAnyOperandIndex) { 2731 // Only one of the operands is not fixed. 2732 CommutableOpIdx1 = SrcOpIdx2; 2733 } 2734 2735 // CommutableOpIdx1 is well defined now. Let's choose another commutable 2736 // operand and assign its index to CommutableOpIdx2. 2737 unsigned CommutableOpIdx2; 2738 if (CommutableOpIdx1 != 1) { 2739 // If we haven't already used the tied source, we must use it now. 2740 CommutableOpIdx2 = 1; 2741 } else { 2742 Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg(); 2743 2744 // The commuted operands should have different registers. 2745 // Otherwise, the commute transformation does not change anything and 2746 // is useless. We use this as a hint to make our decision. 2747 if (Op1Reg != MI.getOperand(2).getReg()) 2748 CommutableOpIdx2 = 2; 2749 else 2750 CommutableOpIdx2 = 3; 2751 } 2752 2753 // Assign the found pair of commutable indices to SrcOpIdx1 and 2754 // SrcOpIdx2 to return those values.
2755 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1, 2756 CommutableOpIdx2)) 2757 return false; 2758 } 2759 2760 return true; 2761 } 2762 } 2763 2764 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); 2765 } 2766 2767 #define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \ 2768 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \ 2769 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \ 2770 break; 2771 2772 #define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \ 2773 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \ 2774 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \ 2775 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \ 2776 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8) 2777 2778 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \ 2779 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \ 2780 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) 2781 2782 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \ 2783 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \ 2784 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) 2785 2786 #define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \ 2787 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \ 2788 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) 2789 2790 #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \ 2791 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \ 2792 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \ 2793 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64) 2794 2795 MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, 2796 bool NewMI, 2797 unsigned OpIdx1, 2798 unsigned OpIdx2) const { 2799 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { 2800 if (NewMI) 2801 return *MI.getParent()->getParent()->CloneMachineInstr(&MI); 2802 return MI; 2803 }; 2804 2805 switch (MI.getOpcode()) { 2806 case RISCV::TH_MVEQZ: 2807 case RISCV::TH_MVNEZ: { 2808 auto &WorkingMI = cloneIfNew(MI); 2809 WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ 2810 : RISCV::TH_MVEQZ)); 2811 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1, 2812 OpIdx2); 2813 } 2814 case RISCV::PseudoCCMOVGPRNoX0: 2815 case RISCV::PseudoCCMOVGPR: { 2816 // CCMOV can be commuted by inverting the condition. 
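// Sketch, with the operand layout assumed from the uses below
// (dst, lhs, rhs, cc, truev, falsev): swapping truev and falsev while
// replacing cc with its opposite condition selects the same value.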
2817 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); 2818 CC = RISCVCC::getOppositeBranchCondition(CC); 2819 auto &WorkingMI = cloneIfNew(MI); 2820 WorkingMI.getOperand(3).setImm(CC); 2821 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false, 2822 OpIdx1, OpIdx2); 2823 } 2824 case CASE_VFMA_SPLATS(FMACC): 2825 case CASE_VFMA_SPLATS(FMADD): 2826 case CASE_VFMA_SPLATS(FMSAC): 2827 case CASE_VFMA_SPLATS(FMSUB): 2828 case CASE_VFMA_SPLATS(FNMACC): 2829 case CASE_VFMA_SPLATS(FNMADD): 2830 case CASE_VFMA_SPLATS(FNMSAC): 2831 case CASE_VFMA_SPLATS(FNMSUB): 2832 case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV): 2833 case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV): 2834 case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV): 2835 case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV): 2836 case CASE_VFMA_OPCODE_LMULS(MADD, VX): 2837 case CASE_VFMA_OPCODE_LMULS(NMSUB, VX): 2838 case CASE_VFMA_OPCODE_LMULS(MACC, VX): 2839 case CASE_VFMA_OPCODE_LMULS(NMSAC, VX): 2840 case CASE_VFMA_OPCODE_LMULS(MACC, VV): 2841 case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): { 2842 // It only makes sense to toggle these between clobbering the 2843 // addend/subtrahend/minuend and one of the multiplicands. 2844 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); 2845 assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index"); 2846 unsigned Opc; 2847 switch (MI.getOpcode()) { 2848 default: 2849 llvm_unreachable("Unexpected opcode"); 2850 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD) 2851 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC) 2852 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB) 2853 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC) 2854 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD) 2855 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC) 2856 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB) 2857 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC) 2858 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMACC, FMADD, VV) 2859 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSAC, FMSUB, VV) 2860 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMACC, FNMADD, VV) 2861 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSAC, FNMSUB, VV) 2862 CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX) 2863 CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX) 2864 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX) 2865 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX) 2866 CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV) 2867 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV) 2868 } 2869 2870 auto &WorkingMI = cloneIfNew(MI); 2871 WorkingMI.setDesc(get(Opc)); 2872 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, 2873 OpIdx1, OpIdx2); 2874 } 2875 case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV): 2876 case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV): 2877 case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV): 2878 case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV): 2879 case CASE_VFMA_OPCODE_LMULS(MADD, VV): 2880 case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): { 2881 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); 2882 // If one of the operands is the addend, we need to change the opcode. 2883 // Otherwise we're just swapping 2 of the multiplicands.
2884 if (OpIdx1 == 3 || OpIdx2 == 3) { 2885 unsigned Opc; 2886 switch (MI.getOpcode()) { 2887 default: 2888 llvm_unreachable("Unexpected opcode"); 2889 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMADD, FMACC, VV) 2890 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSUB, FMSAC, VV) 2891 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMADD, FNMACC, VV) 2892 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSUB, FNMSAC, VV) 2893 CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV) 2894 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV) 2895 } 2896 2897 auto &WorkingMI = cloneIfNew(MI); 2898 WorkingMI.setDesc(get(Opc)); 2899 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, 2900 OpIdx1, OpIdx2); 2901 } 2902 // Let the default code handle it. 2903 break; 2904 } 2905 } 2906 2907 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); 2908 } 2909 2910 #undef CASE_VFMA_CHANGE_OPCODE_SPLATS 2911 #undef CASE_VFMA_CHANGE_OPCODE_LMULS 2912 #undef CASE_VFMA_CHANGE_OPCODE_COMMON 2913 #undef CASE_VFMA_SPLATS 2914 #undef CASE_VFMA_OPCODE_LMULS 2915 #undef CASE_VFMA_OPCODE_COMMON 2916 2917 // clang-format off 2918 #define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \ 2919 RISCV::PseudoV##OP##_##LMUL##_TIED 2920 2921 #define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \ 2922 CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \ 2923 case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \ 2924 case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \ 2925 case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \ 2926 case CASE_WIDEOP_OPCODE_COMMON(OP, M4) 2927 2928 #define CASE_WIDEOP_OPCODE_LMULS(OP) \ 2929 CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \ 2930 case CASE_WIDEOP_OPCODE_LMULS_MF4(OP) 2931 // clang-format on 2932 2933 #define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \ 2934 case RISCV::PseudoV##OP##_##LMUL##_TIED: \ 2935 NewOpc = RISCV::PseudoV##OP##_##LMUL; \ 2936 break; 2937 2938 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \ 2939 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \ 2940 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \ 2941 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \ 2942 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \ 2943 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4) 2944 2945 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \ 2946 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \ 2947 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) 2948 2949 MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, 2950 LiveVariables *LV, 2951 LiveIntervals *LIS) const { 2952 MachineInstrBuilder MIB; 2953 switch (MI.getOpcode()) { 2954 default: 2955 return nullptr; 2956 case CASE_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV): 2957 case CASE_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): { 2958 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && 2959 MI.getNumExplicitOperands() == 7 && 2960 "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy"); 2961 // If the tail policy is undisturbed we can't convert. 
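// (The _TIED pseudos tie the wide destination to the wide source operand;
// the untied form created below takes an undef passthru instead, which is
// only equivalent when the tail is agnostic.)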
2962 if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() & 2963 1) == 0) 2964 return nullptr; 2965 // clang-format off 2966 unsigned NewOpc; 2967 switch (MI.getOpcode()) { 2968 default: 2969 llvm_unreachable("Unexpected opcode"); 2970 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV) 2971 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV) 2972 } 2973 // clang-format on 2974 2975 MachineBasicBlock &MBB = *MI.getParent(); 2976 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 2977 .add(MI.getOperand(0)) 2978 .addReg(MI.getOperand(0).getReg(), RegState::Undef) 2979 .add(MI.getOperand(1)) 2980 .add(MI.getOperand(2)) 2981 .add(MI.getOperand(3)) 2982 .add(MI.getOperand(4)) 2983 .add(MI.getOperand(5)) 2984 .add(MI.getOperand(6)); 2985 break; 2986 } 2987 case CASE_WIDEOP_OPCODE_LMULS(WADD_WV): 2988 case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV): 2989 case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV): 2990 case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): { 2991 // If the tail policy is undisturbed we can't convert. 2992 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && 2993 MI.getNumExplicitOperands() == 6); 2994 if ((MI.getOperand(5).getImm() & 1) == 0) 2995 return nullptr; 2996 2997 // clang-format off 2998 unsigned NewOpc; 2999 switch (MI.getOpcode()) { 3000 default: 3001 llvm_unreachable("Unexpected opcode"); 3002 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV) 3003 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV) 3004 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV) 3005 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV) 3006 } 3007 // clang-format on 3008 3009 MachineBasicBlock &MBB = *MI.getParent(); 3010 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 3011 .add(MI.getOperand(0)) 3012 .addReg(MI.getOperand(0).getReg(), RegState::Undef) 3013 .add(MI.getOperand(1)) 3014 .add(MI.getOperand(2)) 3015 .add(MI.getOperand(3)) 3016 .add(MI.getOperand(4)) 3017 .add(MI.getOperand(5)); 3018 break; 3019 } 3020 } 3021 MIB.copyImplicitOps(MI); 3022 3023 if (LV) { 3024 unsigned NumOps = MI.getNumOperands(); 3025 for (unsigned I = 1; I < NumOps; ++I) { 3026 MachineOperand &Op = MI.getOperand(I); 3027 if (Op.isReg() && Op.isKill()) 3028 LV->replaceKillInstruction(Op.getReg(), MI, *MIB); 3029 } 3030 } 3031 3032 if (LIS) { 3033 SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB); 3034 3035 if (MI.getOperand(0).isEarlyClobber()) { 3036 // Use operand 1 was tied to early-clobber def operand 0, so its live 3037 // interval could have ended at an early-clobber slot. Now they are not 3038 // tied we need to update it to the normal register slot. 
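// (In SlotIndex terms: a segment that ends at the early-clobber slot of
// Idx is changed to end at the regular register slot, which is what the
// code below checks for and rewrites.)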
3039 LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg()); 3040 LiveRange::Segment *S = LI.getSegmentContaining(Idx); 3041 if (S->end == Idx.getRegSlot(true)) 3042 S->end = Idx.getRegSlot(); 3043 } 3044 } 3045 3046 return MIB; 3047 } 3048 3049 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS 3050 #undef CASE_WIDEOP_CHANGE_OPCODE_COMMON 3051 #undef CASE_WIDEOP_OPCODE_LMULS 3052 #undef CASE_WIDEOP_OPCODE_COMMON 3053 3054 void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, 3055 MachineBasicBlock &MBB, 3056 MachineBasicBlock::iterator II, 3057 const DebugLoc &DL, Register DestReg, 3058 int64_t Amount, 3059 MachineInstr::MIFlag Flag) const { 3060 assert(Amount > 0 && "There is no need to get VLEN scaled value."); 3061 assert(Amount % 8 == 0 && 3062 "Reserve the stack by the multiple of one vector size."); 3063 3064 MachineRegisterInfo &MRI = MF.getRegInfo(); 3065 int64_t NumOfVReg = Amount / 8; 3066 3067 BuildMI(MBB, II, DL, get(RISCV::PseudoReadVLENB), DestReg).setMIFlag(Flag); 3068 assert(isInt<32>(NumOfVReg) && 3069 "Expect the number of vector registers within 32-bits."); 3070 if (llvm::has_single_bit<uint32_t>(NumOfVReg)) { 3071 uint32_t ShiftAmount = Log2_32(NumOfVReg); 3072 if (ShiftAmount == 0) 3073 return; 3074 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3075 .addReg(DestReg, RegState::Kill) 3076 .addImm(ShiftAmount) 3077 .setMIFlag(Flag); 3078 } else if (STI.hasStdExtZba() && 3079 ((NumOfVReg % 3 == 0 && isPowerOf2_64(NumOfVReg / 3)) || 3080 (NumOfVReg % 5 == 0 && isPowerOf2_64(NumOfVReg / 5)) || 3081 (NumOfVReg % 9 == 0 && isPowerOf2_64(NumOfVReg / 9)))) { 3082 // We can use Zba SHXADD+SLLI instructions for multiply in some cases. 3083 unsigned Opc; 3084 uint32_t ShiftAmount; 3085 if (NumOfVReg % 9 == 0) { 3086 Opc = RISCV::SH3ADD; 3087 ShiftAmount = Log2_64(NumOfVReg / 9); 3088 } else if (NumOfVReg % 5 == 0) { 3089 Opc = RISCV::SH2ADD; 3090 ShiftAmount = Log2_64(NumOfVReg / 5); 3091 } else if (NumOfVReg % 3 == 0) { 3092 Opc = RISCV::SH1ADD; 3093 ShiftAmount = Log2_64(NumOfVReg / 3); 3094 } else { 3095 llvm_unreachable("Unexpected number of vregs"); 3096 } 3097 if (ShiftAmount) 3098 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3099 .addReg(DestReg, RegState::Kill) 3100 .addImm(ShiftAmount) 3101 .setMIFlag(Flag); 3102 BuildMI(MBB, II, DL, get(Opc), DestReg) 3103 .addReg(DestReg, RegState::Kill) 3104 .addReg(DestReg) 3105 .setMIFlag(Flag); 3106 } else if (llvm::has_single_bit<uint32_t>(NumOfVReg - 1)) { 3107 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3108 uint32_t ShiftAmount = Log2_32(NumOfVReg - 1); 3109 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) 3110 .addReg(DestReg) 3111 .addImm(ShiftAmount) 3112 .setMIFlag(Flag); 3113 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg) 3114 .addReg(ScaledRegister, RegState::Kill) 3115 .addReg(DestReg, RegState::Kill) 3116 .setMIFlag(Flag); 3117 } else if (llvm::has_single_bit<uint32_t>(NumOfVReg + 1)) { 3118 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3119 uint32_t ShiftAmount = Log2_32(NumOfVReg + 1); 3120 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) 3121 .addReg(DestReg) 3122 .addImm(ShiftAmount) 3123 .setMIFlag(Flag); 3124 BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg) 3125 .addReg(ScaledRegister, RegState::Kill) 3126 .addReg(DestReg, RegState::Kill) 3127 .setMIFlag(Flag); 3128 } else { 3129 Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3130 movImm(MBB, II, DL, N, NumOfVReg, Flag); 3131 if (!STI.hasStdExtM() && !STI.hasStdExtZmmul()) 
3132 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 3133 MF.getFunction(), 3134 "M- or Zmmul-extension must be enabled to calculate the vscaled size/" 3135 "offset."}); 3136 BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg) 3137 .addReg(DestReg, RegState::Kill) 3138 .addReg(N, RegState::Kill) 3139 .setMIFlag(Flag); 3140 } 3141 } 3142 3143 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 3144 RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const { 3145 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 3146 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"}, 3147 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}}; 3148 return ArrayRef(TargetFlags); 3149 } 3150 3151 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0. 3152 bool RISCV::isSEXT_W(const MachineInstr &MI) { 3153 return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() && 3154 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0; 3155 } 3156 3157 // Returns true if this is the zext.w pattern, adduw rd, rs1, x0. 3158 bool RISCV::isZEXT_W(const MachineInstr &MI) { 3159 return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() && 3160 MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0; 3161 } 3162 3163 // Returns true if this is the zext.b pattern, andi rd, rs1, 255. 3164 bool RISCV::isZEXT_B(const MachineInstr &MI) { 3165 return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() && 3166 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255; 3167 } 3168 3169 static bool isRVVWholeLoadStore(unsigned Opcode) { 3170 switch (Opcode) { 3171 default: 3172 return false; 3173 case RISCV::VS1R_V: 3174 case RISCV::VS2R_V: 3175 case RISCV::VS4R_V: 3176 case RISCV::VS8R_V: 3177 case RISCV::VL1RE8_V: 3178 case RISCV::VL2RE8_V: 3179 case RISCV::VL4RE8_V: 3180 case RISCV::VL8RE8_V: 3181 case RISCV::VL1RE16_V: 3182 case RISCV::VL2RE16_V: 3183 case RISCV::VL4RE16_V: 3184 case RISCV::VL8RE16_V: 3185 case RISCV::VL1RE32_V: 3186 case RISCV::VL2RE32_V: 3187 case RISCV::VL4RE32_V: 3188 case RISCV::VL8RE32_V: 3189 case RISCV::VL1RE64_V: 3190 case RISCV::VL2RE64_V: 3191 case RISCV::VL4RE64_V: 3192 case RISCV::VL8RE64_V: 3193 return true; 3194 } 3195 } 3196 3197 bool RISCV::isRVVSpill(const MachineInstr &MI) { 3198 // RVV lacks any support for immediate addressing for stack addresses, so be 3199 // conservative. 
3200 unsigned Opcode = MI.getOpcode(); 3201 if (!RISCVVPseudosTable::getPseudoInfo(Opcode) && 3202 !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode)) 3203 return false; 3204 return true; 3205 } 3206 3207 std::optional<std::pair<unsigned, unsigned>> 3208 RISCV::isRVVSpillForZvlsseg(unsigned Opcode) { 3209 switch (Opcode) { 3210 default: 3211 return std::nullopt; 3212 case RISCV::PseudoVSPILL2_M1: 3213 case RISCV::PseudoVRELOAD2_M1: 3214 return std::make_pair(2u, 1u); 3215 case RISCV::PseudoVSPILL2_M2: 3216 case RISCV::PseudoVRELOAD2_M2: 3217 return std::make_pair(2u, 2u); 3218 case RISCV::PseudoVSPILL2_M4: 3219 case RISCV::PseudoVRELOAD2_M4: 3220 return std::make_pair(2u, 4u); 3221 case RISCV::PseudoVSPILL3_M1: 3222 case RISCV::PseudoVRELOAD3_M1: 3223 return std::make_pair(3u, 1u); 3224 case RISCV::PseudoVSPILL3_M2: 3225 case RISCV::PseudoVRELOAD3_M2: 3226 return std::make_pair(3u, 2u); 3227 case RISCV::PseudoVSPILL4_M1: 3228 case RISCV::PseudoVRELOAD4_M1: 3229 return std::make_pair(4u, 1u); 3230 case RISCV::PseudoVSPILL4_M2: 3231 case RISCV::PseudoVRELOAD4_M2: 3232 return std::make_pair(4u, 2u); 3233 case RISCV::PseudoVSPILL5_M1: 3234 case RISCV::PseudoVRELOAD5_M1: 3235 return std::make_pair(5u, 1u); 3236 case RISCV::PseudoVSPILL6_M1: 3237 case RISCV::PseudoVRELOAD6_M1: 3238 return std::make_pair(6u, 1u); 3239 case RISCV::PseudoVSPILL7_M1: 3240 case RISCV::PseudoVRELOAD7_M1: 3241 return std::make_pair(7u, 1u); 3242 case RISCV::PseudoVSPILL8_M1: 3243 case RISCV::PseudoVRELOAD8_M1: 3244 return std::make_pair(8u, 1u); 3245 } 3246 } 3247 3248 bool RISCV::isFaultFirstLoad(const MachineInstr &MI) { 3249 return MI.getNumExplicitDefs() == 2 && MI.modifiesRegister(RISCV::VL) && 3250 !MI.isInlineAsm(); 3251 } 3252 3253 bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) { 3254 int16_t MI1FrmOpIdx = 3255 RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm); 3256 int16_t MI2FrmOpIdx = 3257 RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm); 3258 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0) 3259 return false; 3260 MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx); 3261 MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx); 3262 return FrmOp1.getImm() == FrmOp2.getImm(); 3263 } 3264 3265 std::optional<unsigned> 3266 RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) { 3267 // TODO: Handle Zvbb instructions 3268 switch (Opcode) { 3269 default: 3270 return std::nullopt; 3271 3272 // 11.6. Vector Single-Width Shift Instructions 3273 case RISCV::VSLL_VX: 3274 case RISCV::VSRL_VX: 3275 case RISCV::VSRA_VX: 3276 // 12.4. Vector Single-Width Scaling Shift Instructions 3277 case RISCV::VSSRL_VX: 3278 case RISCV::VSSRA_VX: 3279 // Only the low lg2(SEW) bits of the shift-amount value are used. 3280 return Log2SEW; 3281 3282 // 11.7 Vector Narrowing Integer Right Shift Instructions 3283 case RISCV::VNSRL_WX: 3284 case RISCV::VNSRA_WX: 3285 // 12.5. Vector Narrowing Fixed-Point Clip Instructions 3286 case RISCV::VNCLIPU_WX: 3287 case RISCV::VNCLIP_WX: 3288 // Only the low lg2(2*SEW) bits of the shift-amount value are used. 3289 return Log2SEW + 1; 3290 3291 // 11.1. Vector Single-Width Integer Add and Subtract 3292 case RISCV::VADD_VX: 3293 case RISCV::VSUB_VX: 3294 case RISCV::VRSUB_VX: 3295 // 11.2. 
Vector Widening Integer Add/Subtract 3296 case RISCV::VWADDU_VX: 3297 case RISCV::VWSUBU_VX: 3298 case RISCV::VWADD_VX: 3299 case RISCV::VWSUB_VX: 3300 case RISCV::VWADDU_WX: 3301 case RISCV::VWSUBU_WX: 3302 case RISCV::VWADD_WX: 3303 case RISCV::VWSUB_WX: 3304 // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions 3305 case RISCV::VADC_VXM: 3306 case RISCV::VADC_VIM: 3307 case RISCV::VMADC_VXM: 3308 case RISCV::VMADC_VIM: 3309 case RISCV::VMADC_VX: 3310 case RISCV::VSBC_VXM: 3311 case RISCV::VMSBC_VXM: 3312 case RISCV::VMSBC_VX: 3313 // 11.5 Vector Bitwise Logical Instructions 3314 case RISCV::VAND_VX: 3315 case RISCV::VOR_VX: 3316 case RISCV::VXOR_VX: 3317 // 11.8. Vector Integer Compare Instructions 3318 case RISCV::VMSEQ_VX: 3319 case RISCV::VMSNE_VX: 3320 case RISCV::VMSLTU_VX: 3321 case RISCV::VMSLT_VX: 3322 case RISCV::VMSLEU_VX: 3323 case RISCV::VMSLE_VX: 3324 case RISCV::VMSGTU_VX: 3325 case RISCV::VMSGT_VX: 3326 // 11.9. Vector Integer Min/Max Instructions 3327 case RISCV::VMINU_VX: 3328 case RISCV::VMIN_VX: 3329 case RISCV::VMAXU_VX: 3330 case RISCV::VMAX_VX: 3331 // 11.10. Vector Single-Width Integer Multiply Instructions 3332 case RISCV::VMUL_VX: 3333 case RISCV::VMULH_VX: 3334 case RISCV::VMULHU_VX: 3335 case RISCV::VMULHSU_VX: 3336 // 11.11. Vector Integer Divide Instructions 3337 case RISCV::VDIVU_VX: 3338 case RISCV::VDIV_VX: 3339 case RISCV::VREMU_VX: 3340 case RISCV::VREM_VX: 3341 // 11.12. Vector Widening Integer Multiply Instructions 3342 case RISCV::VWMUL_VX: 3343 case RISCV::VWMULU_VX: 3344 case RISCV::VWMULSU_VX: 3345 // 11.13. Vector Single-Width Integer Multiply-Add Instructions 3346 case RISCV::VMACC_VX: 3347 case RISCV::VNMSAC_VX: 3348 case RISCV::VMADD_VX: 3349 case RISCV::VNMSUB_VX: 3350 // 11.14. Vector Widening Integer Multiply-Add Instructions 3351 case RISCV::VWMACCU_VX: 3352 case RISCV::VWMACC_VX: 3353 case RISCV::VWMACCSU_VX: 3354 case RISCV::VWMACCUS_VX: 3355 // 11.15. Vector Integer Merge Instructions 3356 case RISCV::VMERGE_VXM: 3357 // 11.16. Vector Integer Move Instructions 3358 case RISCV::VMV_V_X: 3359 // 12.1. Vector Single-Width Saturating Add and Subtract 3360 case RISCV::VSADDU_VX: 3361 case RISCV::VSADD_VX: 3362 case RISCV::VSSUBU_VX: 3363 case RISCV::VSSUB_VX: 3364 // 12.2. Vector Single-Width Averaging Add and Subtract 3365 case RISCV::VAADDU_VX: 3366 case RISCV::VAADD_VX: 3367 case RISCV::VASUBU_VX: 3368 case RISCV::VASUB_VX: 3369 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation 3370 case RISCV::VSMUL_VX: 3371 // 16.1. Integer Scalar Move Instructions 3372 case RISCV::VMV_S_X: 3373 return 1U << Log2SEW; 3374 } 3375 } 3376 3377 unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) { 3378 const RISCVVPseudosTable::PseudoInfo *RVV = 3379 RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode); 3380 if (!RVV) 3381 return 0; 3382 return RVV->BaseInstr; 3383 } 3384
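// Usage sketch (pseudo name illustrative): calling getRVVMCOpcode on a
// vector pseudo such as RISCV::PseudoVADD_VV_M1 returns the underlying MC
// opcode (RISCV::VADD_VV), and 0 for anything not in the pseudo table.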