//===-- RISCVISelLowering.h - RISC-V DAG Lowering Interface -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
#define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H

#include "RISCV.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace llvm {
class InstructionCost;
class RISCVSubtarget;
struct RISCVRegisterInfo;
class RVVArgDispatcher;

namespace RISCVISD {
// clang-format off
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  RET_GLUE,
  SRET_GLUE,
  MRET_GLUE,
  CALL,
  /// Select with condition operator - This selects between a true value and
  /// a false value (ops #3 and #4) based on the boolean result of comparing
  /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
  /// condition code in op #2, an XLenVT constant from the ISD::CondCode enum.
  /// The lhs and rhs are XLenVT integers. The true and false values can be
  /// integer or floating point.
  SELECT_CC,
  BR_CC,
  BuildPairF64,
  SplitF64,
  TAIL,

  // Add the Lo 12 bits from an address. Selected to ADDI.
  ADD_LO,
  // Get the Hi 20 bits from an address. Selected to LUI.
  HI,

  // Represents an AUIPC+ADDI pair. Selected to PseudoLLA.
  LLA,

  // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.
  ADD_TPREL,

  // Multiply high for signed x unsigned.
  MULHSU,

  // Represents (ADD (SHL a, b), c) with the arguments appearing in the order
  // a, b, c. 'b' must be a constant. Maps to sh1add/sh2add/sh3add with Zba
  // or addsl with XTheadBa.
  SHL_ADD,

  // RV64I shifts, directly matching the semantics of the named RISC-V
  // instructions.
  SLLW,
  SRAW,
  SRLW,
  // 32-bit operations from RV64M that can't be simply matched with a pattern
  // at instruction selection time. These have undefined behavior for division
  // by 0 or overflow (divw) like their target independent counterparts.
  DIVW,
  DIVUW,
  REMUW,
  // RV64IB rotates, directly matching the semantics of the named RISC-V
  // instructions.
  ROLW,
  RORW,
  // RV64IZbb bit counting instructions directly matching the semantics of the
  // named RISC-V instructions.
  CLZW,
  CTZW,

  // RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel.
  ABSW,

  // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
  // XLEN is the only legal integer width.
  //
  // FMV_H_X matches the semantics of the FMV.H.X instruction.
  // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
  // FMV_X_SIGNEXTH is similar to FMV.X.H and has a sign-extended result.
  // FMV_W_X_RV64 matches the semantics of the FMV.W.X instruction.
  // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
  //
  // This is a more convenient semantic for producing dagcombines that remove
  // unnecessary GPR->FPR->GPR moves.
  FMV_H_X,
  FMV_X_ANYEXTH,
  FMV_X_SIGNEXTH,
  FMV_W_X_RV64,
  FMV_X_ANYEXTW_RV64,
  // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
  // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out of
  // range inputs. These are used for FP_TO_S/UINT_SAT lowering. Rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
  FCVT_X,
  FCVT_XU,
  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out of range inputs.
  // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering. Rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
  FCVT_W_RV64,
  FCVT_WU_RV64,

  FP_ROUND_BF16,
  FP_EXTEND_BF16,

  // Rounds an FP value to its corresponding integer in the same FP format.
  // First operand is the value to round, the second operand is the largest
  // integer that can be represented exactly in the FP format. This will be
  // expanded into multiple instructions and basic blocks with a custom
  // inserter.
  FROUND,

  FCLASS,

  // Floating point fmax and fmin matching the RISC-V instruction semantics.
  FMAX, FMIN,

  // A read of the 64-bit counter CSR on a 32-bit target (returns (Lo, Hi)).
  // It takes a chain operand and another two target constant operands (the
  // CSR numbers of the low and high parts of the counter).
  READ_COUNTER_WIDE,

  // brev8, orc.b, zip, and unzip from Zbb and Zbkb. All operands are i32 or
  // XLenVT.
  BREV8,
  ORC_B,
  ZIP,
  UNZIP,

  // Scalar cryptography
  CLMUL, CLMULH, CLMULR,
  SHA256SIG0, SHA256SIG1, SHA256SUM0, SHA256SUM1,
  SM4KS, SM4ED,
  SM3P0, SM3P1,

  // May-Be-Operations
  MOPR, MOPRR,

  // Vector Extension
  FIRST_VL_VECTOR_OP,
  // VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
  // for the VL value to be used for the operation. The first operand is the
  // passthru operand.
  VMV_V_V_VL = FIRST_VL_VECTOR_OP,
  // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
  // for the VL value to be used for the operation. The first operand is the
  // passthru operand.
  VMV_V_X_VL,
  // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra
  // operand for the VL value to be used for the operation. The first operand
  // is the passthru operand.
  VFMV_V_F_VL,
  // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
  // extended from the vector element size.
  VMV_X_S,
  // VMV_S_X_VL matches the semantics of vmv.s.x. It carries a VL operand.
  VMV_S_X_VL,
  // VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand.
  VFMV_S_F_VL,
  // Splats a 64-bit value that has been split into two i32 parts. This is
  // expanded late to two scalar stores and a stride 0 vector load.
  // The first operand is the passthru operand.
  SPLAT_VECTOR_SPLIT_I64_VL,
  // Truncates an RVV integer vector by one power-of-two. Carries both an extra
  // mask and VL operand.
  TRUNCATE_VECTOR_VL,
  // Matches the semantics of vslideup/vslidedown. The first operand is the
  // pass-thru operand, the second is the source vector, the third is the
  // XLenVT index (either constant or non-constant), the fourth is the mask,
  // the fifth is the VL and the sixth is the policy.
  VSLIDEUP_VL,
  VSLIDEDOWN_VL,
  // Matches the semantics of vslide1up/vslide1down. The first operand is the
  // passthru operand, the second is the source vector, the third is the XLenVT
  // scalar value. The fourth and fifth operands are the mask and VL operands.
  VSLIDE1UP_VL,
  VSLIDE1DOWN_VL,
  // Matches the semantics of vfslide1up/vfslide1down. The first operand is the
  // passthru operand, the second is the source vector, the third is a scalar
  // value whose type matches the element type of the vectors. The fourth and
  // fifth operands are the mask and VL operands.
  VFSLIDE1UP_VL,
  VFSLIDE1DOWN_VL,
  // Matches the semantics of the vid.v instruction, with a mask and VL
  // operand.
  VID_VL,
  // Matches the semantics of the vfncvt.rod instruction (convert double-width
  // float to single-width float, rounding towards odd). Takes a double-width
  // float vector and produces a single-width float vector. Also has a mask and
  // VL operand.
  VFNCVT_ROD_VL,
  // These nodes match the semantics of the corresponding RVV vector reduction
  // instructions. They produce a vector result which is the reduction
  // performed over the second vector operand plus the first element of the
  // third vector operand. The first operand is the pass-thru operand. The
  // second operand is an unconstrained vector type, and the result, first, and
  // third operand's types are expected to be the corresponding full-width
  // LMUL=1 type for the second operand:
  //   nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8
  //   nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32
  // The difference in types does introduce extra vsetvli instructions, but
  // similarly it reduces the number of registers consumed per reduction.
  // Also has a mask and VL operand.
  VECREDUCE_ADD_VL,
  VECREDUCE_UMAX_VL,
  VECREDUCE_SMAX_VL,
  VECREDUCE_UMIN_VL,
  VECREDUCE_SMIN_VL,
  VECREDUCE_AND_VL,
  VECREDUCE_OR_VL,
  VECREDUCE_XOR_VL,
  VECREDUCE_FADD_VL,
  VECREDUCE_SEQ_FADD_VL,
  VECREDUCE_FMIN_VL,
  VECREDUCE_FMAX_VL,

  // Vector binary ops with a merge as a third operand, a mask as a fourth
  // operand, and VL as a fifth operand.
  ADD_VL,
  AND_VL,
  MUL_VL,
  OR_VL,
  SDIV_VL,
  SHL_VL,
  SREM_VL,
  SRA_VL,
  SRL_VL,
  ROTL_VL,
  ROTR_VL,
  SUB_VL,
  UDIV_VL,
  UREM_VL,
  XOR_VL,
  SMIN_VL,
  SMAX_VL,
  UMIN_VL,
  UMAX_VL,

  BITREVERSE_VL,
  BSWAP_VL,
  CTLZ_VL,
  CTTZ_VL,
  CTPOP_VL,

  SADDSAT_VL,
  UADDSAT_VL,
  SSUBSAT_VL,
  USUBSAT_VL,

  // Averaging adds of signed integers.
  AVGFLOORS_VL,
  // Averaging adds of unsigned integers.
  AVGFLOORU_VL,
  // Rounding averaging adds of signed integers.
  AVGCEILS_VL,
  // Rounding averaging adds of unsigned integers.
  AVGCEILU_VL,

  // Operands are (source, shift, merge, mask, roundmode, vl).
  VNCLIPU_VL,
  VNCLIP_VL,

  MULHS_VL,
  MULHU_VL,
  FADD_VL,
  FSUB_VL,
  FMUL_VL,
  FDIV_VL,
  VFMIN_VL,
  VFMAX_VL,

  // Vector unary ops with a mask as a second operand and VL as a third
  // operand.
  FNEG_VL,
  FABS_VL,
  FSQRT_VL,
  FCLASS_VL,
  FCOPYSIGN_VL, // Has a merge operand
  VFCVT_RTZ_X_F_VL,
  VFCVT_RTZ_XU_F_VL,
  VFCVT_X_F_VL,
  VFCVT_XU_F_VL,
  VFROUND_NOEXCEPT_VL,
  VFCVT_RM_X_F_VL,  // Has a rounding mode operand.
  VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
  SINT_TO_FP_VL,
  UINT_TO_FP_VL,
  VFCVT_RM_F_X_VL,  // Has a rounding mode operand.
  VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
  FP_ROUND_VL,
  FP_EXTEND_VL,

  // Vector FMA ops with a mask as a fourth operand and VL as a fifth operand.
  VFMADD_VL,
  VFNMADD_VL,
  VFMSUB_VL,
  VFNMSUB_VL,

  // Vector widening FMA ops with a mask as a fourth operand and VL as a fifth
  // operand.
  VFWMADD_VL,
  VFWNMADD_VL,
  VFWMSUB_VL,
  VFWNMSUB_VL,

  // Widening instructions with a merge value as a third operand, a mask as a
  // fourth operand, and VL as a fifth operand.
  VWMUL_VL,
  VWMULU_VL,
  VWMULSU_VL,
  VWADD_VL,
  VWADDU_VL,
  VWSUB_VL,
  VWSUBU_VL,
  VWADD_W_VL,
  VWADDU_W_VL,
  VWSUB_W_VL,
  VWSUBU_W_VL,
  VWSLL_VL,

  VFWMUL_VL,
  VFWADD_VL,
  VFWSUB_VL,
  VFWADD_W_VL,
  VFWSUB_W_VL,

  // Widening ternary operations with a mask as the fourth operand and VL as
  // the fifth operand.
  VWMACC_VL,
  VWMACCU_VL,
  VWMACCSU_VL,

  // Narrowing logical shift right.
  // Operands are (source, shift, passthru, mask, vl).
  VNSRL_VL,

  // Vector compare producing a mask. Fourth operand is the input mask. Fifth
  // operand is VL.
  SETCC_VL,

  // General vmerge node with mask, true, false, passthru, and vl operands.
  // Tail agnostic vselect can be implemented by setting the passthru to undef.
  VMERGE_VL,

  // Mask binary operators.
  VMAND_VL,
  VMOR_VL,
  VMXOR_VL,

  // Set mask vector to all zeros or ones.
  VMCLR_VL,
  VMSET_VL,

  // Matches the semantics of vrgather.vx and vrgather.vv with extra operands
  // for passthru and VL. Operands are (src, index, mask, passthru, vl).
  VRGATHER_VX_VL,
  VRGATHER_VV_VL,
  VRGATHEREI16_VV_VL,

  // Vector sign/zero extend with additional mask & VL operands.
  VSEXT_VL,
  VZEXT_VL,

  // vcpop.m with additional mask and VL operands.
  VCPOP_VL,

  // vfirst.m with additional mask and VL operands.
  VFIRST_VL,

  LAST_VL_VECTOR_OP = VFIRST_VL,

  // Read VLENB CSR.
  READ_VLENB,
  // Reads value of CSR.
  // The first operand is a chain pointer. The second specifies the address of
  // the required CSR. Two results are produced, the read value and the new
  // chain pointer.
  READ_CSR,
  // Write value to CSR.
  // The first operand is a chain pointer, the second specifies the address of
  // the required CSR and the third is the value to write. The result is the
  // new chain pointer.
  WRITE_CSR,
  // Read and write value of CSR.
  // The first operand is a chain pointer, the second specifies the address of
  // the required CSR and the third is the value to write. Two results are
  // produced, the value read before the modification and the new chain
  // pointer.
  SWAP_CSR,

  // Branchless select operations, matching the semantics of the instructions
  // defined in Zicond or XVentanaCondOps.
  CZERO_EQZ, // vt.maskc for XVentanaCondOps.
  CZERO_NEZ, // vt.maskcn for XVentanaCondOps.

  /// Software guarded BRIND node. Operand 0 is the chain operand and
  /// operand 1 is the target address.
  SW_GUARDED_BRIND,

  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out of range inputs.
  STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCVT_WU_RV64,
  STRICT_FADD_VL,
  STRICT_FSUB_VL,
  STRICT_FMUL_VL,
  STRICT_FDIV_VL,
  STRICT_FSQRT_VL,
  STRICT_VFMADD_VL,
  STRICT_VFNMADD_VL,
  STRICT_VFMSUB_VL,
  STRICT_VFNMSUB_VL,
  STRICT_FP_ROUND_VL,
  STRICT_FP_EXTEND_VL,
  STRICT_VFNCVT_ROD_VL,
  STRICT_SINT_TO_FP_VL,
  STRICT_UINT_TO_FP_VL,
  STRICT_VFCVT_RM_X_F_VL,
  STRICT_VFCVT_RTZ_X_F_VL,
  STRICT_VFCVT_RTZ_XU_F_VL,
  STRICT_FSETCC_VL,
  STRICT_FSETCCS_VL,
  STRICT_VFROUND_NOEXCEPT_VL,
  LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,

  SF_VC_XV_SE,
  SF_VC_IV_SE,
  SF_VC_VV_SE,
  SF_VC_FV_SE,
  SF_VC_XVV_SE,
  SF_VC_IVV_SE,
  SF_VC_VVV_SE,
  SF_VC_FVV_SE,
  SF_VC_XVW_SE,
  SF_VC_IVW_SE,
  SF_VC_VVW_SE,
  SF_VC_FVW_SE,
  SF_VC_V_X_SE,
  SF_VC_V_I_SE,
  SF_VC_V_XV_SE,
  SF_VC_V_IV_SE,
  SF_VC_V_VV_SE,
  SF_VC_V_FV_SE,
  SF_VC_V_XVV_SE,
  SF_VC_V_IVV_SE,
  SF_VC_V_VVV_SE,
  SF_VC_V_FVV_SE,
  SF_VC_V_XVW_SE,
  SF_VC_V_IVW_SE,
  SF_VC_V_VVW_SE,
  SF_VC_V_FVW_SE,

  // WARNING: Do not add anything after this point unless you want the node to
  // have a memop! Starting from FIRST_TARGET_MEMORY_OPCODE, all opcodes are
  // treated as target memory ops!

  TH_LWD = ISD::FIRST_TARGET_MEMORY_OPCODE,
  TH_LWUD,
  TH_LDD,
  TH_SWD,
  TH_SDD,
};
// clang-format on
} // namespace RISCVISD

class RISCVTargetLowering : public TargetLowering {
  const RISCVSubtarget &Subtarget;

public:
  explicit RISCVTargetLowering(const TargetMachine &TM,
                               const RISCVSubtarget &STI);

  const RISCVSubtarget &getSubtarget() const { return Subtarget; }

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;
  bool isLegalICmpImmediate(int64_t Imm) const override;
  bool isLegalAddImmediate(int64_t Imm) const override;
  bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
  bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
  bool isTruncateFree(SDValue Val, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;
  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
  bool signExtendConstant(const ConstantInt *CI) const override;
  bool isCheapToSpeculateCttz(Type *Ty) const override;
  bool isCheapToSpeculateCtlz(Type *Ty) const override;
  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
  bool hasAndNotCompare(SDValue Y) const override;
  bool hasBitTest(SDValue X, SDValue Y) const override;
  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;
  /// Return true if the (vector) instruction I will be lowered to an
  /// instruction with a scalar splat operand for the given Operand number.
  bool canSplatOperand(Instruction *I, int Operand) const;
  /// Return true if a vector instruction will lower to a target instruction
  /// able to splat the given operand.
  bool canSplatOperand(unsigned Opcode, int Operand) const;
  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
  bool shouldScalarizeBinop(SDValue VecOp) const override;
  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
  std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool preferScalarizeSplat(SDNode *N) const override;

  bool softPromoteHalfType() const override { return true; }

  /// Return the register type for a given MVT, ensuring vectors are treated
  /// as a series of GPR-sized integers.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;

  /// Return the number of registers for a given MVT, ensuring vectors are
  /// treated as a series of GPR-sized integers.
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
                                                CallingConv::ID CC, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    // If the pair to store is a mixture of float and int values, we will
    // save two bitwise instructions and one float-to-int instruction and
    // add one store instruction. There is potentially a more significant
    // benefit because it avoids the float->int domain switch for the input
    // value, so it is more likely a win.
    if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
        (LTy.isInteger() && HTy.isFloatingPoint()))
      return true;
    // If the pair only contains int values, we will save two bitwise
    // instructions and add one store instruction (costing one more store
    // buffer entry). Since the benefit is more blurred, we leave such a pair
    // out until we get a testcase to prove it is a win.
    return false;
  }

  bool
  shouldExpandBuildVectorWithShuffles(EVT VT,
                                      unsigned DefinedValues) const override;

  bool shouldExpandCttzElements(EVT VT) const override;

  /// Return the cost of LMUL for linear operations.
  InstructionCost getLMULCost(MVT VT) const;

  InstructionCost getVRGatherVVCost(MVT VT) const;
  InstructionCost getVRGatherVICost(MVT VT) const;
  InstructionCost getVSlideVXCost(MVT VT) const;
  InstructionCost getVSlideVICost(MVT VT) const;

  // Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth) const override;
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  bool canCreateUndefOrPoisonForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           bool PoisonOnly, bool ConsiderFlags,
                                           unsigned Depth) const override;

  const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

  // This method returns the name of a target specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  MachineMemOperand::Flags
  getTargetMMOFlags(const Instruction &I) const override;

  MachineMemOperand::Flags
  getTargetMMOFlags(const MemSDNode &Node) const override;

  bool
  areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
                                      const MemSDNode &NodeY) const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *BB) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    if (VT == MVT::i8 || VT == MVT::i16)
      return false;

    return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
  }

  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
                                    unsigned AddrSpace) const override {
    // If we can replace 4 or more scalar stores, there will be a reduction
    // in instructions even after we add a vector constant load.
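    // (Illustrative count, not taken from measurements: four scalar constant
    // stores usually take four store instructions plus the immediates they
    // materialize, while the vector form needs roughly one constant
    // materialization/load and a single vector store.)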
    return NumElem >= 4;
  }

  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }
  bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }

  bool isCtpopFast(EVT VT) const override;

  unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override;

  bool preferZeroCompareBranch() const override { return true; }

  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
    return isa<LoadInst>(I) || isa<StoreInst>(I);
  }
  Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                AtomicOrdering Ord) const override;
  Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                 AtomicOrdering Ord) const override;

  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;

  ISD::NodeType getExtendForAtomicOps() const override {
    return ISD::SIGN_EXTEND;
  }

  ISD::NodeType getExtendForAtomicCmpSwapArg() const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override;

  TargetLowering::ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override {
    if (DAG.getMachineFunction().getFunction().hasMinSize())
      return ShiftLegalizationStrategy::LowerToLibcall;
    return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
                                                              ExpansionFactor);
  }

  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  bool shouldExtendTypeInLibCall(EVT Type) const override;
  bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;

  /// Returns the register with the specified architectural or ABI name. This
  /// method is necessary to lower the llvm.read_register.* and
  /// llvm.write_register.* intrinsics. Allocatable registers must be reserved
  /// with the clang -ffixed-xX flag for access to be allowed.
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  // Lower incoming arguments, copy physregs into vregs.
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;
  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;
  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;
  SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool shouldConsiderGEPOffsetSplit() const override { return true; }

  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
  Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
                                      Value *AlignedAddr, Value *Incr,
                                      Value *Mask, Value *ShiftAmt,
                                      AtomicOrdering Ord) const override;
  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
  Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
                                          AtomicCmpXchgInst *CI,
                                          Value *AlignedAddr, Value *CmpVal,
                                          Value *NewVal, Value *Mask,
                                          AtomicOrdering Ord) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;

  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;

  bool splitValueIntoRegisterParts(
      SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
      unsigned NumParts, MVT PartVT,
      std::optional<CallingConv::ID> CC) const override;

  SDValue joinRegisterPartsIntoValue(
      SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
      unsigned NumParts, MVT PartVT, EVT ValueVT,
      std::optional<CallingConv::ID> CC) const override;

  // Return the value of VLMax for the given vector type (i.e. SEW and LMUL).
  SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const;

  static RISCVII::VLMUL getLMUL(MVT VT);
  inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize,
                                      unsigned MinSize) {
    // Original equation:
    //   VLMAX = (VectorBits / EltSize) * LMUL
    // where LMUL = MinSize / RISCV::RVVBitsPerBlock
    // The following equations have been reordered to prevent loss of precision
    // when calculating fractional LMUL.
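    //
    // Illustrative example (assuming RISCV::RVVBitsPerBlock is 64): for
    // VectorBits (VLEN) = 128, EltSize = 32 and MinSize = 32 (a fractional
    // LMUL = 1/2 type), the reordered form below yields
    // ((128 / 32) * 32) / 64 = 2, whereas computing LMUL = 32 / 64 first
    // would truncate to 0 in integer arithmetic.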
    return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
  }

  // Return inclusive (low, high) bounds on the value of VLMAX for the
  // given scalable container type, given known bounds on VLEN.
  static std::pair<unsigned, unsigned>
  computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget);

  static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
  static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
  static unsigned getRegClassIDForVecVT(MVT VT);
  static std::pair<unsigned, unsigned>
  decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
                                           unsigned InsertExtractIdx,
                                           const RISCVRegisterInfo *TRI);
  MVT getContainerForFixedLengthVector(MVT VT) const;

  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;

  bool isLegalElementTypeForRVV(EVT ScalarTy) const;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  unsigned getJumpTableEncoding() const override;

  const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                          const MachineBasicBlock *MBB,
                                          unsigned uid,
                                          MCContext &Ctx) const override;

  bool isVScaleKnownToBeAPowerOfTwo() const override;

  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  bool isLegalScaleForGatherScatter(uint64_t Scale,
                                    uint64_t ElemSize) const override {
    // Scaled addressing is not supported on indexed loads/stores.
    return Scale == 1;
  }

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  /// Returns whether or not generating an interleaved load/store intrinsic for
  /// this type will be legal.
  bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                    Align Alignment, unsigned AddrSpace,
                                    const DataLayout &) const;

  /// Return true if a strided load/store of the given result type and
  /// alignment is legal.
  bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;

  unsigned getMaxSupportedInterleaveFactor() const override { return 8; }

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;

  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
                                        LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                       StoreInst *SI) const override;

  bool supportKCFIBundles() const override { return true; }

  SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
                                 int JTI, SelectionDAG &DAG) const override;

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  /// RISCVCCAssignFn - This target-specific function extends the default
  /// CCValAssign with additional information used to lower RISC-V calling
  /// conventions.
  typedef bool RISCVCCAssignFn(const DataLayout &DL, RISCVABI::ABI,
                               unsigned ValNo, MVT ValVT, MVT LocVT,
                               CCValAssign::LocInfo LocInfo,
                               ISD::ArgFlagsTy ArgFlags, CCState &State,
                               bool IsFixed, bool IsRet, Type *OrigTy,
                               const RISCVTargetLowering &TLI,
                               RVVArgDispatcher &RVVDispatcher);

private:
  void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
                        const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
                        RISCVCCAssignFn Fn) const;
  void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         bool IsRet, CallLoweringInfo *CLI,
                         RISCVCCAssignFn Fn) const;

  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true,
                  bool IsExternWeak = false) const;
  SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
                           bool UseGOT) const;
  SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
  SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;

  SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
  SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                             int64_t ExtTrueVal) const;
  SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG,
                                      bool IsVP) const;
  SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSplatExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPCttzElements(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
                                            unsigned ExtendOpc) const;
  SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;

  SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
  bool isEligibleForTailCallOptimization(
      CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
      const SmallVector<CCValAssign, 16> &ArgLocs) const;

  /// Generate error diagnostics if any register used by CC has been marked
  /// reserved.
  void validateCCReservedRegs(
      const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
      MachineFunction &MF) const;

  bool useRVVForFixedLengthVectorVT(MVT VT) const;

  MVT getVPExplicitVectorLengthTy() const override;

  bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
                                   bool IsScalable) const override;

  /// RVV code generation for fixed length vectors does not lower all
  /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
  /// this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  /// Disable normalizing
  ///   select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
  ///   select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)).
  /// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
    return false;
  }

  /// For the available scheduling models, FDIV + two independent FMULs are
  /// much faster than two FDIVs.
  unsigned combineRepeatedFPDivisors() const override;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;

  bool shouldFoldSelectWithSingleBitTest(EVT VT,
                                         const APInt &AndMask) const override;

  unsigned getMinimumJumpTableEntries() const override;

  SDValue emitFlushICache(SelectionDAG &DAG, SDValue InChain, SDValue Start,
                          SDValue End, SDValue Flags, SDLoc DL) const;
};

/// As per the spec, the rules for passing vector arguments are as follows (an
/// illustrative example follows the list):
///
/// 1. For the first vector mask argument, use v0 to pass it.
/// 2. For vector data arguments or the remaining vector mask arguments,
/// starting from the v8 register, if a vector register group between v8-v23
/// that has not been allocated can be found and the first register number is
/// a multiple of LMUL, then allocate this vector register group to the
/// argument and mark these registers as allocated. Otherwise, the argument is
/// passed by reference and is replaced in the argument list with the address.
/// 3. For tuple vector data arguments, starting from the v8 register, if
/// NFIELDS consecutive vector register groups between v8-v23 that have not
/// been allocated can be found and the first register number is a multiple of
/// LMUL, then allocate these vector register groups to the argument and mark
/// these registers as allocated. Otherwise, the argument is passed by
/// reference and is replaced in the argument list with the address.
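///
/// Illustrative example for rule 2 (assuming an LMUL=2 vector data argument):
/// the candidate register groups are v8v9, v10v11, ..., v22v23; the first
/// group containing no allocated register is assigned to the argument, and if
/// no such group remains the argument is passed indirectly by address.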
class RVVArgDispatcher {
public:
  static constexpr unsigned NumArgVRs = 16;

  struct RVVArgInfo {
    unsigned NF;
    MVT VT;
    bool FirstVMask = false;
  };

  template <typename Arg>
  RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI,
                   ArrayRef<Arg> ArgList)
      : MF(MF), TLI(TLI) {
    constructArgInfos(ArgList);
    compute();
  }

  RVVArgDispatcher() = default;

  MCPhysReg getNextPhysReg();

private:
  SmallVector<RVVArgInfo, 4> RVVArgInfos;
  SmallVector<MCPhysReg, 4> AllocatedPhysRegs;

  const MachineFunction *MF = nullptr;
  const RISCVTargetLowering *TLI = nullptr;

  unsigned CurIdx = 0;

  template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret);
  void compute();
  void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1,
                       unsigned StartReg = 0);
};

namespace RISCV {

bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
              MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
              ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
              bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
              RVVArgDispatcher &RVVDispatcher);

bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     RVVArgDispatcher &RVVDispatcher);

bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                  CCState &State);

ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI);

} // end namespace RISCV

namespace RISCVVIntrinsicsTable {

struct RISCVVIntrinsicInfo {
  unsigned IntrinsicID;
  uint8_t ScalarOperand;
  uint8_t VLOperand;
  bool hasScalarOperand() const {
    // 0xF is not valid. See NoScalarOperand in IntrinsicsRISCV.td.
    return ScalarOperand != 0xF;
  }
  bool hasVLOperand() const {
    // 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td.
    return VLOperand != 0x1F;
  }
};

using namespace RISCV;

#define GET_RISCVVIntrinsicsTable_DECL
#include "RISCVGenSearchableTables.inc"
#undef GET_RISCVVIntrinsicsTable_DECL

} // end namespace RISCVVIntrinsicsTable

} // end namespace llvm

#endif