//===-- RISCVISelLowering.h - RISC-V DAG Lowering Interface -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
#define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H

#include "RISCV.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/TargetParser/RISCVTargetParser.h"
#include <optional>

namespace llvm {
class RISCVSubtarget;
struct RISCVRegisterInfo;
namespace RISCVISD {
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  RET_GLUE,
  SRET_GLUE,
  MRET_GLUE,
  CALL,
  /// Select with condition operator - This selects between a true value and
  /// a false value (ops #3 and #4) based on the boolean result of comparing
  /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
  /// condition code in op #2, an XLenVT constant from the ISD::CondCode enum.
  /// The lhs and rhs are XLenVT integers. The true and false values can be
  /// integer or floating point.
  SELECT_CC,
  BR_CC,
  BuildPairF64,
  SplitF64,
  TAIL,

  // Add the Lo 12 bits from an address. Selected to ADDI.
  ADD_LO,
  // Get the Hi 20 bits from an address. Selected to LUI.
  HI,

  // Represents an AUIPC+ADDI pair. Selected to PseudoLLA.
  LLA,

  // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.
  ADD_TPREL,

  // Load the address of a thread-local symbol under the general-dynamic TLS
  // model.
  LA_TLS_GD,

  // Multiply high for signed x unsigned.
  MULHSU,
  // RV64I shifts, directly matching the semantics of the named RISC-V
  // instructions.
  SLLW,
  SRAW,
  SRLW,
  // 32-bit operations from RV64M that can't be simply matched with a pattern
  // at instruction selection time. These have undefined behavior for division
  // by 0 or overflow (divw) like their target independent counterparts.
  DIVW,
  DIVUW,
  REMUW,
  // RV64IB rotates, directly matching the semantics of the named RISC-V
  // instructions.
  ROLW,
  RORW,
  // RV64IZbb bit counting instructions directly matching the semantics of the
  // named RISC-V instructions.
  CLZW,
  CTZW,

  // RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel.
  ABSW,

  // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
  // XLEN is the only legal integer width.
  //
  // FMV_H_X matches the semantics of FMV.H.X.
  // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
  // FMV_X_SIGNEXTH is similar to FMV.X.H and has a sign-extended result.
  // FMV_W_X_RV64 matches the semantics of FMV.W.X.
  // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
  //
  // This is a more convenient semantic for producing dagcombines that remove
  // unnecessary GPR->FPR->GPR moves.
  FMV_H_X,
  FMV_X_ANYEXTH,
  FMV_X_SIGNEXTH,
  FMV_W_X_RV64,
  FMV_X_ANYEXTW_RV64,
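  // For instance (illustrative), a DAG combine can fold
  // (FMV_X_ANYEXTH (FMV_H_X X)) back to X, eliminating a GPR->FPR->GPR round
  // trip.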
  // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
  // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out-of-range
  // inputs. These are used for FP_TO_S/UINT_SAT lowering. The rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
  FCVT_X,
  FCVT_XU,
  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out-of-range inputs.
  // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering. The rounding mode
  // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
  FCVT_W_RV64,
  FCVT_WU_RV64,

  FP_ROUND_BF16,
  FP_EXTEND_BF16,

  // Rounds an FP value to its corresponding integer in the same FP format.
  // The first operand is the value to round, the second operand is the largest
  // integer that can be represented exactly in the FP format. This will be
  // expanded into multiple instructions and basic blocks with a custom
  // inserter.
  FROUND,

  FPCLASS,

  // Floating point fmax and fmin matching the RISC-V instruction semantics.
  FMAX, FMIN,

  // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
  // (returns (Lo, Hi)). It takes a chain operand.
  READ_CYCLE_WIDE,
  // brev8, orc.b, zip, and unzip from Zbb and Zbkb. All operands are i32 or
  // XLenVT.
  BREV8,
  ORC_B,
  ZIP,
  UNZIP,

  // Scalar cryptography
  CLMUL, CLMULH, CLMULR,
  SHA256SIG0, SHA256SIG1, SHA256SUM0, SHA256SUM1,
  SM4KS, SM4ED,
  SM3P0, SM3P1,

  // Vector Extension
  // VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
  // for the VL value to be used for the operation. The first operand is the
  // passthru operand.
  VMV_V_V_VL,
  // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
  // for the VL value to be used for the operation. The first operand is the
  // passthru operand.
  VMV_V_X_VL,
  // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra
  // operand for the VL value to be used for the operation. The first operand
  // is the passthru operand.
  VFMV_V_F_VL,
  // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
  // extended from the vector element size.
  VMV_X_S,
  // VMV_S_X_VL matches the semantics of vmv.s.x. It carries a VL operand.
  VMV_S_X_VL,
  // VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand.
  VFMV_S_F_VL,
  // Splats a 64-bit value that has been split into two i32 parts. This is
  // expanded late to two scalar stores and a stride 0 vector load.
  // The first operand is the passthru operand.
  SPLAT_VECTOR_SPLIT_I64_VL,
  // Read the VLENB CSR.
  READ_VLENB,
  // Truncates an RVV integer vector by one power-of-two. Carries both an extra
  // mask and VL operand.
  TRUNCATE_VECTOR_VL,
  // Matches the semantics of vslideup/vslidedown. The first operand is the
  // pass-thru operand, the second is the source vector, the third is the
  // XLenVT index (either constant or non-constant), the fourth is the mask
  // and the fifth the VL.
  VSLIDEUP_VL,
  VSLIDEDOWN_VL,
  // Matches the semantics of vslide1up/vslide1down. The first operand is the
  // passthru operand, the second is the source vector, and the third is the
  // XLenVT scalar value. The fourth and fifth operands are the mask and VL
  // operands.
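  // For example (illustrative): vslide1up places the scalar at element 0 and
  // moves source element i to position i+1, while vslide1down shifts each
  // element down by one and places the scalar at the last active element.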
  VSLIDE1UP_VL,
  VSLIDE1DOWN_VL,
  // Matches the semantics of vfslide1up/vfslide1down. The first operand is the
  // passthru operand, the second is the source vector, and the third is a
  // scalar value whose type matches the element type of the vectors. The
  // fourth and fifth operands are the mask and VL operands.
  VFSLIDE1UP_VL,
  VFSLIDE1DOWN_VL,
  // Matches the semantics of the vid.v instruction, with a mask and VL
  // operand.
  VID_VL,
  // Matches the semantics of the vfncvt.rod instruction (convert double-width
  // float to single-width float, rounding towards odd). Takes a double-width
  // float vector and produces a single-width float vector. Also has a mask and
  // VL operand.
  VFNCVT_ROD_VL,
  // These nodes match the semantics of the corresponding RVV vector reduction
  // instructions. They produce a vector result which is the reduction
  // performed over the second vector operand plus the first element of the
  // third vector operand. The first operand is the pass-thru operand. The
  // second operand is an unconstrained vector type, and the result, first, and
  // third operand's types are expected to be the corresponding full-width
  // LMUL=1 type for the second operand:
  //   nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8
  //   nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32
  // The difference in types does introduce extra vsetvli instructions but
  // similarly it reduces the number of registers consumed per reduction.
  // Also has a mask and VL operand.
  VECREDUCE_ADD_VL,
  VECREDUCE_UMAX_VL,
  VECREDUCE_SMAX_VL,
  VECREDUCE_UMIN_VL,
  VECREDUCE_SMIN_VL,
  VECREDUCE_AND_VL,
  VECREDUCE_OR_VL,
  VECREDUCE_XOR_VL,
  VECREDUCE_FADD_VL,
  VECREDUCE_SEQ_FADD_VL,
  VECREDUCE_FMIN_VL,
  VECREDUCE_FMAX_VL,

  // Vector binary ops with a merge as a third operand, a mask as a fourth
  // operand, and VL as a fifth operand.
  ADD_VL,
  AND_VL,
  MUL_VL,
  OR_VL,
  SDIV_VL,
  SHL_VL,
  SREM_VL,
  SRA_VL,
  SRL_VL,
  SUB_VL,
  UDIV_VL,
  UREM_VL,
  XOR_VL,
  SMIN_VL,
  SMAX_VL,
  UMIN_VL,
  UMAX_VL,

  BITREVERSE_VL,
  BSWAP_VL,
  CTLZ_VL,
  CTTZ_VL,
  CTPOP_VL,

  SADDSAT_VL,
  UADDSAT_VL,
  SSUBSAT_VL,
  USUBSAT_VL,

  MULHS_VL,
  MULHU_VL,
  FADD_VL,
  FSUB_VL,
  FMUL_VL,
  FDIV_VL,
  FMINNUM_VL,
  FMAXNUM_VL,

  // Vector unary ops with a mask as a second operand and VL as a third
  // operand.
  FNEG_VL,
  FABS_VL,
  FSQRT_VL,
  FCLASS_VL,
  FCOPYSIGN_VL, // Has a merge operand.
  VFCVT_RTZ_X_F_VL,
  VFCVT_RTZ_XU_F_VL,
  VFCVT_X_F_VL,
  VFCVT_XU_F_VL,
  VFROUND_NOEXCEPT_VL,
  VFCVT_RM_X_F_VL,  // Has a rounding mode operand.
  VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
  SINT_TO_FP_VL,
  UINT_TO_FP_VL,
  VFCVT_RM_F_X_VL,  // Has a rounding mode operand.
  VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
  FP_ROUND_VL,
  FP_EXTEND_VL,

  // Vector FMA ops with a mask as a fourth operand and VL as a fifth operand.
  VFMADD_VL,
  VFNMADD_VL,
  VFMSUB_VL,
  VFNMSUB_VL,

  // Vector widening FMA ops with a mask as a fourth operand and VL as a fifth
  // operand.
  VFWMADD_VL,
  VFWNMADD_VL,
  VFWMSUB_VL,
  VFWNMSUB_VL,

  // Widening instructions with a merge value as a third operand, a mask as a
  // fourth operand, and VL as a fifth operand.
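  // For example (illustrative): VWADD_VL adds two SEW-wide sources into a
  // 2*SEW-wide result, mirroring vwadd.vv, while the *_W_VL forms take an
  // already-widened first source, mirroring vwadd.wv.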
  VWMUL_VL,
  VWMULU_VL,
  VWMULSU_VL,
  VWADD_VL,
  VWADDU_VL,
  VWSUB_VL,
  VWSUBU_VL,
  VWADD_W_VL,
  VWADDU_W_VL,
  VWSUB_W_VL,
  VWSUBU_W_VL,

  VFWMUL_VL,
  VFWADD_VL,
  VFWSUB_VL,
  VFWADD_W_VL,
  VFWSUB_W_VL,

  // Widening ternary operations with a mask as the fourth operand and VL as
  // the fifth operand.
  VWMACC_VL,
  VWMACCU_VL,
  VWMACCSU_VL,

  // Narrowing logical shift right.
  // Operands are (source, shift, passthru, mask, vl).
  VNSRL_VL,

  // Vector compare producing a mask. The fourth operand is the input mask, and
  // the fifth operand is VL.
  SETCC_VL,

  // Vector select with an additional VL operand. This operation is unmasked.
  VSELECT_VL,
  // Vector select with operand #2 (the value when the condition is false) tied
  // to the destination and an additional VL operand. This operation is
  // unmasked.
  VP_MERGE_VL,

  // Mask binary operators.
  VMAND_VL,
  VMOR_VL,
  VMXOR_VL,

  // Set mask vector to all zeros or ones.
  VMCLR_VL,
  VMSET_VL,

  // Matches the semantics of vrgather.vx and vrgather.vv with extra operands
  // for passthru and VL. Operands are (src, index, mask, passthru, vl).
  VRGATHER_VX_VL,
  VRGATHER_VV_VL,
  VRGATHEREI16_VV_VL,

  // Vector sign/zero extend with additional mask & VL operands.
  VSEXT_VL,
  VZEXT_VL,

  // vcpop.m with additional mask and VL operands.
  VCPOP_VL,

  // vfirst.m with additional mask and VL operands.
  VFIRST_VL,

  // Reads the value of a CSR.
  // The first operand is a chain pointer. The second specifies the address of
  // the required CSR. Two results are produced, the read value and the new
  // chain pointer.
  READ_CSR,
  // Writes a value to a CSR.
  // The first operand is a chain pointer, the second specifies the address of
  // the required CSR and the third is the value to write. The result is the
  // new chain pointer.
  WRITE_CSR,
  // Reads and writes the value of a CSR.
  // The first operand is a chain pointer, the second specifies the address of
  // the required CSR and the third is the value to write. Two results are
  // produced, the value read before the modification and the new chain
  // pointer.
  SWAP_CSR,

  // Branchless select operations, matching the semantics of the instructions
  // defined in Zicond or XVentanaCondOps.
  CZERO_EQZ, // vt.maskc for XVentanaCondOps.
  CZERO_NEZ, // vt.maskcn for XVentanaCondOps.

  // FP to 32 bit int conversions for RV64. These are used to keep track of the
  // result being sign extended to 64 bit. These saturate out-of-range inputs.
  STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCVT_WU_RV64,
  STRICT_FADD_VL,
  STRICT_FSUB_VL,
  STRICT_FMUL_VL,
  STRICT_FDIV_VL,
  STRICT_FSQRT_VL,
  STRICT_VFMADD_VL,
  STRICT_VFNMADD_VL,
  STRICT_VFMSUB_VL,
  STRICT_VFNMSUB_VL,
  STRICT_FP_ROUND_VL,
  STRICT_FP_EXTEND_VL,
  STRICT_VFNCVT_ROD_VL,
  STRICT_SINT_TO_FP_VL,
  STRICT_UINT_TO_FP_VL,
  STRICT_VFCVT_RM_X_F_VL,
  STRICT_VFCVT_RTZ_X_F_VL,
  STRICT_VFCVT_RTZ_XU_F_VL,
  STRICT_FSETCC_VL,
  STRICT_FSETCCS_VL,
  STRICT_VFROUND_NOEXCEPT_VL,

  // WARNING: Do not add anything after this point unless you want the node to
  // have a memop! Every opcode starting from FIRST_TARGET_MEMORY_OPCODE is
  // treated as a target memory op!

  // Represents an AUIPC+L[WD] pair. Selected to PseudoLGA.
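  // The L[WD] here is a GOT load, which is why this node and the ones below it
  // are placed after FIRST_TARGET_MEMORY_OPCODE and carry memory operands.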
  LGA = ISD::FIRST_TARGET_MEMORY_OPCODE,
  // Load the initial-exec thread-local address.
  LA_TLS_IE,

  TH_LWD,
  TH_LWUD,
  TH_LDD,
  TH_SWD,
  TH_SDD,
};
} // namespace RISCVISD

class RISCVTargetLowering : public TargetLowering {
  const RISCVSubtarget &Subtarget;

public:
  explicit RISCVTargetLowering(const TargetMachine &TM,
                               const RISCVSubtarget &STI);

  const RISCVSubtarget &getSubtarget() const { return Subtarget; }

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;
  bool isLegalICmpImmediate(int64_t Imm) const override;
  bool isLegalAddImmediate(int64_t Imm) const override;
  bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
  bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;
  bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
  bool signExtendConstant(const ConstantInt *CI) const override;
  bool isCheapToSpeculateCttz(Type *Ty) const override;
  bool isCheapToSpeculateCtlz(Type *Ty) const override;
  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
  bool hasAndNotCompare(SDValue Y) const override;
  bool hasBitTest(SDValue X, SDValue Y) const override;
  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;
  /// Return true if the (vector) instruction I will be lowered to an
  /// instruction with a scalar splat operand for the given Operand number.
  bool canSplatOperand(Instruction *I, int Operand) const;
  /// Return true if a vector instruction will lower to a target instruction
  /// able to splat the given operand.
  bool canSplatOperand(unsigned Opcode, int Operand) const;
  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
  bool shouldScalarizeBinop(SDValue VecOp) const override;
  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
  int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool preferScalarizeSplat(SDNode *N) const override;

  bool softPromoteHalfType() const override { return true; }

  /// Return the register type for a given MVT, ensuring vectors are treated
  /// as a series of gpr sized integers.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                    EVT VT) const override;

  /// Return the number of registers for a given MVT, ensuring vectors are
  /// treated as a series of gpr sized integers.
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                         CallingConv::ID CC,
                                         EVT VT) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                            EVT VT) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
    // If the pair to store is a mixture of float and int values, we will
    // save two bitwise instructions and one float-to-int instruction and
    // add one store instruction. There is potentially a more significant
    // benefit because it avoids the float->int domain switch for the input
    // value. So it is more likely a win.
    if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
        (LTy.isInteger() && HTy.isFloatingPoint()))
      return true;
    // If the pair only contains int values, we will save two bitwise
    // instructions and add one store instruction (costing one more store
    // buffer). Since the benefit is more blurred we leave such a pair out
    // until we get a testcase to prove it is a win.
    return false;
  }

  bool
  shouldExpandBuildVectorWithShuffles(EVT VT,
                                      unsigned DefinedValues) const override;

  // Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;

  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth) const override;
  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth) const override;

  const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

  // This method returns the name of a target-specific DAG node.
  const char *getTargetNodeName(unsigned Opcode) const override;

  MachineMemOperand::Flags
  getTargetMMOFlags(const Instruction &I) const override;

  MachineMemOperand::Flags
  getTargetMMOFlags(const MemSDNode &Node) const override;

  bool
  areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
                                      const MemSDNode &NodeY) const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;

  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *BB) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                     SDNode *Node) const override;

  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    if (VT == MVT::i8 || VT == MVT::i16)
      return false;

    return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
  }

  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
                                    unsigned AddrSpace) const override {
    // If we can replace 4 or more scalar stores, there will be a reduction
    // in instructions even after we add a vector constant load.
    return NumElem >= 4;
  }

  bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
    return VT.isScalarInteger();
  }
  bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }

  bool preferZeroCompareBranch() const override { return true; }

  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
    return isa<LoadInst>(I) || isa<StoreInst>(I);
  }
  Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                AtomicOrdering Ord) const override;
  Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                 AtomicOrdering Ord) const override;

  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;

  ISD::NodeType getExtendForAtomicOps() const override {
    return ISD::SIGN_EXTEND;
  }

  ISD::NodeType getExtendForAtomicCmpSwapArg() const override {
    return ISD::SIGN_EXTEND;
  }

  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override;

  TargetLowering::ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                     unsigned ExpansionFactor) const override {
    if (DAG.getMachineFunction().getFunction().hasMinSize())
      return ShiftLegalizationStrategy::LowerToLibcall;
    return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
                                                              ExpansionFactor);
  }

  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override;

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

  bool shouldExtendTypeInLibCall(EVT Type) const override;
  bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;

  /// Returns the register with the specified architectural or ABI name. This
  /// method is necessary to lower the llvm.read_register.* and
  /// llvm.write_register.* intrinsics. Allocatable registers must be reserved
  /// with the clang -ffixed-xX flag for access to be allowed.
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;

  // Lower incoming arguments, copy physregs into vregs.
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;
  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;
  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;
  SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool shouldConsiderGEPOffsetSplit() const override { return true; }

  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
                                   SDValue ConstNode) const override;

  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
  Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
                                      Value *AlignedAddr, Value *Incr,
                                      Value *Mask, Value *ShiftAmt,
                                      AtomicOrdering Ord) const override;
  TargetLowering::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
  Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
                                          AtomicCmpXchgInst *CI,
                                          Value *AlignedAddr, Value *CmpVal,
                                          Value *NewVal, Value *Mask,
                                          AtomicOrdering Ord) const override;
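
  // Illustrative note: part-word (i8/i16) atomics are expanded to calls to the
  // masked intrinsics above on an aligned containing word, e.g.
  // llvm.riscv.masked.atomicrmw.add.i32 on RV32.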

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;

  bool splitValueIntoRegisterParts(
      SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
      unsigned NumParts, MVT PartVT,
      std::optional<CallingConv::ID> CC) const override;

  SDValue joinRegisterPartsIntoValue(
      SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
      unsigned NumParts, MVT PartVT, EVT ValueVT,
      std::optional<CallingConv::ID> CC) const override;

  // Return the value of VLMax for the given vector type (i.e. SEW and LMUL).
  SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const;

  static RISCVII::VLMUL getLMUL(MVT VT);
  inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize,
                                      unsigned MinSize) {
    // Original equation:
    //   VLMAX = (VectorBits / EltSize) * LMUL
    // where LMUL = MinSize / RISCV::RVVBitsPerBlock
    // The following equation has been reordered to prevent loss of precision
    // when calculating fractional LMUL.
    return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
  }
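  // Worked example for computeVLMAX (illustrative): VectorBits = 256,
  // EltSize = 32 and MinSize = 128 give LMUL = 128 / RISCV::RVVBitsPerBlock = 2
  // and VLMAX = ((256 / 32) * 128) / 64 = 16, i.e. (256 / 32) * 2.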
  static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
  static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
  static unsigned getRegClassIDForVecVT(MVT VT);
  static std::pair<unsigned, unsigned>
  decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
                                           unsigned InsertExtractIdx,
                                           const RISCVRegisterInfo *TRI);
  MVT getContainerForFixedLengthVector(MVT VT) const;

  bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;

  bool isLegalElementTypeForRVV(EVT ScalarTy) const;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  unsigned getJumpTableEncoding() const override;

  const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                          const MachineBasicBlock *MBB,
                                          unsigned uid,
                                          MCContext &Ctx) const override;

  bool isVScaleKnownToBeAPowerOfTwo() const override;

  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;

  bool isLegalScaleForGatherScatter(uint64_t Scale,
                                    uint64_t ElemSize) const override {
    // Scaled addressing is not supported on indexed loads/stores.
    return Scale == 1;
  }

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  /// Returns whether or not generating an interleaved load/store intrinsic for
  /// this type will be legal.
  bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                    Align Alignment, unsigned AddrSpace,
                                    const DataLayout &) const;

  /// Return true if a strided load/store of the given result type and
  /// alignment is legal.
  bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;

  unsigned getMaxSupportedInterleaveFactor() const override { return 8; }

  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;

  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
                                        LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                       StoreInst *SI) const override;

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                              MachineBasicBlock::instr_iterator &MBBI,
                              const TargetInstrInfo *TII) const override;

  /// RISCVCCAssignFn - This target-specific function extends the default
  /// CCValAssign with additional information used to lower RISC-V calling
  /// conventions.
  typedef bool RISCVCCAssignFn(const DataLayout &DL, RISCVABI::ABI,
                               unsigned ValNo, MVT ValVT, MVT LocVT,
                               CCValAssign::LocInfo LocInfo,
                               ISD::ArgFlagsTy ArgFlags, CCState &State,
                               bool IsFixed, bool IsRet, Type *OrigTy,
                               const RISCVTargetLowering &TLI,
                               std::optional<unsigned> FirstMaskArgument);

private:
  void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
                        const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
                        RISCVCCAssignFn Fn) const;
  void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
                         const SmallVectorImpl<ISD::OutputArg> &Outs,
                         bool IsRet, CallLoweringInfo *CLI,
                         RISCVCCAssignFn Fn) const;

  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true,
                  bool IsExternWeak = false) const;
  SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
                           bool UseGOT) const;
  SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;

  SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
  SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                             int64_t ExtTrueVal) const;
  SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG,
                                      bool IsVP) const;
  SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc,
                    bool HasMergeOp = false) const;
  SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc,
                         unsigned VecOpc) const;
  SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
                             unsigned RISCVISDOpc) const;
  SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
                                            unsigned ExtendOpc) const;
  SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;

  SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
  SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;

  bool isEligibleForTailCallOptimization(
      CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
      const SmallVector<CCValAssign, 16> &ArgLocs) const;

  /// Generate error diagnostics if any register used by CC has been marked
  /// reserved.
  void validateCCReservedRegs(
      const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
      MachineFunction &MF) const;

  bool useRVVForFixedLengthVectorVT(MVT VT) const;

  MVT getVPExplicitVectorLengthTy() const override;

  bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
                                   bool IsScalable) const override;

  /// RVV code generation for fixed length vectors does not lower all
  /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
  /// this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  /// Disable normalizing
  ///   select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
  ///   select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)).
  /// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
    return false;
  }

  /// For available scheduling models FDIV + two independent FMULs are much
  /// faster than two FDIVs.
  unsigned combineRepeatedFPDivisors() const override;
};

namespace RISCV {

bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
              MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
              ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
              bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
              std::optional<unsigned> FirstMaskArgument);

bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
                     std::optional<unsigned> FirstMaskArgument);

bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
                  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                  CCState &State);
} // end namespace RISCV

namespace RISCVVIntrinsicsTable {

struct RISCVVIntrinsicInfo {
  unsigned IntrinsicID;
  uint8_t ScalarOperand;
  uint8_t VLOperand;
  bool hasScalarOperand() const {
    // 0xF is not valid. See NoScalarOperand in IntrinsicsRISCV.td.
    return ScalarOperand != 0xF;
  }
  bool hasVLOperand() const {
    // 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td.
    return VLOperand != 0x1F;
  }
};

using namespace RISCV;

#define GET_RISCVVIntrinsicsTable_DECL
#include "RISCVGenSearchableTables.inc"
#undef GET_RISCVVIntrinsicsTable_DECL

} // end namespace RISCVVIntrinsicsTable

} // end namespace llvm

#endif