1 //===-- RISCVISelLowering.h - RISC-V DAG Lowering Interface -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISC-V uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
15 #define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
16 
17 #include "RISCV.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include <optional>
22 
23 namespace llvm {
24 class InstructionCost;
25 class RISCVSubtarget;
26 struct RISCVRegisterInfo;
27 class RVVArgDispatcher;
28 
29 namespace RISCVISD {
30 // clang-format off
31 enum NodeType : unsigned {
32   FIRST_NUMBER = ISD::BUILTIN_OP_END,
33   RET_GLUE,
34   SRET_GLUE,
35   MRET_GLUE,
36   CALL,
37   /// Select with condition operator - This selects between a true value and
38   /// a false value (ops #3 and #4) based on the boolean result of comparing
39   /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
40   /// condition code in op #2, a XLenVT constant from the ISD::CondCode enum.
41   /// The lhs and rhs are XLenVT integers. The true and false values can be
42   /// integer or floating point.
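  /// As an illustration only (not an additional constraint), a scalar
  /// max(a, b) on RV64 could be expressed with this node as
  ///   (SELECT_CC a, b, setgt, a, b)
  /// where ops #0/#1 are the compared XLenVT values, op #2 is the condition
  /// code and ops #3/#4 are the values selected between.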
43   SELECT_CC,
44   BR_CC,
45   BuildPairF64,
46   SplitF64,
47   TAIL,
48 
49   // Add the Lo 12 bits from an address. Selected to ADDI.
50   ADD_LO,
51   // Get the Hi 20 bits from an address. Selected to LUI.
52   HI,
53 
54   // Represents an AUIPC+ADDI pair. Selected to PseudoLLA.
55   LLA,
56 
57   // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.
58   ADD_TPREL,
59 
60   // Multiply high for signed * unsigned.
61   MULHSU,
62 
63   // Represents (ADD (SHL a, b), c) with the arguments appearing in the order
64   // a, b, c.  'b' must be a constant.  Maps to sh1add/sh2add/sh3add with Zba
65   // or th.addsl with XTheadBa.
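  // Illustrative example: (SHL_ADD a, 2, c) computes ((a << 2) + c) and, when
  // Zba is available, can be selected to a single sh2add.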
66   SHL_ADD,
67 
68   // RV64I shifts, directly matching the semantics of the named RISC-V
69   // instructions.
70   SLLW,
71   SRAW,
72   SRLW,
73   // 32-bit operations from RV64M that can't be simply matched with a pattern
74   // at instruction selection time. These have undefined behavior for division
75   // by 0 or overflow (divw) like their target independent counterparts.
76   DIVW,
77   DIVUW,
78   REMUW,
79   // RV64IB rotates, directly matching the semantics of the named RISC-V
80   // instructions.
81   ROLW,
82   RORW,
83   // RV64IZbb bit counting instructions directly matching the semantics of the
84   // named RISC-V instructions.
85   CLZW,
86   CTZW,
87 
88   // RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel.
89   ABSW,
90 
91   // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
92   // XLEN is the only legal integer width.
93   //
94   // FMV_H_X matches the semantics of the FMV.H.X instruction.
95   // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
96   // FMV_X_SIGNEXTH is similar to FMV.X.H but has a sign-extended result.
97   // FMV_W_X_RV64 matches the semantics of the FMV.W.X instruction.
98   // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
99   //
100   // This is a more convenient semantic for producing dagcombines that remove
101   // unnecessary GPR->FPR->GPR moves.
102   FMV_H_X,
103   FMV_X_ANYEXTH,
104   FMV_X_SIGNEXTH,
105   FMV_W_X_RV64,
106   FMV_X_ANYEXTW_RV64,
107   // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
108   // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out of
109   // range inputs. These are used for FP_TO_S/UINT_SAT lowering. Rounding mode
110   // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
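  // A possible form, shown for illustration only: a saturating conversion
  // using round-to-nearest-even could be built as
  //   (FCVT_X src, (TargetConstant RISCVFPRndMode::RNE))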
111   FCVT_X,
112   FCVT_XU,
113   // FP to 32 bit int conversions for RV64. These are used to keep track of the
114   // result being sign extended to 64 bit. These saturate out of range inputs.
115   // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering. Rounding mode
116   // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
117   FCVT_W_RV64,
118   FCVT_WU_RV64,
119 
120   FP_ROUND_BF16,
121   FP_EXTEND_BF16,
122 
123   // Rounds an FP value to its corresponding integer in the same FP format.
124   // First operand is the value to round, the second operand is the largest
125   // integer that can be represented exactly in the FP format. This will be
126   // expanded into multiple instructions and basic blocks with a custom
127   // inserter.
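  // For f32, for example, the second operand would typically be 2^23
  // (8388608.0f), since any value with magnitude >= 2^23 already has no
  // fractional bits and needs no rounding.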
128   FROUND,
129 
130   FCLASS,
131 
132   // Floating point fmax and fmin matching the RISC-V instruction semantics.
133   FMAX, FMIN,
134 
135   // A read of the 64-bit counter CSR on a 32-bit target (returns (Lo, Hi)).
136   // It takes a chain operand and another two target constant operands (the
137   // CSR numbers of the low and high parts of the counter).
138   READ_COUNTER_WIDE,
139 
140   // brev8, orc.b, zip, and unzip from Zbb and Zbkb. All operands are i32 or
141   // XLenVT.
142   BREV8,
143   ORC_B,
144   ZIP,
145   UNZIP,
146 
147   // Scalar cryptography
148   CLMUL, CLMULH, CLMULR,
149   SHA256SIG0, SHA256SIG1, SHA256SUM0, SHA256SUM1,
150   SM4KS, SM4ED,
151   SM3P0, SM3P1,
152 
153   // May-Be-Operations
154   MOPR, MOPRR,
155 
156   // Vector Extension
157   FIRST_VL_VECTOR_OP,
158   // VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
159   // for the VL value to be used for the operation. The first operand is
160   // the passthru operand.
161   VMV_V_V_VL = FIRST_VL_VECTOR_OP,
162   // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
163   // for the VL value to be used for the operation. The first operand is
164   // the passthru operand.
165   VMV_V_X_VL,
166   // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand
167   // for the VL value to be used for the operation. The first operand is
168   // the passthru operand.
169   VFMV_V_F_VL,
170   // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
171   // extended from the vector element size.
172   VMV_X_S,
173   // VMV_S_X_VL matches the semantics of vmv.s.x. It carries a VL operand.
174   VMV_S_X_VL,
175   // VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand.
176   VFMV_S_F_VL,
177   // Splats a 64-bit value that has been split into two i32 parts. This is
178   // expanded late to two scalar stores and a stride 0 vector load.
179   // The first operand is the passthru operand.
180   SPLAT_VECTOR_SPLIT_I64_VL,
181   // Truncates an RVV integer vector by one power-of-two. Carries both an extra
182   // mask and VL operand.
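  // For illustration (types assumed): an nxv4i32 -> nxv4i16 truncate is a
  // single TRUNCATE_VECTOR_VL, while nxv4i32 -> nxv4i8 would be built from two
  // of these nodes, halving the element width at each step.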
183   TRUNCATE_VECTOR_VL,
184   // Matches the semantics of vslideup/vslidedown. The first operand is the
185   // pass-thru operand, the second is the source vector, the third is the XLenVT
186   // index (either constant or non-constant), the fourth is the mask, the fifth
187   // is the VL and the sixth is the policy.
188   VSLIDEUP_VL,
189   VSLIDEDOWN_VL,
190   // Matches the semantics of vslide1up/vslide1down. The first operand is the
191   // passthru operand, the second is the source vector and the third is the
192   // XLenVT scalar value. The fourth and fifth operands are the mask and VL.
193   VSLIDE1UP_VL,
194   VSLIDE1DOWN_VL,
195   // Matches the semantics of vfslide1up/vfslide1down. The first operand is the
196   // passthru operand, the second is the source vector and the third is a scalar
197   // value whose type matches the element type of the vectors. The fourth and
198   // fifth operands are the mask and VL operands.
199   VFSLIDE1UP_VL,
200   VFSLIDE1DOWN_VL,
201   // Matches the semantics of the vid.v instruction, with a mask and VL
202   // operand.
203   VID_VL,
204   // Matches the semantics of the vfncvt.rod instruction (convert double-width
205   // float to single-width float, rounding towards odd). Takes a double-width
206   // float vector and produces a single-width float vector. Also has a mask and
207   // VL operand.
208   VFNCVT_ROD_VL,
209   // These nodes match the semantics of the corresponding RVV vector reduction
210   // instructions. They produce a vector result which is the reduction
211   // performed over the second vector operand plus the first element of the
212   // third vector operand. The first operand is the pass-thru operand. The
213   // second operand is an unconstrained vector type, and the result, first, and
214   // third operands' types are expected to be the corresponding full-width
215   // LMUL=1 type for the second operand:
216   //   nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8
217   //   nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32
218   // The difference in types does introduce extra vsetvli instructions, but it
219   // reduces the number of registers consumed per reduction.
220   // Also has a mask and VL operand.
221   VECREDUCE_ADD_VL,
222   VECREDUCE_UMAX_VL,
223   VECREDUCE_SMAX_VL,
224   VECREDUCE_UMIN_VL,
225   VECREDUCE_SMIN_VL,
226   VECREDUCE_AND_VL,
227   VECREDUCE_OR_VL,
228   VECREDUCE_XOR_VL,
229   VECREDUCE_FADD_VL,
230   VECREDUCE_SEQ_FADD_VL,
231   VECREDUCE_FMIN_VL,
232   VECREDUCE_FMAX_VL,
233 
234   // Vector binary ops with a merge value as a third operand, a mask as a
235   // fourth operand, and VL as a fifth operand.
236   ADD_VL,
237   AND_VL,
238   MUL_VL,
239   OR_VL,
240   SDIV_VL,
241   SHL_VL,
242   SREM_VL,
243   SRA_VL,
244   SRL_VL,
245   ROTL_VL,
246   ROTR_VL,
247   SUB_VL,
248   UDIV_VL,
249   UREM_VL,
250   XOR_VL,
251   SMIN_VL,
252   SMAX_VL,
253   UMIN_VL,
254   UMAX_VL,
255 
256   BITREVERSE_VL,
257   BSWAP_VL,
258   CTLZ_VL,
259   CTTZ_VL,
260   CTPOP_VL,
261 
262   SADDSAT_VL,
263   UADDSAT_VL,
264   SSUBSAT_VL,
265   USUBSAT_VL,
266 
267   // Averaging adds of signed integers.
268   AVGFLOORS_VL,
269   // Averaging adds of unsigned integers.
270   AVGFLOORU_VL,
271   // Rounding averaging adds of signed integers.
272   AVGCEILS_VL,
273   // Rounding averaging adds of unsigned integers.
274   AVGCEILU_VL,
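  // For reference, assuming the usual averaging-add semantics, AVGFLOORU_VL
  // computes floor((a + b) / 2) and AVGCEILU_VL computes ceil((a + b) / 2),
  // without overflowing the intermediate sum.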
275 
276   // Operands are (source, shift, merge, mask, roundmode, vl)
277   VNCLIPU_VL,
278   VNCLIP_VL,
279 
280   MULHS_VL,
281   MULHU_VL,
282   FADD_VL,
283   FSUB_VL,
284   FMUL_VL,
285   FDIV_VL,
286   VFMIN_VL,
287   VFMAX_VL,
288 
289   // Vector unary ops with a mask as a second operand and VL as a third operand.
290   FNEG_VL,
291   FABS_VL,
292   FSQRT_VL,
293   FCLASS_VL,
294   FCOPYSIGN_VL, // Has a merge operand
295   VFCVT_RTZ_X_F_VL,
296   VFCVT_RTZ_XU_F_VL,
297   VFCVT_X_F_VL,
298   VFCVT_XU_F_VL,
299   VFROUND_NOEXCEPT_VL,
300   VFCVT_RM_X_F_VL,  // Has a rounding mode operand.
301   VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
302   SINT_TO_FP_VL,
303   UINT_TO_FP_VL,
304   VFCVT_RM_F_X_VL,  // Has a rounding mode operand.
305   VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
306   FP_ROUND_VL,
307   FP_EXTEND_VL,
308 
309   // Vector FMA ops with a mask as a fourth operand and VL as a fifth operand.
310   VFMADD_VL,
311   VFNMADD_VL,
312   VFMSUB_VL,
313   VFNMSUB_VL,
314 
315   // Vector widening FMA ops with a mask as a fourth operand and VL as a fifth
316   // operand.
317   VFWMADD_VL,
318   VFWNMADD_VL,
319   VFWMSUB_VL,
320   VFWNMSUB_VL,
321 
322   // Widening instructions with a merge value as a third operand, a mask as a
323   // fourth operand, and VL as a fifth operand.
324   VWMUL_VL,
325   VWMULU_VL,
326   VWMULSU_VL,
327   VWADD_VL,
328   VWADDU_VL,
329   VWSUB_VL,
330   VWSUBU_VL,
331   VWADD_W_VL,
332   VWADDU_W_VL,
333   VWSUB_W_VL,
334   VWSUBU_W_VL,
335   VWSLL_VL,
336 
337   VFWMUL_VL,
338   VFWADD_VL,
339   VFWSUB_VL,
340   VFWADD_W_VL,
341   VFWSUB_W_VL,
342 
343   // Widening ternary operations with a mask as the fourth operand and VL as the
344   // fifth operand.
345   VWMACC_VL,
346   VWMACCU_VL,
347   VWMACCSU_VL,
348 
349   // Narrowing logical shift right.
350   // Operands are (source, shift, passthru, mask, vl)
351   VNSRL_VL,
352 
353   // Vector compare producing a mask. Fourth operand is input mask. Fifth
354   // operand is VL.
355   SETCC_VL,
356 
357   // General vmerge node with mask, true, false, passthru, and vl operands.
358   // Tail agnostic vselect can be implemented by setting passthru to undef.
359   VMERGE_VL,
360 
361   // Mask binary operators.
362   VMAND_VL,
363   VMOR_VL,
364   VMXOR_VL,
365 
366   // Set mask vector to all zeros or ones.
367   VMCLR_VL,
368   VMSET_VL,
369 
370   // Matches the semantics of vrgather.vx and vrgather.vv with extra operands
371   // for passthru and VL. Operands are (src, index, mask, passthru, vl).
372   VRGATHER_VX_VL,
373   VRGATHER_VV_VL,
374   VRGATHEREI16_VV_VL,
375 
376   // Vector sign/zero extend with additional mask & VL operands.
377   VSEXT_VL,
378   VZEXT_VL,
379 
380   //  vcpop.m with additional mask and VL operands.
381   VCPOP_VL,
382 
383   //  vfirst.m with additional mask and VL operands.
384   VFIRST_VL,
385 
386   LAST_VL_VECTOR_OP = VFIRST_VL,
387 
388   // Read VLENB CSR
389   READ_VLENB,
390   // Reads the value of a CSR.
391   // The first operand is a chain pointer. The second specifies the address of
392   // the required CSR. Two results are produced: the read value and the new
393   // chain pointer.
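  // Illustrative form: (value, chain_out) = (READ_CSR chain_in, csr_address).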
394   READ_CSR,
395   // Writes a value to a CSR.
396   // The first operand is a chain pointer, the second specifies the address of
397   // the required CSR and the third is the value to write. The result is the
398   // new chain pointer.
399   WRITE_CSR,
400   // Reads and writes the value of a CSR.
401   // The first operand is a chain pointer, the second specifies the address of
402   // the required CSR and the third is the value to write. Two results are
403   // produced: the value read before the modification and the new chain pointer.
404   SWAP_CSR,
405 
406   // Branchless select operations, matching the semantics of the instructions
407   // defined in Zicond or XVentanaCondOps.
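  // For example, (CZERO_EQZ x, cond) yields x when cond is non-zero and 0 when
  // cond is zero, mirroring czero.eqz; CZERO_NEZ is the complementary form.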
408   CZERO_EQZ, // vt.maskc for XVentanaCondOps.
409   CZERO_NEZ, // vt.maskcn for XVentanaCondOps.
410 
411   /// Software guarded BRIND node. Operand 0 is the chain operand and
412   /// operand 1 is the target address.
413   SW_GUARDED_BRIND,
414 
415   // FP to 32 bit int conversions for RV64. These are used to keep track of the
416   // result being sign extended to 64 bit. These saturate out of range inputs.
417   STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
418   STRICT_FCVT_WU_RV64,
419   STRICT_FADD_VL,
420   STRICT_FSUB_VL,
421   STRICT_FMUL_VL,
422   STRICT_FDIV_VL,
423   STRICT_FSQRT_VL,
424   STRICT_VFMADD_VL,
425   STRICT_VFNMADD_VL,
426   STRICT_VFMSUB_VL,
427   STRICT_VFNMSUB_VL,
428   STRICT_FP_ROUND_VL,
429   STRICT_FP_EXTEND_VL,
430   STRICT_VFNCVT_ROD_VL,
431   STRICT_SINT_TO_FP_VL,
432   STRICT_UINT_TO_FP_VL,
433   STRICT_VFCVT_RM_X_F_VL,
434   STRICT_VFCVT_RTZ_X_F_VL,
435   STRICT_VFCVT_RTZ_XU_F_VL,
436   STRICT_FSETCC_VL,
437   STRICT_FSETCCS_VL,
438   STRICT_VFROUND_NOEXCEPT_VL,
439   LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,
440 
441   SF_VC_XV_SE,
442   SF_VC_IV_SE,
443   SF_VC_VV_SE,
444   SF_VC_FV_SE,
445   SF_VC_XVV_SE,
446   SF_VC_IVV_SE,
447   SF_VC_VVV_SE,
448   SF_VC_FVV_SE,
449   SF_VC_XVW_SE,
450   SF_VC_IVW_SE,
451   SF_VC_VVW_SE,
452   SF_VC_FVW_SE,
453   SF_VC_V_X_SE,
454   SF_VC_V_I_SE,
455   SF_VC_V_XV_SE,
456   SF_VC_V_IV_SE,
457   SF_VC_V_VV_SE,
458   SF_VC_V_FV_SE,
459   SF_VC_V_XVV_SE,
460   SF_VC_V_IVV_SE,
461   SF_VC_V_VVV_SE,
462   SF_VC_V_FVV_SE,
463   SF_VC_V_XVW_SE,
464   SF_VC_V_IVW_SE,
465   SF_VC_V_VVW_SE,
466   SF_VC_V_FVW_SE,
467 
468   // WARNING: Do not add anything at the end unless you want the node to
469   // have a memop! Starting from FIRST_TARGET_MEMORY_OPCODE, all subsequent
470   // opcodes will be treated as target memory ops!
471 
472   TH_LWD = ISD::FIRST_TARGET_MEMORY_OPCODE,
473   TH_LWUD,
474   TH_LDD,
475   TH_SWD,
476   TH_SDD,
477 };
478 // clang-format on
479 } // namespace RISCVISD
480 
481 class RISCVTargetLowering : public TargetLowering {
482   const RISCVSubtarget &Subtarget;
483 
484 public:
485   explicit RISCVTargetLowering(const TargetMachine &TM,
486                                const RISCVSubtarget &STI);
487 
488   const RISCVSubtarget &getSubtarget() const { return Subtarget; }
489 
490   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
491                           MachineFunction &MF,
492                           unsigned Intrinsic) const override;
493   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
494                              unsigned AS,
495                              Instruction *I = nullptr) const override;
496   bool isLegalICmpImmediate(int64_t Imm) const override;
497   bool isLegalAddImmediate(int64_t Imm) const override;
498   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
499   bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
500   bool isTruncateFree(SDValue Val, EVT VT2) const override;
501   bool isZExtFree(SDValue Val, EVT VT2) const override;
502   bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
503   bool signExtendConstant(const ConstantInt *CI) const override;
504   bool isCheapToSpeculateCttz(Type *Ty) const override;
505   bool isCheapToSpeculateCtlz(Type *Ty) const override;
506   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
507   bool hasAndNotCompare(SDValue Y) const override;
508   bool hasBitTest(SDValue X, SDValue Y) const override;
509   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
510       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
511       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
512       SelectionDAG &DAG) const override;
513   /// Return true if the (vector) instruction I will be lowered to an instruction
514   /// with a scalar splat operand for the given Operand number.
515   bool canSplatOperand(Instruction *I, int Operand) const;
516   /// Return true if a vector instruction will lower to a target instruction
517   /// able to splat the given operand.
518   bool canSplatOperand(unsigned Opcode, int Operand) const;
519   bool shouldSinkOperands(Instruction *I,
520                           SmallVectorImpl<Use *> &Ops) const override;
521   bool shouldScalarizeBinop(SDValue VecOp) const override;
522   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
523   std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
524   bool isFPImmLegal(const APFloat &Imm, EVT VT,
525                     bool ForCodeSize) const override;
526   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
527                                unsigned Index) const override;
528 
529   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
530 
531   bool preferScalarizeSplat(SDNode *N) const override;
532 
533   bool softPromoteHalfType() const override { return true; }
534 
535   /// Return the register type for a given MVT, ensuring vectors are treated
536   /// as a series of gpr sized integers.
537   MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
538                                     EVT VT) const override;
539 
540   /// Return the number of registers for a given MVT, ensuring vectors are
541   /// treated as a series of gpr sized integers.
542   unsigned getNumRegistersForCallingConv(LLVMContext &Context,
543                                          CallingConv::ID CC,
544                                          EVT VT) const override;
545 
546   unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
547                                                 CallingConv::ID CC, EVT VT,
548                                                 EVT &IntermediateVT,
549                                                 unsigned &NumIntermediates,
550                                                 MVT &RegisterVT) const override;
551 
552   bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
553                                             EVT VT) const override;
554 
555   /// Return true if the given shuffle mask can be codegen'd directly, or if it
556   /// should be stack expanded.
557   bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
558 
559   bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
560     // If the pair to store is a mixture of float and int values, we save two
561     // bitwise instructions and one float-to-int instruction at the cost of one
562     // extra store. There is potentially a more significant benefit because it
563     // avoids the float->int domain switch for the input value, so it is more
564     // likely a win.
565     if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
566         (LTy.isInteger() && HTy.isFloatingPoint()))
567       return true;
568     // If the pair only contains int values, we save two bitwise instructions
569     // but add one store instruction (costing one more store buffer entry).
570     // Since that benefit is less clear-cut, we leave such a pair out until we
571     // have a test case proving it is a win.
572     return false;
573   }
574 
575   bool
576   shouldExpandBuildVectorWithShuffles(EVT VT,
577                                       unsigned DefinedValues) const override;
578 
579   bool shouldExpandCttzElements(EVT VT) const override;
580 
581   /// Return the cost of LMUL for linear operations.
582   InstructionCost getLMULCost(MVT VT) const;
583 
584   InstructionCost getVRGatherVVCost(MVT VT) const;
585   InstructionCost getVRGatherVICost(MVT VT) const;
586   InstructionCost getVSlideVXCost(MVT VT) const;
587   InstructionCost getVSlideVICost(MVT VT) const;
588 
589   // Provide custom lowering hooks for some operations.
590   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
591   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
592                           SelectionDAG &DAG) const override;
593 
594   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
595 
596   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
597                                     const APInt &DemandedElts,
598                                     TargetLoweringOpt &TLO) const override;
599 
600   void computeKnownBitsForTargetNode(const SDValue Op,
601                                      KnownBits &Known,
602                                      const APInt &DemandedElts,
603                                      const SelectionDAG &DAG,
604                                      unsigned Depth) const override;
605   unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
606                                            const APInt &DemandedElts,
607                                            const SelectionDAG &DAG,
608                                            unsigned Depth) const override;
609 
610   bool canCreateUndefOrPoisonForTargetNode(SDValue Op,
611                                            const APInt &DemandedElts,
612                                            const SelectionDAG &DAG,
613                                            bool PoisonOnly, bool ConsiderFlags,
614                                            unsigned Depth) const override;
615 
616   const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
617 
618   // This method returns the name of a target specific DAG node.
619   const char *getTargetNodeName(unsigned Opcode) const override;
620 
621   MachineMemOperand::Flags
622   getTargetMMOFlags(const Instruction &I) const override;
623 
624   MachineMemOperand::Flags
625   getTargetMMOFlags(const MemSDNode &Node) const override;
626 
627   bool
628   areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
629                                       const MemSDNode &NodeY) const override;
630 
631   ConstraintType getConstraintType(StringRef Constraint) const override;
632 
633   InlineAsm::ConstraintCode
634   getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
635 
636   std::pair<unsigned, const TargetRegisterClass *>
637   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
638                                StringRef Constraint, MVT VT) const override;
639 
640   void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
641                                     std::vector<SDValue> &Ops,
642                                     SelectionDAG &DAG) const override;
643 
644   MachineBasicBlock *
645   EmitInstrWithCustomInserter(MachineInstr &MI,
646                               MachineBasicBlock *BB) const override;
647 
648   void AdjustInstrPostInstrSelection(MachineInstr &MI,
649                                      SDNode *Node) const override;
650 
651   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
652                          EVT VT) const override;
653 
654   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
655                             bool MathUsed) const override {
656     if (VT == MVT::i8 || VT == MVT::i16)
657       return false;
658 
659     return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
660   }
661 
662   bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
663                                     unsigned AddrSpace) const override {
664     // If we can replace 4 or more scalar stores, there will be a reduction
665     // in instructions even after we add a vector constant load.
666     return NumElem >= 4;
667   }
668 
669   bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
670     return VT.isScalarInteger();
671   }
672   bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }
673 
674   bool isCtpopFast(EVT VT) const override;
675 
676   unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override;
677 
678   bool preferZeroCompareBranch() const override { return true; }
679 
680   bool shouldInsertFencesForAtomic(const Instruction *I) const override {
681     return isa<LoadInst>(I) || isa<StoreInst>(I);
682   }
683   Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
684                                 AtomicOrdering Ord) const override;
685   Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
686                                  AtomicOrdering Ord) const override;
687 
688   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
689                                   EVT VT) const override;
690 
691   ISD::NodeType getExtendForAtomicOps() const override {
692     return ISD::SIGN_EXTEND;
693   }
694 
695   ISD::NodeType getExtendForAtomicCmpSwapArg() const override;
696 
697   bool shouldTransformSignedTruncationCheck(EVT XVT,
698                                             unsigned KeptBits) const override;
699 
700   TargetLowering::ShiftLegalizationStrategy
701   preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
702                                      unsigned ExpansionFactor) const override {
703     if (DAG.getMachineFunction().getFunction().hasMinSize())
704       return ShiftLegalizationStrategy::LowerToLibcall;
705     return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
706                                                               ExpansionFactor);
707   }
708 
709   bool isDesirableToCommuteWithShift(const SDNode *N,
710                                      CombineLevel Level) const override;
711 
712   /// If a physical register, this returns the register that receives the
713   /// exception address on entry to an EH pad.
714   Register
715   getExceptionPointerRegister(const Constant *PersonalityFn) const override;
716 
717   /// If a physical register, this returns the register that receives the
718   /// exception typeid on entry to a landing pad.
719   Register
720   getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
721 
722   bool shouldExtendTypeInLibCall(EVT Type) const override;
723   bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
724 
725   /// Returns the register with the specified architectural or ABI name. This
726   /// method is necessary to lower the llvm.read_register.* and
727   /// llvm.write_register.* intrinsics. Allocatable registers must be reserved
728   /// with the clang -ffixed-xX flag for access to be allowed.
729   Register getRegisterByName(const char *RegName, LLT VT,
730                              const MachineFunction &MF) const override;
731 
732   // Lower incoming arguments, copy physregs into vregs
733   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
734                                bool IsVarArg,
735                                const SmallVectorImpl<ISD::InputArg> &Ins,
736                                const SDLoc &DL, SelectionDAG &DAG,
737                                SmallVectorImpl<SDValue> &InVals) const override;
738   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
739                       bool IsVarArg,
740                       const SmallVectorImpl<ISD::OutputArg> &Outs,
741                       LLVMContext &Context) const override;
742   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
743                       const SmallVectorImpl<ISD::OutputArg> &Outs,
744                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
745                       SelectionDAG &DAG) const override;
746   SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
747                     SmallVectorImpl<SDValue> &InVals) const override;
748 
749   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
750                                          Type *Ty) const override;
751   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
752   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
753   bool shouldConsiderGEPOffsetSplit() const override { return true; }
754 
755   bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
756                               SDValue C) const override;
757 
758   bool isMulAddWithConstProfitable(SDValue AddNode,
759                                    SDValue ConstNode) const override;
760 
761   TargetLowering::AtomicExpansionKind
762   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
763   Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
764                                       Value *AlignedAddr, Value *Incr,
765                                       Value *Mask, Value *ShiftAmt,
766                                       AtomicOrdering Ord) const override;
767   TargetLowering::AtomicExpansionKind
768   shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
769   Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
770                                           AtomicCmpXchgInst *CI,
771                                           Value *AlignedAddr, Value *CmpVal,
772                                           Value *NewVal, Value *Mask,
773                                           AtomicOrdering Ord) const override;
774 
775   /// Returns true if the target allows unaligned memory accesses of the
776   /// specified type.
777   bool allowsMisalignedMemoryAccesses(
778       EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
779       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
780       unsigned *Fast = nullptr) const override;
781 
782   EVT getOptimalMemOpType(const MemOp &Op,
783                           const AttributeList &FuncAttributes) const override;
784 
785   bool splitValueIntoRegisterParts(
786       SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
787       unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
788       const override;
789 
790   SDValue joinRegisterPartsIntoValue(
791       SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
792       unsigned NumParts, MVT PartVT, EVT ValueVT,
793       std::optional<CallingConv::ID> CC) const override;
794 
795   // Return the value of VLMax for the given vector type (i.e. SEW and LMUL)
796   SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const;
797 
798   static RISCVII::VLMUL getLMUL(MVT VT);
799   inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize,
800                                       unsigned MinSize) {
801     // Original equation:
802     //   VLMAX = (VectorBits / EltSize) * LMUL
803     //   where LMUL = MinSize / RISCV::RVVBitsPerBlock
804     // The following equations have been reordered to prevent loss of precision
805     // when calculating fractional LMUL.
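    // Worked example (illustrative values): VectorBits = 256, EltSize = 32,
    // MinSize = 32, i.e. LMUL = 1/2 with RVVBitsPerBlock = 64:
    //   naive:     (256 / 32) * (32 / 64)  -> 8 * 0 = 0 in integer arithmetic
    //   reordered: ((256 / 32) * 32) / 64  -> 256 / 64 = 4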
806     return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
807   }
808 
809   // Return inclusive (low, high) bounds on the value of VLMAX for the
810   // given scalable container type given known bounds on VLEN.
811   static std::pair<unsigned, unsigned>
812   computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget);
813 
814   static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
815   static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
816   static unsigned getRegClassIDForVecVT(MVT VT);
817   static std::pair<unsigned, unsigned>
818   decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
819                                            unsigned InsertExtractIdx,
820                                            const RISCVRegisterInfo *TRI);
821   MVT getContainerForFixedLengthVector(MVT VT) const;
822 
823   bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
824 
825   bool isLegalElementTypeForRVV(EVT ScalarTy) const;
826 
827   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
828 
829   unsigned getJumpTableEncoding() const override;
830 
831   const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
832                                           const MachineBasicBlock *MBB,
833                                           unsigned uid,
834                                           MCContext &Ctx) const override;
835 
836   bool isVScaleKnownToBeAPowerOfTwo() const override;
837 
838   bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
839                               ISD::MemIndexedMode &AM, SelectionDAG &DAG) const;
840   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
841                                  ISD::MemIndexedMode &AM,
842                                  SelectionDAG &DAG) const override;
843   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
844                                   SDValue &Offset, ISD::MemIndexedMode &AM,
845                                   SelectionDAG &DAG) const override;
846 
847   bool isLegalScaleForGatherScatter(uint64_t Scale,
848                                     uint64_t ElemSize) const override {
849     // Scaled addressing not supported on indexed load/stores
850     return Scale == 1;
851   }
852 
853   /// If the target has a standard location for the stack protector cookie,
854   /// returns the address of that location. Otherwise, returns nullptr.
855   Value *getIRStackGuard(IRBuilderBase &IRB) const override;
856 
857   /// Returns whether or not generating an interleaved load/store intrinsic for
858   /// this type will be legal.
859   bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
860                                     Align Alignment, unsigned AddrSpace,
861                                     const DataLayout &) const;
862 
863   /// Return true if a strided load/store of the given result type and
864   /// alignment is legal.
865   bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;
866 
867   unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
868 
869   bool fallBackToDAGISel(const Instruction &Inst) const override;
870 
871   bool lowerInterleavedLoad(LoadInst *LI,
872                             ArrayRef<ShuffleVectorInst *> Shuffles,
873                             ArrayRef<unsigned> Indices,
874                             unsigned Factor) const override;
875 
876   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
877                              unsigned Factor) const override;
878 
879   bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
880                                         LoadInst *LI) const override;
881 
882   bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
883                                        StoreInst *SI) const override;
884 
885   bool supportKCFIBundles() const override { return true; }
886 
887   SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
888                                  int JTI, SelectionDAG &DAG) const override;
889 
890   MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
891                               MachineBasicBlock::instr_iterator &MBBI,
892                               const TargetInstrInfo *TII) const override;
893 
894   /// RISCVCCAssignFn - This target-specific function extends the default
895   /// CCValAssign with additional information used to lower RISC-V calling
896   /// conventions.
897   typedef bool RISCVCCAssignFn(const DataLayout &DL, RISCVABI::ABI,
898                                unsigned ValNo, MVT ValVT, MVT LocVT,
899                                CCValAssign::LocInfo LocInfo,
900                                ISD::ArgFlagsTy ArgFlags, CCState &State,
901                                bool IsFixed, bool IsRet, Type *OrigTy,
902                                const RISCVTargetLowering &TLI,
903                                RVVArgDispatcher &RVVDispatcher);
904 
905 private:
906   void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
907                         const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
908                         RISCVCCAssignFn Fn) const;
909   void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
910                          const SmallVectorImpl<ISD::OutputArg> &Outs,
911                          bool IsRet, CallLoweringInfo *CLI,
912                          RISCVCCAssignFn Fn) const;
913 
914   template <class NodeTy>
915   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true,
916                   bool IsExternWeak = false) const;
917   SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
918                            bool UseGOT) const;
919   SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
920   SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
921 
922   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
923   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
924   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
925   SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
926   SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
927   SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
928   SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
929   SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
930   SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
931   SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
932   SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
933   SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
934   SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const;
935   SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
936   SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
937                              int64_t ExtTrueVal) const;
938   SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
939   SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
940   SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
941   SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
942   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
943   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
944   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
945   SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
946   SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const;
947   SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
948   SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG,
949                                       bool IsVP) const;
950   SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
951   SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
952   SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
953   SDValue lowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
954   SDValue lowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
955   SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
956   SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
957   SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
958   SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
959   SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
960   SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
961   SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
962                                                SelectionDAG &DAG) const;
963   SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
964   SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
965   SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
966   SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
967   SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
968   SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
969                                             SelectionDAG &DAG) const;
970   SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
971   SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
972   SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
973   SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
974   SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
975   SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
976   SDValue lowerVPSplatExperimental(SDValue Op, SelectionDAG &DAG) const;
977   SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
978   SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
979   SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
980   SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
981   SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
982   SDValue lowerVPCttzElements(SDValue Op, SelectionDAG &DAG) const;
983   SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
984                                             unsigned ExtendOpc) const;
985   SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
986   SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
987 
988   SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
989   SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;
990 
991   SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
992 
993   SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;
994 
995   SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
996   SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
997 
998   bool isEligibleForTailCallOptimization(
999       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
1000       const SmallVector<CCValAssign, 16> &ArgLocs) const;
1001 
1002   /// Generate error diagnostics if any register used by CC has been marked
1003   /// reserved.
1004   void validateCCReservedRegs(
1005       const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
1006       MachineFunction &MF) const;
1007 
1008   bool useRVVForFixedLengthVectorVT(MVT VT) const;
1009 
1010   MVT getVPExplicitVectorLengthTy() const override;
1011 
1012   bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
1013                                    bool IsScalable) const override;
1014 
1015   /// RVV code generation for fixed length vectors does not lower all
1016   /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
1017   /// merge. However, merging them creates a BUILD_VECTOR that is just as
1018   /// illegal as the original, thus leading to an infinite legalisation loop.
1019   /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
1020   /// this override can be removed.
1021   bool mergeStoresAfterLegalization(EVT VT) const override;
1022 
1023   /// Disable normalizing
1024   /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
1025   /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)).
1026   /// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
1027   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
1028     return false;
1029   }
1030 
1031   /// For available scheduling models FDIV + two independent FMULs are much
1032   /// faster than two FDIVs.
1033   unsigned combineRepeatedFPDivisors() const override;
1034 
1035   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1036                         SmallVectorImpl<SDNode *> &Created) const override;
1037 
1038   bool shouldFoldSelectWithSingleBitTest(EVT VT,
1039                                          const APInt &AndMask) const override;
1040 
1041   unsigned getMinimumJumpTableEntries() const override;
1042 
1043   SDValue emitFlushICache(SelectionDAG &DAG, SDValue InChain, SDValue Start,
1044                           SDValue End, SDValue Flags, SDLoc DL) const;
1045 };
1046 
1047 /// As per the spec, the rules for passing vector arguments are as follows:
1048 ///
1049 /// 1. For the first vector mask argument, use v0 to pass it.
1050 /// 2. For vector data arguments or the remaining vector mask arguments,
1051 /// starting from the v8 register, if a vector register group between v8 and
1052 /// v23 that has not been allocated can be found and the first register number
1053 /// is a multiple of LMUL, then allocate this vector register group to the
1054 /// argument and mark these registers as allocated. Otherwise, the argument is
1055 /// passed by reference and is replaced in the argument list with its address.
1056 /// 3. For tuple vector data arguments, starting from the v8 register, if
1057 /// NFIELDS consecutive vector register groups between v8 and v23 that have
1058 /// not been allocated can be found and the first register number is a
1059 /// multiple of LMUL, then allocate these vector register groups to the
1060 /// argument and mark these registers as allocated. Otherwise, the argument is
1061 /// passed by reference and is replaced in the argument list with its address.
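/// For illustration only (an assumed, not normative, sequence of arguments):
/// given a mask argument followed by data arguments of LMUL 2, 8, 4 and 8, the
/// mask would use v0, the LMUL=2 argument v8-v9, the first LMUL=8 argument
/// v16-v23 (the first free group whose start is a multiple of 8), the LMUL=4
/// argument v12-v15, and the final LMUL=8 argument, with no free group left,
/// would be passed by reference.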
1062 class RVVArgDispatcher {
1063 public:
1064   static constexpr unsigned NumArgVRs = 16;
1065 
1066   struct RVVArgInfo {
1067     unsigned NF;
1068     MVT VT;
1069     bool FirstVMask = false;
1070   };
1071 
1072   template <typename Arg>
1073   RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI,
1074                    ArrayRef<Arg> ArgList)
1075       : MF(MF), TLI(TLI) {
1076     constructArgInfos(ArgList);
1077     compute();
1078   }
1079 
1080   RVVArgDispatcher() = default;
1081 
1082   MCPhysReg getNextPhysReg();
1083 
1084 private:
1085   SmallVector<RVVArgInfo, 4> RVVArgInfos;
1086   SmallVector<MCPhysReg, 4> AllocatedPhysRegs;
1087 
1088   const MachineFunction *MF = nullptr;
1089   const RISCVTargetLowering *TLI = nullptr;
1090 
1091   unsigned CurIdx = 0;
1092 
1093   template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret);
1094   void compute();
1095   void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1,
1096                        unsigned StartReg = 0);
1097 };
1098 
1099 namespace RISCV {
1100 
1101 bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1102               MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
1103               ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
1104               bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
1105               RVVArgDispatcher &RVVDispatcher);
1106 
1107 bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1108                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
1109                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
1110                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
1111                      RVVArgDispatcher &RVVDispatcher);
1112 
1113 bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
1114                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
1115                   CCState &State);
1116 
1117 ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI);
1118 
1119 } // end namespace RISCV
1120 
1121 namespace RISCVVIntrinsicsTable {
1122 
1123 struct RISCVVIntrinsicInfo {
1124   unsigned IntrinsicID;
1125   uint8_t ScalarOperand;
1126   uint8_t VLOperand;
1127   bool hasScalarOperand() const {
1128     // 0xF is not valid. See NoScalarOperand in IntrinsicsRISCV.td.
1129     return ScalarOperand != 0xF;
1130   }
1131   bool hasVLOperand() const {
1132     // 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td.
1133     return VLOperand != 0x1F;
1134   }
1135 };
1136 
1137 using namespace RISCV;
1138 
1139 #define GET_RISCVVIntrinsicsTable_DECL
1140 #include "RISCVGenSearchableTables.inc"
1141 #undef GET_RISCVVIntrinsicsTable_DECL
1142 
1143 } // end namespace RISCVVIntrinsicsTable
1144 
1145 } // end namespace llvm
1146 
1147 #endif
1148