xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h (revision 8311bc5f17dec348749f763b82dfe2737bc53cd7)
1 //===-- RISCVISelLowering.h - RISC-V DAG Lowering Interface -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISC-V uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
15 #define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
16 
17 #include "RISCV.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/SelectionDAG.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/TargetParser/RISCVTargetParser.h"
22 #include <optional>
23 
24 namespace llvm {
25 class RISCVSubtarget;
26 struct RISCVRegisterInfo;
27 namespace RISCVISD {
28 enum NodeType : unsigned {
29   FIRST_NUMBER = ISD::BUILTIN_OP_END,
30   RET_GLUE,
31   SRET_GLUE,
32   MRET_GLUE,
33   CALL,
34   /// Select with condition operator - This selects between a true value and
35   /// a false value (ops #3 and #4) based on the boolean result of comparing
36   /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
37   /// condition code in op #2, a XLenVT constant from the ISD::CondCode enum.
38   /// The lhs and rhs are XLenVT integers. The true and false values can be
39   /// integer or floating point.
40   SELECT_CC,
41   BR_CC,
42   BuildPairF64,
43   SplitF64,
44   TAIL,
45 
46   // Add the Lo 12 bits from an address. Selected to ADDI.
47   ADD_LO,
48   // Get the Hi 20 bits from an address. Selected to LUI.
49   HI,
50 
51   // Represents an AUIPC+ADDI pair. Selected to PseudoLLA.
52   LLA,
53 
54   // Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.
55   ADD_TPREL,
56 
57   // Load address.
58   LA_TLS_GD,
59 
60   // Multiply high for signed x unsigned operands.
61   MULHSU,
62   // RV64I shifts, directly matching the semantics of the named RISC-V
63   // instructions.
64   SLLW,
65   SRAW,
66   SRLW,
67   // 32-bit operations from RV64M that can't be simply matched with a pattern
68   // at instruction selection time. These have undefined behavior for division
69   // by 0 or overflow (divw) like their target independent counterparts.
70   DIVW,
71   DIVUW,
72   REMUW,
73   // RV64IB rotates, directly matching the semantics of the named RISC-V
74   // instructions.
75   ROLW,
76   RORW,
77   // RV64IZbb bit counting instructions directly matching the semantics of the
78   // named RISC-V instructions.
79   CLZW,
80   CTZW,
81 
82   // RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel.
83   ABSW,
84 
85   // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
86   // XLEN is the only legal integer width.
87   //
88   // FMV_H_X matches the semantics of the FMV.H.X.
89   // FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
90   // FMV_X_SIGNEXTH is similar to FMV.X.H and has a sign-extended result.
91   // FMV_W_X_RV64 matches the semantics of the FMV.W.X.
92   // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
93   //
94   // This is a more convenient semantic for producing dagcombines that remove
95   // unnecessary GPR->FPR->GPR moves.
96   FMV_H_X,
97   FMV_X_ANYEXTH,
98   FMV_X_SIGNEXTH,
99   FMV_W_X_RV64,
100   FMV_X_ANYEXTW_RV64,
101   // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
102   // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out of
103   // range inputs. These are used for FP_TO_S/UINT_SAT lowering. Rounding mode
104   // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
105   FCVT_X,
106   FCVT_XU,
107   // FP to 32 bit int conversions for RV64. These are used to keep track of the
108   // result being sign extended to 64 bit. These saturate out of range inputs.
109   // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering. Rounding mode
110   // is passed as a TargetConstant operand using the RISCVFPRndMode enum.
111   FCVT_W_RV64,
112   FCVT_WU_RV64,
113 
114   FP_ROUND_BF16,
115   FP_EXTEND_BF16,
116 
117   // Rounds an FP value to its corresponding integer in the same FP format.
118   // First operand is the value to round, the second operand is the largest
119   // integer that can be represented exactly in the FP format. This will be
120   // expanded into multiple instructions and basic blocks with a custom
121   // inserter.
122   FROUND,
123 
124   FPCLASS,
125 
126   // Floating point fmax and fmin matching the RISC-V instruction semantics.
127   FMAX, FMIN,
128 
129   // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
130   // (returns (Lo, Hi)). It takes a chain operand.
131   READ_CYCLE_WIDE,
132   // brev8, orc.b, zip, and unzip from Zbb and Zbkb. All operands are i32 or
133   // XLenVT.
134   BREV8,
135   ORC_B,
136   ZIP,
137   UNZIP,
138 
139   // Scalar cryptography
140   CLMUL, CLMULH, CLMULR,
141   SHA256SIG0, SHA256SIG1, SHA256SUM0, SHA256SUM1,
142   SM4KS, SM4ED,
143   SM3P0, SM3P1,
144 
145   // Vector Extension
146   // VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
147   // for the VL value to be used for the operation. The first operand is
148   // passthru operand.
149   VMV_V_V_VL,
150   // VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
151   // for the VL value to be used for the operation. The first operand is
152   // passthru operand.
153   VMV_V_X_VL,
154   // VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand
155   // for the VL value to be used for the operation. The first operand is
156   // passthru operand.
157   VFMV_V_F_VL,
158   // VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
159   // extended from the vector element size.
160   VMV_X_S,
161   // VMV_S_X_VL matches the semantics of vmv.s.x. It carries a VL operand.
162   VMV_S_X_VL,
163   // VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand.
164   VFMV_S_F_VL,
165   // Splats a 64-bit value that has been split into two i32 parts. This is
166   // expanded late to two scalar stores and a stride 0 vector load.
167   // The first operand is passthru operand.
168   SPLAT_VECTOR_SPLIT_I64_VL,
169   // Read VLENB CSR
170   READ_VLENB,
171   // Truncates a RVV integer vector by one power-of-two. Carries both an extra
172   // mask and VL operand.
173   TRUNCATE_VECTOR_VL,
174   // Matches the semantics of vslideup/vslidedown. The first operand is the
175   // pass-thru operand, the second is the source vector, the third is the
176   // XLenVT index (either constant or non-constant), the fourth is the mask
177   // and the fifth the VL.
178   VSLIDEUP_VL,
179   VSLIDEDOWN_VL,
180   // Matches the semantics of vslide1up/vslide1down. The first operand is
181   // passthru operand, the second is source vector, third is the XLenVT scalar
182   // value. The fourth and fifth operands are the mask and VL operands.
183   VSLIDE1UP_VL,
184   VSLIDE1DOWN_VL,
185   // Matches the semantics of vfslide1up/vfslide1down. The first operand is
186   // passthru operand, the second is source vector, third is a scalar value
187   // whose type matches the element type of the vectors.  The fourth and fifth
188   // operands are the mask and VL operands.
189   VFSLIDE1UP_VL,
190   VFSLIDE1DOWN_VL,
191   // Matches the semantics of the vid.v instruction, with a mask and VL
192   // operand.
193   VID_VL,
194   // Matches the semantics of the vfncvt.rod instruction (convert double-width
195   // float to single-width float, rounding towards odd). Takes a double-width
196   // float vector and produces a single-width float vector. Also has a mask and
197   // VL operand.
198   VFNCVT_ROD_VL,
199   // These nodes match the semantics of the corresponding RVV vector reduction
200   // instructions. They produce a vector result which is the reduction
201   // performed over the second vector operand plus the first element of the
202   // third vector operand. The first operand is the pass-thru operand. The
203   // second operand is an unconstrained vector type, and the result, first, and
204   // third operand's types are expected to be the corresponding full-width
205   // LMUL=1 type for the second operand:
206   //   nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8
207   //   nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32
208   // The difference in types does introduce extra vsetvli instructions but
209   // similarly it reduces the number of registers consumed per reduction.
210   // Also has a mask and VL operand.
211   VECREDUCE_ADD_VL,
212   VECREDUCE_UMAX_VL,
213   VECREDUCE_SMAX_VL,
214   VECREDUCE_UMIN_VL,
215   VECREDUCE_SMIN_VL,
216   VECREDUCE_AND_VL,
217   VECREDUCE_OR_VL,
218   VECREDUCE_XOR_VL,
219   VECREDUCE_FADD_VL,
220   VECREDUCE_SEQ_FADD_VL,
221   VECREDUCE_FMIN_VL,
222   VECREDUCE_FMAX_VL,
223 
224   // Vector binary ops with a merge as a third operand, a mask as a fourth
225   // operand, and VL as a fifth operand.
226   ADD_VL,
227   AND_VL,
228   MUL_VL,
229   OR_VL,
230   SDIV_VL,
231   SHL_VL,
232   SREM_VL,
233   SRA_VL,
234   SRL_VL,
235   SUB_VL,
236   UDIV_VL,
237   UREM_VL,
238   XOR_VL,
239   SMIN_VL,
240   SMAX_VL,
241   UMIN_VL,
242   UMAX_VL,
243 
244   BITREVERSE_VL,
245   BSWAP_VL,
246   CTLZ_VL,
247   CTTZ_VL,
248   CTPOP_VL,
249 
250   SADDSAT_VL,
251   UADDSAT_VL,
252   SSUBSAT_VL,
253   USUBSAT_VL,
254 
255   MULHS_VL,
256   MULHU_VL,
257   FADD_VL,
258   FSUB_VL,
259   FMUL_VL,
260   FDIV_VL,
261   FMINNUM_VL,
262   FMAXNUM_VL,
263 
264   // Vector unary ops with a mask as a second operand and VL as a third operand.
265   FNEG_VL,
266   FABS_VL,
267   FSQRT_VL,
268   FCLASS_VL,
269   FCOPYSIGN_VL, // Has a merge operand
270   VFCVT_RTZ_X_F_VL,
271   VFCVT_RTZ_XU_F_VL,
272   VFCVT_X_F_VL,
273   VFCVT_XU_F_VL,
274   VFROUND_NOEXCEPT_VL,
275   VFCVT_RM_X_F_VL,  // Has a rounding mode operand.
276   VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
277   SINT_TO_FP_VL,
278   UINT_TO_FP_VL,
279   VFCVT_RM_F_X_VL,  // Has a rounding mode operand.
280   VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
281   FP_ROUND_VL,
282   FP_EXTEND_VL,
283 
284   // Vector FMA ops with a mask as a fourth operand and VL as a fifth operand.
285   VFMADD_VL,
286   VFNMADD_VL,
287   VFMSUB_VL,
288   VFNMSUB_VL,
289 
290   // Vector widening FMA ops with a mask as a fourth operand and VL as a fifth
291   // operand.
292   VFWMADD_VL,
293   VFWNMADD_VL,
294   VFWMSUB_VL,
295   VFWNMSUB_VL,
296 
297   // Widening instructions with a merge value as a third operand, a mask as a
298   // fourth operand, and VL as a fifth operand.
299   VWMUL_VL,
300   VWMULU_VL,
301   VWMULSU_VL,
302   VWADD_VL,
303   VWADDU_VL,
304   VWSUB_VL,
305   VWSUBU_VL,
306   VWADD_W_VL,
307   VWADDU_W_VL,
308   VWSUB_W_VL,
309   VWSUBU_W_VL,
310 
311   VFWMUL_VL,
312   VFWADD_VL,
313   VFWSUB_VL,
314   VFWADD_W_VL,
315   VFWSUB_W_VL,
316 
317   // Widening ternary operations with a mask as the fourth operand and VL as the
318   // fifth operand.
319   VWMACC_VL,
320   VWMACCU_VL,
321   VWMACCSU_VL,
322 
323   // Narrowing logical shift right.
324   // Operands are (source, shift, passthru, mask, vl)
325   VNSRL_VL,
326 
327   // Vector compare producing a mask. Fourth operand is input mask. Fifth
328   // operand is VL.
329   SETCC_VL,
330 
331   // Vector select with an additional VL operand. This operation is unmasked.
332   VSELECT_VL,
333   // Vector select with operand #2 (the value when the condition is false) tied
334   // to the destination and an additional VL operand. This operation is
335   // unmasked.
336   VP_MERGE_VL,
337 
338   // Mask binary operators.
339   VMAND_VL,
340   VMOR_VL,
341   VMXOR_VL,
342 
343   // Set mask vector to all zeros or ones.
344   VMCLR_VL,
345   VMSET_VL,
346 
347   // Matches the semantics of vrgather.vx and vrgather.vv with extra operands
348   // for passthru and VL. Operands are (src, index, mask, passthru, vl).
349   VRGATHER_VX_VL,
350   VRGATHER_VV_VL,
351   VRGATHEREI16_VV_VL,
352 
353   // Vector sign/zero extend with additional mask & VL operands.
354   VSEXT_VL,
355   VZEXT_VL,
356 
357   //  vcpop.m with additional mask and VL operands.
358   VCPOP_VL,
359 
360   //  vfirst.m with additional mask and VL operands.
361   VFIRST_VL,
362 
363   // Reads value of CSR.
364   // The first operand is a chain pointer. The second specifies address of the
365   // required CSR. Two results are produced, the read value and the new chain
366   // pointer.
367   READ_CSR,
368   // Write value to CSR.
369   // The first operand is a chain pointer, the second specifies address of the
370   // required CSR and the third is the value to write. The result is the new
371   // chain pointer.
372   WRITE_CSR,
373   // Read and write value of CSR.
374   // The first operand is a chain pointer, the second specifies address of the
375   // required CSR and the third is the value to write. Two results are produced,
376   // the value read before the modification and the new chain pointer.
377   SWAP_CSR,
378 
379   // Branchless select operations, matching the semantics of the instructions
380   // defined in Zicond or XVentanaCondOps.
381   CZERO_EQZ, // vt.maskc for XVentanaCondOps.
382   CZERO_NEZ, // vt.maskcn for XVentanaCondOps.
383 
384   // FP to 32 bit int conversions for RV64. These are used to keep track of the
385   // result being sign extended to 64 bit. These saturate out of range inputs.
386   STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
387   STRICT_FCVT_WU_RV64,
388   STRICT_FADD_VL,
389   STRICT_FSUB_VL,
390   STRICT_FMUL_VL,
391   STRICT_FDIV_VL,
392   STRICT_FSQRT_VL,
393   STRICT_VFMADD_VL,
394   STRICT_VFNMADD_VL,
395   STRICT_VFMSUB_VL,
396   STRICT_VFNMSUB_VL,
397   STRICT_FP_ROUND_VL,
398   STRICT_FP_EXTEND_VL,
399   STRICT_VFNCVT_ROD_VL,
400   STRICT_SINT_TO_FP_VL,
401   STRICT_UINT_TO_FP_VL,
402   STRICT_VFCVT_RM_X_F_VL,
403   STRICT_VFCVT_RTZ_X_F_VL,
404   STRICT_VFCVT_RTZ_XU_F_VL,
405   STRICT_FSETCC_VL,
406   STRICT_FSETCCS_VL,
407   STRICT_VFROUND_NOEXCEPT_VL,
408 
409   // WARNING: Do not add anything at the end unless you want the node to
410   // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
411   // opcodes will be treated as target memory ops!
412 
413   // Represents an AUIPC+L[WD] pair. Selected to PseudoLGA.
414   LGA = ISD::FIRST_TARGET_MEMORY_OPCODE,
415   // Load initial exec thread-local address.
416   LA_TLS_IE,
417 
418   TH_LWD,
419   TH_LWUD,
420   TH_LDD,
421   TH_SWD,
422   TH_SDD,
423 };
424 } // namespace RISCVISD
425 
426 class RISCVTargetLowering : public TargetLowering {
427   const RISCVSubtarget &Subtarget;
428 
429 public:
430   explicit RISCVTargetLowering(const TargetMachine &TM,
431                                const RISCVSubtarget &STI);
432 
433   const RISCVSubtarget &getSubtarget() const { return Subtarget; }
434 
435   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
436                           MachineFunction &MF,
437                           unsigned Intrinsic) const override;
438   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
439                              unsigned AS,
440                              Instruction *I = nullptr) const override;
441   bool isLegalICmpImmediate(int64_t Imm) const override;
442   bool isLegalAddImmediate(int64_t Imm) const override;
443   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
444   bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
445   bool isZExtFree(SDValue Val, EVT VT2) const override;
446   bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
447   bool signExtendConstant(const ConstantInt *CI) const override;
448   bool isCheapToSpeculateCttz(Type *Ty) const override;
449   bool isCheapToSpeculateCtlz(Type *Ty) const override;
450   bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
451   bool hasAndNotCompare(SDValue Y) const override;
452   bool hasBitTest(SDValue X, SDValue Y) const override;
453   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
454       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
455       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
456       SelectionDAG &DAG) const override;
457   /// Return true if the (vector) instruction I will be lowered to an instruction
458   /// with a scalar splat operand for the given Operand number.
459   bool canSplatOperand(Instruction *I, int Operand) const;
460   /// Return true if a vector instruction will lower to a target instruction
461   /// able to splat the given operand.
462   bool canSplatOperand(unsigned Opcode, int Operand) const;
463   bool shouldSinkOperands(Instruction *I,
464                           SmallVectorImpl<Use *> &Ops) const override;
465   bool shouldScalarizeBinop(SDValue VecOp) const override;
466   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
467   int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
468   bool isFPImmLegal(const APFloat &Imm, EVT VT,
469                     bool ForCodeSize) const override;
470   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
471                                unsigned Index) const override;
472 
473   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
474 
475   bool preferScalarizeSplat(SDNode *N) const override;
476 
477   bool softPromoteHalfType() const override { return true; }
478 
479   /// Return the register type for a given MVT, ensuring vectors are treated
480   /// as a series of gpr sized integers.
481   MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
482                                     EVT VT) const override;
483 
484   /// Return the number of registers for a given MVT, ensuring vectors are
485   /// treated as a series of gpr sized integers.
486   unsigned getNumRegistersForCallingConv(LLVMContext &Context,
487                                          CallingConv::ID CC,
488                                          EVT VT) const override;
489 
490   bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
491                                             EVT VT) const override;
492 
493   /// Return true if the given shuffle mask can be codegen'd directly, or if it
494   /// should be stack expanded.
495   bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
496 
497   bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
498     // If the pair to store is a mixture of float and int values, we will
499     // save two bitwise instructions and one float-to-int instruction and
500     // increase one store instruction. There is potentially a more
501     // significant benefit because it avoids the float->int domain switch
502     // for input value. So It is more likely a win.
503     if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
504         (LTy.isInteger() && HTy.isFloatingPoint()))
505       return true;
506     // If the pair only contains int values, we will save two bitwise
507     // instructions and increase one store instruction (costing one more
508     // store buffer). Since the benefit is more blurred we leave such a pair
509     // out until we get testcase to prove it is a win.
510     return false;
511   }
512 
513   bool
514   shouldExpandBuildVectorWithShuffles(EVT VT,
515                                       unsigned DefinedValues) const override;
516 
517   // Provide custom lowering hooks for some operations.
518   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
519   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
520                           SelectionDAG &DAG) const override;
521 
522   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
523 
524   bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
525                                     const APInt &DemandedElts,
526                                     TargetLoweringOpt &TLO) const override;
527 
528   void computeKnownBitsForTargetNode(const SDValue Op,
529                                      KnownBits &Known,
530                                      const APInt &DemandedElts,
531                                      const SelectionDAG &DAG,
532                                      unsigned Depth) const override;
533   unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
534                                            const APInt &DemandedElts,
535                                            const SelectionDAG &DAG,
536                                            unsigned Depth) const override;
537 
538   const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
539 
540   // This method returns the name of a target specific DAG node.
541   const char *getTargetNodeName(unsigned Opcode) const override;
542 
543   MachineMemOperand::Flags
544   getTargetMMOFlags(const Instruction &I) const override;
545 
546   MachineMemOperand::Flags
547   getTargetMMOFlags(const MemSDNode &Node) const override;
548 
549   bool
550   areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
551                                       const MemSDNode &NodeY) const override;
552 
553   ConstraintType getConstraintType(StringRef Constraint) const override;
554 
555   unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
556 
557   std::pair<unsigned, const TargetRegisterClass *>
558   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
559                                StringRef Constraint, MVT VT) const override;
560 
561   void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
562                                     std::vector<SDValue> &Ops,
563                                     SelectionDAG &DAG) const override;
564 
565   MachineBasicBlock *
566   EmitInstrWithCustomInserter(MachineInstr &MI,
567                               MachineBasicBlock *BB) const override;
568 
569   void AdjustInstrPostInstrSelection(MachineInstr &MI,
570                                      SDNode *Node) const override;
571 
572   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
573                          EVT VT) const override;
574 
575   bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
576                             bool MathUsed) const override {
577     if (VT == MVT::i8 || VT == MVT::i16)
578       return false;
579 
580     return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
581   }
582 
583   bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
584                                     unsigned AddrSpace) const override {
585     // If we can replace 4 or more scalar stores, there will be a reduction
586     // in instructions even after we add a vector constant load.
587     return NumElem >= 4;
588   }
589 
590   bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
591     return VT.isScalarInteger();
592   }
593   bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }
594 
595   bool preferZeroCompareBranch() const override { return true; }
596 
597   bool shouldInsertFencesForAtomic(const Instruction *I) const override {
598     return isa<LoadInst>(I) || isa<StoreInst>(I);
599   }
600   Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
601                                 AtomicOrdering Ord) const override;
602   Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
603                                  AtomicOrdering Ord) const override;
604 
605   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
606                                   EVT VT) const override;
607 
608   ISD::NodeType getExtendForAtomicOps() const override {
609     return ISD::SIGN_EXTEND;
610   }
611 
612   ISD::NodeType getExtendForAtomicCmpSwapArg() const override {
613     return ISD::SIGN_EXTEND;
614   }
615 
616   bool shouldTransformSignedTruncationCheck(EVT XVT,
617                                             unsigned KeptBits) const override;
618 
619   TargetLowering::ShiftLegalizationStrategy
620   preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
621                                      unsigned ExpansionFactor) const override {
622     if (DAG.getMachineFunction().getFunction().hasMinSize())
623       return ShiftLegalizationStrategy::LowerToLibcall;
624     return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
625                                                               ExpansionFactor);
626   }
627 
628   bool isDesirableToCommuteWithShift(const SDNode *N,
629                                      CombineLevel Level) const override;
630 
631   /// If a physical register, this returns the register that receives the
632   /// exception address on entry to an EH pad.
633   Register
634   getExceptionPointerRegister(const Constant *PersonalityFn) const override;
635 
636   /// If a physical register, this returns the register that receives the
637   /// exception typeid on entry to a landing pad.
638   Register
639   getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
640 
641   bool shouldExtendTypeInLibCall(EVT Type) const override;
642   bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
643 
644   /// Returns the register with the specified architectural or ABI name. This
645   /// method is necessary to lower the llvm.read_register.* and
646   /// llvm.write_register.* intrinsics. Allocatable registers must be reserved
647   /// with the clang -ffixed-xX flag for access to be allowed.
648   Register getRegisterByName(const char *RegName, LLT VT,
649                              const MachineFunction &MF) const override;
650 
651   // Lower incoming arguments, copy physregs into vregs
652   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
653                                bool IsVarArg,
654                                const SmallVectorImpl<ISD::InputArg> &Ins,
655                                const SDLoc &DL, SelectionDAG &DAG,
656                                SmallVectorImpl<SDValue> &InVals) const override;
657   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
658                       bool IsVarArg,
659                       const SmallVectorImpl<ISD::OutputArg> &Outs,
660                       LLVMContext &Context) const override;
661   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
662                       const SmallVectorImpl<ISD::OutputArg> &Outs,
663                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
664                       SelectionDAG &DAG) const override;
665   SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
666                     SmallVectorImpl<SDValue> &InVals) const override;
667 
668   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
669                                          Type *Ty) const override;
670   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
671   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
672   bool shouldConsiderGEPOffsetSplit() const override { return true; }
673 
674   bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
675                               SDValue C) const override;
676 
677   bool isMulAddWithConstProfitable(SDValue AddNode,
678                                    SDValue ConstNode) const override;
679 
680   TargetLowering::AtomicExpansionKind
681   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
682   Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
683                                       Value *AlignedAddr, Value *Incr,
684                                       Value *Mask, Value *ShiftAmt,
685                                       AtomicOrdering Ord) const override;
686   TargetLowering::AtomicExpansionKind
687   shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
688   Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
689                                           AtomicCmpXchgInst *CI,
690                                           Value *AlignedAddr, Value *CmpVal,
691                                           Value *NewVal, Value *Mask,
692                                           AtomicOrdering Ord) const override;
693 
694   /// Returns true if the target allows unaligned memory accesses of the
695   /// specified type.
696   bool allowsMisalignedMemoryAccesses(
697       EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
698       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
699       unsigned *Fast = nullptr) const override;
700 
701   bool splitValueIntoRegisterParts(
702       SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
703       unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
704       const override;
705 
706   SDValue joinRegisterPartsIntoValue(
707       SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts,
708       unsigned NumParts, MVT PartVT, EVT ValueVT,
709       std::optional<CallingConv::ID> CC) const override;
710 
711   // Return the value of VLMax for the given vector type (i.e. SEW and LMUL)
712   SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const;
713 
714   static RISCVII::VLMUL getLMUL(MVT VT);
715   inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize,
716                                       unsigned MinSize) {
717     // Original equation:
718     //   VLMAX = (VectorBits / EltSize) * LMUL
719     //   where LMUL = MinSize / RISCV::RVVBitsPerBlock
720     // The following equations have been reordered to prevent loss of precision
721     // when calculating fractional LMUL.
722     return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
723   };
724   static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
725   static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
726   static unsigned getRegClassIDForVecVT(MVT VT);
727   static std::pair<unsigned, unsigned>
728   decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
729                                            unsigned InsertExtractIdx,
730                                            const RISCVRegisterInfo *TRI);
731   MVT getContainerForFixedLengthVector(MVT VT) const;
732 
733   bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
734 
735   bool isLegalElementTypeForRVV(EVT ScalarTy) const;
736 
737   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
738 
739   unsigned getJumpTableEncoding() const override;
740 
741   const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
742                                           const MachineBasicBlock *MBB,
743                                           unsigned uid,
744                                           MCContext &Ctx) const override;
745 
746   bool isVScaleKnownToBeAPowerOfTwo() const override;
747
      /// Not marked override, unlike the two declarations that follow it.
      /// Base, Offset, AM and IsInc are non-const references.
      /// NOTE(review): presumably these are outputs filled in on success and
      /// this is the common helper behind the pre/post-indexed overrides below
      /// - confirm against the definitions.
748   bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
749                               ISD::MemIndexedMode &AM, bool &IsInc,
750                               SelectionDAG &DAG) const;
      /// Override for pre-indexed addressing; takes only the node N.
751   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
752                                  ISD::MemIndexedMode &AM,
753                                  SelectionDAG &DAG) const override;
      /// Override for post-indexed addressing; takes a second node Op in
      /// addition to N.
754   bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
755                                   SDValue &Offset, ISD::MemIndexedMode &AM,
756                                   SelectionDAG &DAG) const override;
757 
758   bool isLegalScaleForGatherScatter(uint64_t Scale,
759                                     uint64_t ElemSize) const override {
        // Indexed loads/stores offer no scaled addressing, so the only scale
        // accepted is 1. ElemSize is not consulted.
760     constexpr uint64_t UnscaledIndex = 1;
761     return Scale == UnscaledIndex;
762   }
763 
764   /// If the target has a standard location for the stack protector cookie,
765   /// returns the address of that location. Otherwise, returns nullptr.
766   Value *getIRStackGuard(IRBuilderBase &IRB) const override;
767
768   /// Returns whether or not generating an interleaved load/store intrinsic for
769   /// this type will be legal.
      /// Takes the vector type, interleave factor, alignment, address space and
      /// the data layout; not marked override.
770   bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
771                                     Align Alignment, unsigned AddrSpace,
772                                     const DataLayout &) const;
773
774   /// Return true if a strided load/store of the given result type and
775   /// alignment is legal.
776   bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;
777
      /// Override reporting 8 as the maximum supported interleave factor.
778   unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
779
      /// Override taking a load, the shuffles that use it, their indices and
      /// the interleave factor. NOTE(review): presumably returns true when the
      /// pattern was replaced by a target-specific form - confirm against the
      /// base class documentation.
780   bool lowerInterleavedLoad(LoadInst *LI,
781                             ArrayRef<ShuffleVectorInst *> Shuffles,
782                             ArrayRef<unsigned> Indices,
783                             unsigned Factor) const override;
784
      /// Store counterpart of lowerInterleavedLoad: takes the store, a single
      /// shuffle and the interleave factor.
785   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
786                              unsigned Factor) const override;
787
      /// Override taking an intrinsic call and a load.
      /// NOTE(review): presumably II is a deinterleave intrinsic fed by LI -
      /// confirm against the definition.
788   bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
789                                         LoadInst *LI) const override;
790
      /// Override taking an intrinsic call and a store.
      /// NOTE(review): presumably II is an interleave intrinsic whose result
      /// is stored by SI - confirm against the definition.
791   bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
792                                        StoreInst *SI) const override;
793
      /// Override that unconditionally answers true.
794   bool supportKCFIBundles() const override { return true; }
795
      /// Override returning a MachineInstr for a KCFI check. MBBI is taken by
      /// non-const reference. NOTE(review): whether MBBI is advanced or
      /// otherwise modified is decided in the definition - confirm there.
796   MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
797                               MachineBasicBlock::instr_iterator &MBBI,
798                               const TargetInstrInfo *TII) const override;
799 
800   /// RISCVCCAssignFn - Function type of the target-specific assignment
801   /// routines. It extends the default CCValAssign with additional
802   /// information used to lower RISC-V calling conventions.
803   using RISCVCCAssignFn = bool(const DataLayout &DL, RISCVABI::ABI,
804                                unsigned ValNo, MVT ValVT, MVT LocVT,
805                                CCValAssign::LocInfo LocInfo,
806                                ISD::ArgFlagsTy ArgFlags, CCState &State,
807                                bool IsFixed, bool IsRet, Type *OrigTy,
808                                const RISCVTargetLowering &TLI,
809                                std::optional<unsigned> FirstMaskArgument);
810 
811 private:
      /// Both analyze* helpers receive the calling-convention state (CCInfo),
      /// the argument list to analyze and a RISCVCCAssignFn to apply.
      /// NOTE(review): presumably Fn is invoked once per entry of Ins/Outs and
      /// the outcome recorded in CCInfo - confirm against the definitions.
812   void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
813                         const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
814                         RISCVCCAssignFn Fn) const;
815   void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
816                          const SmallVectorImpl<ISD::OutputArg> &Outs,
817                          bool IsRet, CallLoweringInfo *CLI,
818                          RISCVCCAssignFn Fn) const;
819
      /// Templated over the node type. IsLocal defaults to true and
      /// IsExternWeak to false when the caller omits them.
820   template <class NodeTy>
821   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true,
822                   bool IsExternWeak = false) const;
      /// TLS address helpers for a global address node; the static variant
      /// additionally takes a UseGOT flag.
823   SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
824                            bool UseGOT) const;
825   SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
826
      /// Lowering helpers. Every declaration in this run is a const member that
      /// takes the value being lowered (Op) plus the SelectionDAG and returns
      /// an SDValue; a few take one or two extra arguments. The bodies are not
      /// part of this header.
      /// NOTE(review): each name appears to follow the ISD opcode or construct
      /// it handles - confirm against the definitions.
827   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
828   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
829   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
830   SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
831   SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
832   SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
833   SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
834   SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
835   SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
836   SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
837   SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
      /// NOTE(review): IsSRA presumably selects an arithmetic rather than a
      /// logical right shift - confirm.
838   SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
839   SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const;
840   SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
      /// Takes the value to use for a true element as a signed 64-bit integer.
841   SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
842                              int64_t ExtTrueVal) const;
843   SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
844   SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
845   SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
846   SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
847   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
      /// The three intrinsic helpers are spelled with an upper-case "Lower",
      /// unlike their neighbours.
848   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
849   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
850   SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
851   SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const;
852   SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
      /// IsVP distinguishes the two kinds of caller.
      /// NOTE(review): presumably the VP and non-VP reductions - confirm.
853   SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG,
854                                       bool IsVP) const;
855   SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
856   SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
857   SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
858   SDValue lowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
859   SDValue lowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
860   SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
861   SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
862   SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
863   SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
864   SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
865   SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
866   SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
867                                                SelectionDAG &DAG) const;
868   SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
869   SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
870   SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
871   SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
872   SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
873   SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
874                                             SelectionDAG &DAG) const;
875   SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
876   SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
      /// The VP helpers below take the target opcode(s) to emit as unsigned
      /// values; HasMergeOp defaults to false when omitted.
877   SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc,
878                     bool HasMergeOp = false) const;
879   SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc,
880                          unsigned VecOpc) const;
881   SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
882   SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
883   SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
884                              unsigned RISCVISDOpc) const;
885   SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
886   SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
887   SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
888                                             unsigned ExtendOpc) const;
889   SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
890   SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
891
      /// More const lowering helpers with the usual (Op, DAG) -> SDValue shape;
      /// the bodies are not part of this header.
892   SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
893   SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;
894
895   SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
896
897   SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;
898
      /// NOTE(review): by their names these two expand RVV loads/stores whose
      /// alignment is insufficient; what they return when no expansion is
      /// needed is decided in the definitions - confirm there.
899   SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
900   SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
901
      /// Takes the calling-convention state, the call lowering info, the
      /// machine function and the argument locations already assigned.
      /// NOTE(review): presumably true means the call may be emitted as a tail
      /// call - confirm against the definition.
902   bool isEligibleForTailCallOptimization(
903       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
904       const SmallVector<CCValAssign, 16> &ArgLocs) const;
905
906   /// Generate error diagnostics if any register used by CC has been marked
907   /// reserved.
      /// Regs is a list of (register, value) pairs.
908   void validateCCReservedRegs(
909       const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
910       MachineFunction &MF) const;
911
      /// Query on a machine value type; not marked override.
      /// NOTE(review): presumably true when RVV is used to lower the
      /// fixed-length vector type VT - confirm.
912   bool useRVVForFixedLengthVectorVT(MVT VT) const;
913
      /// Override returning the machine value type used for the VP explicit
      /// vector length operand.
914   MVT getVPExplicitVectorLengthTy() const override;
915
      /// Override taking the trip count type, the vectorization factor and a
      /// flag saying whether that factor is scalable.
916   bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
917                                    bool IsScalable) const override;
918 
919   /// RVV code generation for fixed length vectors does not lower all
920   /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
921   /// merge. However, merging them creates a BUILD_VECTOR that is just as
922   /// illegal as the original, thus leading to an infinite legalisation loop.
923   /// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
924   /// this override can be removed.
      /// Only declared here; the per-type answer is computed in the definition.
925   bool mergeStoresAfterLegalization(EVT VT) const override;
926 
927   /// Disable normalizing
928   /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
929   /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)).
930   /// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
      /// Both arguments are ignored; the answer is always false.
931   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
932     return false;
933   }
934 
935   /// For available scheduling models FDIV + two independent FMULs are much
936   /// faster than two FDIVs.
      /// NOTE(review): the returned unsigned is presumably the number of
      /// divisions by the same divisor from which the reciprocal rewrite pays
      /// off - confirm against the base class documentation.
937   unsigned combineRepeatedFPDivisors() const override;
938 };
939 
940 namespace RISCV {
941
    // CC_RISCV and CC_RISCV_FastCC are declared with the same parameter list
    // as the RISCVTargetLowering::RISCVCCAssignFn function type.
    // NOTE(review): the meaning of the bool result is set by the definitions,
    // which are not part of this header.
942 bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
943               MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
944               ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
945               bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
946               std::optional<unsigned> FirstMaskArgument);
947
948 bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
949                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
950                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
951                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
952                      std::optional<unsigned> FirstMaskArgument);
953
    // CC_RISCV_GHC takes a shorter list: no data layout, ABI, IsFixed, IsRet,
    // original type, lowering object or mask-argument index.
954 bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
955                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
956                   CCState &State);
957 } // end namespace RISCV
958 
959 namespace RISCVVIntrinsicsTable {
960 
961 struct RISCVVIntrinsicInfo {
962   unsigned IntrinsicID;
963   uint8_t ScalarOperand; // 0xF marks "no scalar operand".
964   uint8_t VLOperand;     // 0x1F marks "no VL operand".
      // Both marker values are described next to NoScalarOperand and
      // NoVLOperand in IntrinsicsRISCV.td.
965   bool hasScalarOperand() const {
966     constexpr uint8_t NoScalarOperand = 0xF;
967     return ScalarOperand != NoScalarOperand;
968   }
969   bool hasVLOperand() const {
970     constexpr uint8_t NoVLOperand = 0x1F;
971     return VLOperand != NoVLOperand;
972   }
973 };
974
    // NOTE(review): presumably needed so that names from llvm::RISCV resolve
    // unqualified inside the generated table code included below - confirm.
975 using namespace RISCV;
976
    // GET_RISCVVIntrinsicsTable_DECL is defined only for the duration of the
    // include, which selects the matching section of the generated file.
977 #define GET_RISCVVIntrinsicsTable_DECL
978 #include "RISCVGenSearchableTables.inc"
979 #undef GET_RISCVVIntrinsicsTable_DECL
980 
981 } // end namespace RISCVVIntrinsicsTable
982 
983 } // end namespace llvm
984 
985 #endif
986