xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp (revision 5ca8e32633c4ffbbcd6762e5888b6a4ba0708c6c)
1 //===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation  -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISC-V uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCV.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/MemoryLocation.h"
24 #include "llvm/Analysis/VectorUtils.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineJumpTableInfo.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
31 #include "llvm/CodeGen/ValueTypes.h"
32 #include "llvm/IR/DiagnosticInfo.h"
33 #include "llvm/IR/DiagnosticPrinter.h"
34 #include "llvm/IR/IRBuilder.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicsRISCV.h"
37 #include "llvm/IR/PatternMatch.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/KnownBits.h"
42 #include "llvm/Support/MathExtras.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include <optional>
45 
46 using namespace llvm;
47 
48 #define DEBUG_TYPE "riscv-lower"
49 
50 STATISTIC(NumTailCalls, "Number of tail calls");
51 
52 static cl::opt<unsigned> ExtensionMaxWebSize(
53     DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
54     cl::desc("Give the maximum size (in number of nodes) of the web of "
55              "instructions that we will consider for VW expansion"),
56     cl::init(18));
57 
58 static cl::opt<bool>
59     AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
60                      cl::desc("Allow the formation of VW_W operations (e.g., "
61                               "VWADD_W) with splat constants"),
62                      cl::init(false));
63 
64 static cl::opt<unsigned> NumRepeatedDivisors(
65     DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
66     cl::desc("Set the minimum number of repetitions of a divisor to allow "
67              "transformation to multiplications by the reciprocal"),
68     cl::init(2));
69 
70 static cl::opt<int>
71     FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
72               cl::desc("Give the maximum number of instructions that we will "
73                        "use for creating a floating-point immediate value"),
74               cl::init(2));
75 
76 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
77                                          const RISCVSubtarget &STI)
78     : TargetLowering(TM), Subtarget(STI) {
79 
80   if (Subtarget.isRVE())
81     report_fatal_error("Codegen not yet implemented for RVE");
82 
83   RISCVABI::ABI ABI = Subtarget.getTargetABI();
84   assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
85 
86   if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
87       !Subtarget.hasStdExtF()) {
88     errs() << "Hard-float 'f' ABI can't be used for a target that "
89                 "doesn't support the F instruction set extension (ignoring "
90                           "target-abi)\n";
91     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
92   } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
93              !Subtarget.hasStdExtD()) {
94     errs() << "Hard-float 'd' ABI can't be used for a target that "
95               "doesn't support the D instruction set extension (ignoring "
96               "target-abi)\n";
97     ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
98   }
99 
100   switch (ABI) {
101   default:
102     report_fatal_error("Don't know how to lower this ABI");
103   case RISCVABI::ABI_ILP32:
104   case RISCVABI::ABI_ILP32F:
105   case RISCVABI::ABI_ILP32D:
106   case RISCVABI::ABI_LP64:
107   case RISCVABI::ABI_LP64F:
108   case RISCVABI::ABI_LP64D:
109     break;
110   }
111 
112   MVT XLenVT = Subtarget.getXLenVT();
113 
114   // Set up the register classes.
115   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
116 
117   if (Subtarget.hasStdExtZfhOrZfhmin())
118     addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
119   if (Subtarget.hasStdExtZfbfmin())
120     addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
121   if (Subtarget.hasStdExtF())
122     addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
123   if (Subtarget.hasStdExtD())
124     addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
125   if (Subtarget.hasStdExtZhinxOrZhinxmin())
126     addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
127   if (Subtarget.hasStdExtZfinx())
128     addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
129   if (Subtarget.hasStdExtZdinx()) {
130     if (Subtarget.is64Bit())
131       addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
132     else
133       addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
134   }
135 
136   static const MVT::SimpleValueType BoolVecVTs[] = {
137       MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
138       MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
139   static const MVT::SimpleValueType IntVecVTs[] = {
140       MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
141       MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
142       MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
143       MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
144       MVT::nxv4i64, MVT::nxv8i64};
145   static const MVT::SimpleValueType F16VecVTs[] = {
146       MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
147       MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
148   static const MVT::SimpleValueType F32VecVTs[] = {
149       MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
150   static const MVT::SimpleValueType F64VecVTs[] = {
151       MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
152 
153   if (Subtarget.hasVInstructions()) {
154     auto addRegClassForRVV = [this](MVT VT) {
155       // Disable the smallest fractional LMUL types if ELEN is less than
156       // RVVBitsPerBlock.
157       unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
158       if (VT.getVectorMinNumElements() < MinElts)
159         return;
160 
161       unsigned Size = VT.getSizeInBits().getKnownMinValue();
162       const TargetRegisterClass *RC;
163       if (Size <= RISCV::RVVBitsPerBlock)
164         RC = &RISCV::VRRegClass;
165       else if (Size == 2 * RISCV::RVVBitsPerBlock)
166         RC = &RISCV::VRM2RegClass;
167       else if (Size == 4 * RISCV::RVVBitsPerBlock)
168         RC = &RISCV::VRM4RegClass;
169       else if (Size == 8 * RISCV::RVVBitsPerBlock)
170         RC = &RISCV::VRM8RegClass;
171       else
172         llvm_unreachable("Unexpected size");
173 
174       addRegisterClass(VT, RC);
175     };
176 
177     for (MVT VT : BoolVecVTs)
178       addRegClassForRVV(VT);
179     for (MVT VT : IntVecVTs) {
180       if (VT.getVectorElementType() == MVT::i64 &&
181           !Subtarget.hasVInstructionsI64())
182         continue;
183       addRegClassForRVV(VT);
184     }
185 
186     if (Subtarget.hasVInstructionsF16())
187       for (MVT VT : F16VecVTs)
188         addRegClassForRVV(VT);
189 
190     if (Subtarget.hasVInstructionsF32())
191       for (MVT VT : F32VecVTs)
192         addRegClassForRVV(VT);
193 
194     if (Subtarget.hasVInstructionsF64())
195       for (MVT VT : F64VecVTs)
196         addRegClassForRVV(VT);
197 
198     if (Subtarget.useRVVForFixedLengthVectors()) {
199       auto addRegClassForFixedVectors = [this](MVT VT) {
200         MVT ContainerVT = getContainerForFixedLengthVector(VT);
201         unsigned RCID = getRegClassIDForVecVT(ContainerVT);
202         const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
203         addRegisterClass(VT, TRI.getRegClass(RCID));
204       };
205       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
206         if (useRVVForFixedLengthVectorVT(VT))
207           addRegClassForFixedVectors(VT);
208 
209       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
210         if (useRVVForFixedLengthVectorVT(VT))
211           addRegClassForFixedVectors(VT);
212     }
213   }
214 
215   // Compute derived properties from the register classes.
216   computeRegisterProperties(STI.getRegisterInfo());
217 
218   setStackPointerRegisterToSaveRestore(RISCV::X2);
219 
220   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
221                    MVT::i1, Promote);
222   // DAGCombiner can call isLoadExtLegal for types that aren't legal.
223   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
224                    MVT::i1, Promote);
225 
226   // TODO: add all necessary setOperationAction calls.
227   setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
228 
229   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
230   setOperationAction(ISD::BR_CC, XLenVT, Expand);
231   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
232   setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
233 
234   setCondCodeAction(ISD::SETLE, XLenVT, Expand);
235   setCondCodeAction(ISD::SETGT, XLenVT, Custom);
236   setCondCodeAction(ISD::SETGE, XLenVT, Expand);
237   setCondCodeAction(ISD::SETULE, XLenVT, Expand);
238   setCondCodeAction(ISD::SETUGT, XLenVT, Custom);
239   setCondCodeAction(ISD::SETUGE, XLenVT, Expand);
240 
241   setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
242 
243   setOperationAction(ISD::VASTART, MVT::Other, Custom);
244   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
245 
246   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
247 
248   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
249 
250   if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
251     setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
252 
253   if (Subtarget.is64Bit()) {
254     setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
255 
256     setOperationAction(ISD::LOAD, MVT::i32, Custom);
257 
258     setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
259                        MVT::i32, Custom);
260 
261     setOperationAction(ISD::SADDO, MVT::i32, Custom);
262     setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
263                        MVT::i32, Custom);
264   } else {
265     setLibcallName(
266         {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
267         nullptr);
268     setLibcallName(RTLIB::MULO_I64, nullptr);
269   }
270 
271   if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())
272     setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
273   else if (Subtarget.is64Bit())
274     setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
275   else
276     setOperationAction(ISD::MUL, MVT::i64, Custom);
277 
278   if (!Subtarget.hasStdExtM())
279     setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
280                        XLenVT, Expand);
281   else if (Subtarget.is64Bit())
282     setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
283                        {MVT::i8, MVT::i16, MVT::i32}, Custom);
284 
285   setOperationAction(
286       {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
287       Expand);
288 
289   setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
290                      Custom);
291 
292   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
293     if (Subtarget.is64Bit())
294       setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
295   } else if (Subtarget.hasVendorXTHeadBb()) {
296     if (Subtarget.is64Bit())
297       setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
298     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
299   } else {
300     setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
301   }
302 
303   // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
304   // pattern match it directly in isel.
305   setOperationAction(ISD::BSWAP, XLenVT,
306                      (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
307                       Subtarget.hasVendorXTHeadBb())
308                          ? Legal
309                          : Expand);
310   // Zbkb can use rev8+brev8 to implement bitreverse.
311   setOperationAction(ISD::BITREVERSE, XLenVT,
312                      Subtarget.hasStdExtZbkb() ? Custom : Expand);
313 
314   if (Subtarget.hasStdExtZbb()) {
315     setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
316                        Legal);
317 
318     if (Subtarget.is64Bit())
319       setOperationAction(
320           {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
321           MVT::i32, Custom);
322   } else {
323     setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
324   }
325 
326   if (Subtarget.hasVendorXTHeadBb()) {
327     setOperationAction(ISD::CTLZ, XLenVT, Legal);
328 
329     // We need the custom lowering to make sure that the resulting sequence
330     // for the 32bit case is efficient on 64bit targets.
331     if (Subtarget.is64Bit())
332       setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
333   }
334 
335   if (Subtarget.is64Bit())
336     setOperationAction(ISD::ABS, MVT::i32, Custom);
337 
338   if (!Subtarget.hasVendorXTHeadCondMov())
339     setOperationAction(ISD::SELECT, XLenVT, Custom);
340 
341   static const unsigned FPLegalNodeTypes[] = {
342       ISD::FMINNUM,        ISD::FMAXNUM,       ISD::LRINT,
343       ISD::LLRINT,         ISD::LROUND,        ISD::LLROUND,
344       ISD::STRICT_LRINT,   ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
345       ISD::STRICT_LLROUND, ISD::STRICT_FMA,    ISD::STRICT_FADD,
346       ISD::STRICT_FSUB,    ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
347       ISD::STRICT_FSQRT,   ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
348 
349   static const ISD::CondCode FPCCToExpand[] = {
350       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
351       ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
352       ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};
353 
354   static const unsigned FPOpToExpand[] = {
355       ISD::FSIN, ISD::FCOS,       ISD::FSINCOS,   ISD::FPOW,
356       ISD::FREM};
357 
358   static const unsigned FPRndMode[] = {
359       ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
360       ISD::FROUNDEVEN};
361 
362   if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
363     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
364 
365   if (Subtarget.hasStdExtZfbfmin()) {
366     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
367     setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
368     setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
369     setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
370     setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
371     setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
372   }
373 
374   if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
375     if (Subtarget.hasStdExtZfhOrZhinx()) {
376       setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
377       setOperationAction(FPRndMode, MVT::f16,
378                          Subtarget.hasStdExtZfa() ? Legal : Custom);
379       setOperationAction(ISD::SELECT, MVT::f16, Custom);
380       setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
381     } else {
382       static const unsigned ZfhminPromoteOps[] = {
383           ISD::FMINNUM,      ISD::FMAXNUM,       ISD::FADD,
384           ISD::FSUB,         ISD::FMUL,          ISD::FMA,
385           ISD::FDIV,         ISD::FSQRT,         ISD::FABS,
386           ISD::FNEG,         ISD::STRICT_FMA,    ISD::STRICT_FADD,
387           ISD::STRICT_FSUB,  ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
388           ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
389           ISD::SETCC,        ISD::FCEIL,         ISD::FFLOOR,
390           ISD::FTRUNC,       ISD::FRINT,         ISD::FROUND,
391           ISD::FROUNDEVEN,   ISD::SELECT};
392 
393       setOperationAction(ZfhminPromoteOps, MVT::f16, Promote);
394       setOperationAction({ISD::STRICT_LRINT, ISD::STRICT_LLRINT,
395                           ISD::STRICT_LROUND, ISD::STRICT_LLROUND},
396                          MVT::f16, Legal);
397       // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
398       // DAGCombiner::visitFP_ROUND probably needs improvements first.
399       setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
400     }
401 
402     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
403     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
404     setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
405     setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
406     setOperationAction(ISD::BR_CC, MVT::f16, Expand);
407 
408     setOperationAction(ISD::FNEARBYINT, MVT::f16,
409                        Subtarget.hasStdExtZfa() ? Legal : Promote);
410     setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
411                         ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
412                         ISD::FEXP2, ISD::FLOG, ISD::FLOG2, ISD::FLOG10},
413                        MVT::f16, Promote);
414 
415     // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
416     // complete support for all operations in LegalizeDAG.
417     setOperationAction({ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
418                         ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
419                         ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN,
420                         ISD::STRICT_FTRUNC},
421                        MVT::f16, Promote);
422 
423     // We need to custom promote this.
424     if (Subtarget.is64Bit())
425       setOperationAction(ISD::FPOWI, MVT::i32, Custom);
426 
427     if (!Subtarget.hasStdExtZfa())
428       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
429   }
430 
431   if (Subtarget.hasStdExtFOrZfinx()) {
432     setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
433     setOperationAction(FPRndMode, MVT::f32,
434                        Subtarget.hasStdExtZfa() ? Legal : Custom);
435     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
436     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
437     setOperationAction(ISD::SELECT, MVT::f32, Custom);
438     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
439     setOperationAction(FPOpToExpand, MVT::f32, Expand);
440     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
441     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
442     setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
443     setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
444     setOperationAction(ISD::FP_TO_BF16, MVT::f32,
445                        Subtarget.isSoftFPABI() ? LibCall : Custom);
446     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
447     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
448 
449     if (Subtarget.hasStdExtZfa())
450       setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
451     else
452       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
453   }
454 
455   if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
456     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
457 
458   if (Subtarget.hasStdExtDOrZdinx()) {
459     setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
460 
461     if (Subtarget.hasStdExtZfa()) {
462       setOperationAction(FPRndMode, MVT::f64, Legal);
463       setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
464       setOperationAction(ISD::BITCAST, MVT::i64, Custom);
465       setOperationAction(ISD::BITCAST, MVT::f64, Custom);
466     } else {
467       if (Subtarget.is64Bit())
468         setOperationAction(FPRndMode, MVT::f64, Custom);
469 
470       setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
471     }
472 
473     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
474     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
475     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
476     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
477     setOperationAction(ISD::SELECT, MVT::f64, Custom);
478     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
479     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
480     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
481     setOperationAction(FPOpToExpand, MVT::f64, Expand);
482     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
483     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
484     setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
485     setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
486     setOperationAction(ISD::FP_TO_BF16, MVT::f64,
487                        Subtarget.isSoftFPABI() ? LibCall : Custom);
488     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
489     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
490   }
491 
492   if (Subtarget.is64Bit()) {
493     setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
494                         ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
495                        MVT::i32, Custom);
496     setOperationAction(ISD::LROUND, MVT::i32, Custom);
497   }
498 
499   if (Subtarget.hasStdExtFOrZfinx()) {
500     setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
501                        Custom);
502 
503     setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
504                         ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
505                        XLenVT, Legal);
506 
507     setOperationAction(ISD::GET_ROUNDING, XLenVT, Custom);
508     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
509   }
510 
511   setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
512                       ISD::JumpTable},
513                      XLenVT, Custom);
514 
515   setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
516 
517   if (Subtarget.is64Bit())
518     setOperationAction(ISD::Constant, MVT::i64, Custom);
519 
520   // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
521   // Unfortunately this can't be determined just from the ISA naming string.
522   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
523                      Subtarget.is64Bit() ? Legal : Custom);
524 
525   setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
526   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
527   if (Subtarget.is64Bit())
528     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
529 
530   if (Subtarget.hasStdExtZicbop()) {
531     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
532   }
533 
534   if (Subtarget.hasStdExtA()) {
535     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
536     setMinCmpXchgSizeInBits(32);
537   } else if (Subtarget.hasForcedAtomics()) {
538     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
539   } else {
540     setMaxAtomicSizeInBitsSupported(0);
541   }
542 
543   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
544 
545   setBooleanContents(ZeroOrOneBooleanContent);
546 
547   if (Subtarget.hasVInstructions()) {
548     setBooleanVectorContents(ZeroOrOneBooleanContent);
549 
550     setOperationAction(ISD::VSCALE, XLenVT, Custom);
551 
552     // RVV intrinsics may have illegal operands.
553     // We also need to custom legalize vmv.x.s.
554     setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
555                         ISD::INTRINSIC_VOID},
556                        {MVT::i8, MVT::i16}, Custom);
557     if (Subtarget.is64Bit())
558       setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
559                          MVT::i32, Custom);
560     else
561       setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
562                          MVT::i64, Custom);
563 
564     setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
565                        MVT::Other, Custom);
566 
567     static const unsigned IntegerVPOps[] = {
568         ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
569         ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
570         ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
571         ISD::VP_XOR,         ISD::VP_ASHR,        ISD::VP_LSHR,
572         ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
573         ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
574         ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
575         ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FP_TO_SINT,
576         ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
577         ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
578         ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
579         ISD::VP_ABS};
580 
581     static const unsigned FloatingPointVPOps[] = {
582         ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
583         ISD::VP_FDIV,        ISD::VP_FNEG,        ISD::VP_FABS,
584         ISD::VP_FMA,         ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
585         ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
586         ISD::VP_SELECT,      ISD::VP_SINT_TO_FP,  ISD::VP_UINT_TO_FP,
587         ISD::VP_SETCC,       ISD::VP_FP_ROUND,    ISD::VP_FP_EXTEND,
588         ISD::VP_SQRT,        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,
589         ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
590         ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
591         ISD::VP_FRINT,       ISD::VP_FNEARBYINT};
592 
593     static const unsigned IntegerVecReduceOps[] = {
594         ISD::VECREDUCE_ADD,  ISD::VECREDUCE_AND,  ISD::VECREDUCE_OR,
595         ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
596         ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
597 
598     static const unsigned FloatingPointVecReduceOps[] = {
599         ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
600         ISD::VECREDUCE_FMAX};
601 
602     if (!Subtarget.is64Bit()) {
603       // We must custom-lower certain vXi64 operations on RV32 due to the vector
604       // element type being illegal.
605       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
606                          MVT::i64, Custom);
607 
608       setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
609 
610       setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
611                           ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
612                           ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
613                           ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
614                          MVT::i64, Custom);
615     }
616 
617     for (MVT VT : BoolVecVTs) {
618       if (!isTypeLegal(VT))
619         continue;
620 
621       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
622 
623       // Mask VTs are custom-expanded into a series of standard nodes
624       setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
625                           ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
626                           ISD::SCALAR_TO_VECTOR},
627                          VT, Custom);
628 
629       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
630                          Custom);
631 
632       setOperationAction(ISD::SELECT, VT, Custom);
633       setOperationAction(
634           {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
635           Expand);
636 
637       setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
638 
639       setOperationAction(
640           {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
641           Custom);
642 
643       setOperationAction(
644           {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
645           Custom);
646 
647       // RVV has native int->float & float->int conversions where the
648       // element type sizes are within one power-of-two of each other. Any
649       // wider distances between type sizes have to be lowered as sequences
650       // which progressively narrow the gap in stages.
651       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
652                           ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
653                           ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
654                           ISD::STRICT_FP_TO_UINT},
655                          VT, Custom);
656       setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
657                          Custom);
658 
659       // Expand all extending loads to types larger than this, and truncating
660       // stores from types larger than this.
661       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
662         setTruncStoreAction(OtherVT, VT, Expand);
663         setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
664                          VT, Expand);
665       }
666 
667       setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
668                           ISD::VP_TRUNCATE, ISD::VP_SETCC},
669                          VT, Custom);
670 
671       setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
672       setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
673 
674       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
675 
676       setOperationPromotedToType(
677           ISD::VECTOR_SPLICE, VT,
678           MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
679     }
680 
681     for (MVT VT : IntVecVTs) {
682       if (!isTypeLegal(VT))
683         continue;
684 
685       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
686       setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
687 
688       // Vectors implement MULHS/MULHU.
689       setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
690 
691       // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
692       if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
693         setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);
694 
695       setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
696                          Legal);
697 
698       setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand);
699 
700       // Custom-lower extensions and truncations from/to mask types.
701       setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
702                          VT, Custom);
703 
704       // RVV has native int->float & float->int conversions where the
705       // element type sizes are within one power-of-two of each other. Any
706       // wider distances between type sizes have to be lowered as sequences
707       // which progressively narrow the gap in stages.
708       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
709                           ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
710                           ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
711                           ISD::STRICT_FP_TO_UINT},
712                          VT, Custom);
713       setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
714                          Custom);
715 
716       setOperationAction(
717           {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
718 
719       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
720       // nodes which truncate by one power of two at a time.
721       setOperationAction(ISD::TRUNCATE, VT, Custom);
722 
723       // Custom-lower insert/extract operations to simplify patterns.
724       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
725                          Custom);
726 
727       // Custom-lower reduction operations to set up the corresponding custom
728       // nodes' operands.
729       setOperationAction(IntegerVecReduceOps, VT, Custom);
730 
731       setOperationAction(IntegerVPOps, VT, Custom);
732 
733       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
734 
735       setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
736                          VT, Custom);
737 
738       setOperationAction(
739           {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
740            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
741           VT, Custom);
742 
743       setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
744                           ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
745                          VT, Custom);
746 
747       setOperationAction(ISD::SELECT, VT, Custom);
748       setOperationAction(ISD::SELECT_CC, VT, Expand);
749 
750       setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);
751 
752       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
753         setTruncStoreAction(VT, OtherVT, Expand);
754         setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
755                          VT, Expand);
756       }
757 
758       setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
759       setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
760 
761       // Splice
762       setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
763 
764       if (Subtarget.hasStdExtZvbb()) {
765         setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Legal);
766         setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Custom);
767         setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
768                             ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
769                            VT, Custom);
770       } else {
771         setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Expand);
772         setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Expand);
773         setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
774         setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
775                             ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
776                            VT, Expand);
777 
778         // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
779         // range of f32.
780         EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
781         if (isTypeLegal(FloatVT)) {
782           setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
783                               ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
784                               ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
785                              VT, Custom);
786         }
787 
788         setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
789       }
790     }
791 
792     // Expand various CCs to best match the RVV ISA, which natively supports UNE
793     // but no other unordered comparisons, and supports all ordered comparisons
794     // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
795     // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
796     // and we pattern-match those back to the "original", swapping operands once
797     // more. This way we catch both operations and both "vf" and "fv" forms with
798     // fewer patterns.
799     static const ISD::CondCode VFPCCToExpand[] = {
800         ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
801         ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
802         ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
803     };
804 
805     // Sets common operation actions on RVV floating-point vector types.
806     const auto SetCommonVFPActions = [&](MVT VT) {
807       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
808       // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
809       // sizes are within one power-of-two of each other. Therefore conversions
810       // between vXf16 and vXf64 must be lowered as sequences which convert via
811       // vXf32.
812       setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
813       // Custom-lower insert/extract operations to simplify patterns.
814       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
815                          Custom);
816       // Expand various condition codes (explained above).
817       setCondCodeAction(VFPCCToExpand, VT, Expand);
818 
819       setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
820 
821       setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
822                           ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
823                           ISD::IS_FPCLASS},
824                          VT, Custom);
825 
826       setOperationAction(FloatingPointVecReduceOps, VT, Custom);
827 
828       // Expand FP operations that need libcalls.
829       setOperationAction(ISD::FREM, VT, Expand);
830       setOperationAction(ISD::FPOW, VT, Expand);
831       setOperationAction(ISD::FCOS, VT, Expand);
832       setOperationAction(ISD::FSIN, VT, Expand);
833       setOperationAction(ISD::FSINCOS, VT, Expand);
834       setOperationAction(ISD::FEXP, VT, Expand);
835       setOperationAction(ISD::FEXP2, VT, Expand);
836       setOperationAction(ISD::FLOG, VT, Expand);
837       setOperationAction(ISD::FLOG2, VT, Expand);
838       setOperationAction(ISD::FLOG10, VT, Expand);
839 
840       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
841 
842       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
843 
844       setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
845                          VT, Custom);
846 
847       setOperationAction(
848           {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
849            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
850           VT, Custom);
851 
852       setOperationAction(ISD::SELECT, VT, Custom);
853       setOperationAction(ISD::SELECT_CC, VT, Expand);
854 
855       setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
856                           ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
857                          VT, Custom);
858 
859       setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
860       setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
861 
862       setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
863 
864       setOperationAction(FloatingPointVPOps, VT, Custom);
865 
866       setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
867                          Custom);
868       setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
869                           ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
870                          VT, Legal);
871       setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
872                           ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
873                           ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
874                           ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
875                          VT, Custom);
876     };
877 
878     // Sets common extload/truncstore actions on RVV floating-point vector
879     // types.
880     const auto SetCommonVFPExtLoadTruncStoreActions =
881         [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
882           for (auto SmallVT : SmallerVTs) {
883             setTruncStoreAction(VT, SmallVT, Expand);
884             setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
885           }
886         };
887 
888     if (Subtarget.hasVInstructionsF16()) {
889       for (MVT VT : F16VecVTs) {
890         if (!isTypeLegal(VT))
891           continue;
892         SetCommonVFPActions(VT);
893       }
894     }
895 
896     if (Subtarget.hasVInstructionsF32()) {
897       for (MVT VT : F32VecVTs) {
898         if (!isTypeLegal(VT))
899           continue;
900         SetCommonVFPActions(VT);
901         SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
902       }
903     }
904 
905     if (Subtarget.hasVInstructionsF64()) {
906       for (MVT VT : F64VecVTs) {
907         if (!isTypeLegal(VT))
908           continue;
909         SetCommonVFPActions(VT);
910         SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
911         SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
912       }
913     }
914 
915     if (Subtarget.useRVVForFixedLengthVectors()) {
916       for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
917         if (!useRVVForFixedLengthVectorVT(VT))
918           continue;
919 
920         // By default everything must be expanded.
921         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
922           setOperationAction(Op, VT, Expand);
923         for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
924           setTruncStoreAction(VT, OtherVT, Expand);
925           setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
926                            OtherVT, VT, Expand);
927         }
928 
929         // Custom lower fixed vector undefs to scalable vector undefs to avoid
930         // expansion to a build_vector of 0s.
931         setOperationAction(ISD::UNDEF, VT, Custom);
932 
933         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
934         setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
935                            Custom);
936 
937         setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
938                            Custom);
939 
940         setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
941                            VT, Custom);
942 
943         setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
944 
945         setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
946 
947         setOperationAction(ISD::SETCC, VT, Custom);
948 
949         setOperationAction(ISD::SELECT, VT, Custom);
950 
951         setOperationAction(ISD::TRUNCATE, VT, Custom);
952 
953         setOperationAction(ISD::BITCAST, VT, Custom);
954 
955         setOperationAction(
956             {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
957             Custom);
958 
959         setOperationAction(
960             {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
961             Custom);
962 
963         setOperationAction(
964             {
965                 ISD::SINT_TO_FP,
966                 ISD::UINT_TO_FP,
967                 ISD::FP_TO_SINT,
968                 ISD::FP_TO_UINT,
969                 ISD::STRICT_SINT_TO_FP,
970                 ISD::STRICT_UINT_TO_FP,
971                 ISD::STRICT_FP_TO_SINT,
972                 ISD::STRICT_FP_TO_UINT,
973             },
974             VT, Custom);
975         setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
976                            Custom);
977 
978         setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
979 
980         // Operations below are different for between masks and other vectors.
981         if (VT.getVectorElementType() == MVT::i1) {
982           setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
983                               ISD::OR, ISD::XOR},
984                              VT, Custom);
985 
986           setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
987                               ISD::VP_SETCC, ISD::VP_TRUNCATE},
988                              VT, Custom);
989           continue;
990         }
991 
992         // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
993         // it before type legalization for i64 vectors on RV32. It will then be
994         // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
995         // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
996         // improvements first.
997         if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
998           setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
999           setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
1000         }
1001 
1002         setOperationAction(
1003             {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1004 
1005         setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1006                             ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1007                             ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1008                             ISD::VP_SCATTER},
1009                            VT, Custom);
1010 
1011         setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
1012                             ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
1013                             ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
1014                            VT, Custom);
1015 
1016         setOperationAction(
1017             {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
1018 
1019         // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1020         if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1021           setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
1022 
1023         setOperationAction(
1024             {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
1025             Custom);
1026 
1027         setOperationAction(ISD::VSELECT, VT, Custom);
1028         setOperationAction(ISD::SELECT_CC, VT, Expand);
1029 
1030         setOperationAction(
1031             {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
1032 
1033         // Custom-lower reduction operations to set up the corresponding custom
1034         // nodes' operands.
1035         setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1036                             ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1037                             ISD::VECREDUCE_UMIN},
1038                            VT, Custom);
1039 
1040         setOperationAction(IntegerVPOps, VT, Custom);
1041 
1042         // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1043         // range of f32.
1044         EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1045         if (isTypeLegal(FloatVT))
1046           setOperationAction(
1047               {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
1048               Custom);
1049       }
1050 
1051       for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1052         // There are no extending loads or truncating stores.
1053         for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1054           setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1055           setTruncStoreAction(VT, InnerVT, Expand);
1056         }
1057 
1058         if (!useRVVForFixedLengthVectorVT(VT))
1059           continue;
1060 
1061         // By default everything must be expanded.
1062         for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1063           setOperationAction(Op, VT, Expand);
1064 
1065         // Custom lower fixed vector undefs to scalable vector undefs to avoid
1066         // expansion to a build_vector of 0s.
1067         setOperationAction(ISD::UNDEF, VT, Custom);
1068 
1069         // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1070         setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
1071                            Custom);
1072 
1073         setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
1074                             ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
1075                             ISD::EXTRACT_VECTOR_ELT},
1076                            VT, Custom);
1077 
1078         setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1079                             ISD::MGATHER, ISD::MSCATTER},
1080                            VT, Custom);
1081 
1082         setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1083                             ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1084                             ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1085                             ISD::VP_SCATTER},
1086                            VT, Custom);
1087 
1088         setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
1089                             ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1090                             ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1091                             ISD::IS_FPCLASS},
1092                            VT, Custom);
1093 
1094         setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1095 
1096         setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1097                             ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
1098                            VT, Custom);
1099 
1100         setCondCodeAction(VFPCCToExpand, VT, Expand);
1101 
1102         setOperationAction(ISD::SETCC, VT, Custom);
1103         setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
1104         setOperationAction(ISD::SELECT_CC, VT, Expand);
1105 
1106         setOperationAction(ISD::BITCAST, VT, Custom);
1107 
1108         setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1109 
1110         setOperationAction(FloatingPointVPOps, VT, Custom);
1111 
1112         setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
1113                            Custom);
1114         setOperationAction(
1115             {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
1116              ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
1117              ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
1118              ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
1119              ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
1120             VT, Custom);
1121       }
1122 
1123       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1124       setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1125                          Custom);
1126       if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
1127         setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1128       if (Subtarget.hasStdExtFOrZfinx())
1129         setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1130       if (Subtarget.hasStdExtDOrZdinx())
1131         setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1132     }
1133   }
1134 
1135   if (Subtarget.hasForcedAtomics()) {
1136     // Set atomic rmw/cas operations to expand to force __sync libcalls.
1137     setOperationAction(
1138         {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1139          ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1140          ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1141          ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1142         XLenVT, Expand);
1143   }
1144 
1145   if (Subtarget.hasVendorXTHeadMemIdx()) {
1146     for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
1147          ++im) {
1148       setIndexedLoadAction(im, MVT::i8, Legal);
1149       setIndexedStoreAction(im, MVT::i8, Legal);
1150       setIndexedLoadAction(im, MVT::i16, Legal);
1151       setIndexedStoreAction(im, MVT::i16, Legal);
1152       setIndexedLoadAction(im, MVT::i32, Legal);
1153       setIndexedStoreAction(im, MVT::i32, Legal);
1154 
1155       if (Subtarget.is64Bit()) {
1156         setIndexedLoadAction(im, MVT::i64, Legal);
1157         setIndexedStoreAction(im, MVT::i64, Legal);
1158       }
1159     }
1160   }
1161 
1162   // Function alignments.
1163   const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1164   setMinFunctionAlignment(FunctionAlignment);
1165   // Set preferred alignments.
1166   setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1167   setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1168 
1169   setMinimumJumpTableEntries(5);
1170 
1171   // Jumps are expensive, compared to logic
1172   setJumpIsExpensive();
1173 
1174   setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
1175                        ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
1176                        ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
1177   if (Subtarget.is64Bit())
1178     setTargetDAGCombine(ISD::SRA);
1179 
1180   if (Subtarget.hasStdExtFOrZfinx())
1181     setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
1182 
1183   if (Subtarget.hasStdExtZbb())
1184     setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
1185 
1186   if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1187     setTargetDAGCombine(ISD::TRUNCATE);
1188 
1189   if (Subtarget.hasStdExtZbkb())
1190     setTargetDAGCombine(ISD::BITREVERSE);
1191   if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
1192     setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1193   if (Subtarget.hasStdExtFOrZfinx())
1194     setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
1195                          ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
1196   if (Subtarget.hasVInstructions())
1197     setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1198                          ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1199                          ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
1200                          ISD::CONCAT_VECTORS});
1201   if (Subtarget.hasVendorXTHeadMemPair())
1202     setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1203   if (Subtarget.useRVVForFixedLengthVectors())
1204     setTargetDAGCombine(ISD::BITCAST);
1205 
1206   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1207   setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1208 
1209   // Disable strict node mutation.
1210   IsStrictFPEnabled = true;
1211 }
1212 
1213 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1214                                             LLVMContext &Context,
1215                                             EVT VT) const {
1216   if (!VT.isVector())
1217     return getPointerTy(DL);
1218   if (Subtarget.hasVInstructions() &&
1219       (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1220     return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1221   return VT.changeVectorElementTypeToInteger();
1222 }
1223 
1224 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1225   return Subtarget.getXLenVT();
1226 }
1227 
1228 // Return false if we can lower get_vector_length to a vsetvli intrinsic.
1229 bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1230                                                       unsigned VF,
1231                                                       bool IsScalable) const {
1232   if (!Subtarget.hasVInstructions())
1233     return true;
1234 
1235   if (!IsScalable)
1236     return true;
1237 
1238   if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1239     return true;
1240 
1241   // Don't allow VF=1 if those types are't legal.
1242   if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN())
1243     return true;
1244 
1245   // VLEN=32 support is incomplete.
1246   if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1247     return true;
1248 
1249   // The maximum VF is for the smallest element width with LMUL=8.
1250   // VF must be a power of 2.
1251   unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1252   return VF > MaxVF || !isPowerOf2_32(VF);
1253 }
1254 
1255 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1256                                              const CallInst &I,
1257                                              MachineFunction &MF,
1258                                              unsigned Intrinsic) const {
1259   auto &DL = I.getModule()->getDataLayout();
1260 
1261   auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1262                                  bool IsUnitStrided) {
1263     Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1264     Info.ptrVal = I.getArgOperand(PtrOp);
1265     Type *MemTy;
1266     if (IsStore) {
1267       // Store value is the first operand.
1268       MemTy = I.getArgOperand(0)->getType();
1269     } else {
1270       // Use return type. If it's segment load, return type is a struct.
1271       MemTy = I.getType();
1272       if (MemTy->isStructTy())
1273         MemTy = MemTy->getStructElementType(0);
1274     }
1275     if (!IsUnitStrided)
1276       MemTy = MemTy->getScalarType();
1277 
1278     Info.memVT = getValueType(DL, MemTy);
1279     Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1280     Info.size = MemoryLocation::UnknownSize;
1281     Info.flags |=
1282         IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1283     return true;
1284   };
1285 
1286   if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
1287     Info.flags |= MachineMemOperand::MONonTemporal;
1288 
1289   Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
1290   switch (Intrinsic) {
1291   default:
1292     return false;
1293   case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1294   case Intrinsic::riscv_masked_atomicrmw_add_i32:
1295   case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1296   case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1297   case Intrinsic::riscv_masked_atomicrmw_max_i32:
1298   case Intrinsic::riscv_masked_atomicrmw_min_i32:
1299   case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1300   case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1301   case Intrinsic::riscv_masked_cmpxchg_i32:
1302     Info.opc = ISD::INTRINSIC_W_CHAIN;
1303     Info.memVT = MVT::i32;
1304     Info.ptrVal = I.getArgOperand(0);
1305     Info.offset = 0;
1306     Info.align = Align(4);
1307     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
1308                  MachineMemOperand::MOVolatile;
1309     return true;
1310   case Intrinsic::riscv_masked_strided_load:
1311     return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1312                                /*IsUnitStrided*/ false);
1313   case Intrinsic::riscv_masked_strided_store:
1314     return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1315                                /*IsUnitStrided*/ false);
1316   case Intrinsic::riscv_seg2_load:
1317   case Intrinsic::riscv_seg3_load:
1318   case Intrinsic::riscv_seg4_load:
1319   case Intrinsic::riscv_seg5_load:
1320   case Intrinsic::riscv_seg6_load:
1321   case Intrinsic::riscv_seg7_load:
1322   case Intrinsic::riscv_seg8_load:
1323     return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1324                                /*IsUnitStrided*/ false);
1325   case Intrinsic::riscv_seg2_store:
1326   case Intrinsic::riscv_seg3_store:
1327   case Intrinsic::riscv_seg4_store:
1328   case Intrinsic::riscv_seg5_store:
1329   case Intrinsic::riscv_seg6_store:
1330   case Intrinsic::riscv_seg7_store:
1331   case Intrinsic::riscv_seg8_store:
1332     // Operands are (vec, ..., vec, ptr, vl)
1333     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1334                                /*IsStore*/ true,
1335                                /*IsUnitStrided*/ false);
1336   case Intrinsic::riscv_vle:
1337   case Intrinsic::riscv_vle_mask:
1338   case Intrinsic::riscv_vleff:
1339   case Intrinsic::riscv_vleff_mask:
1340     return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1341                                /*IsStore*/ false,
1342                                /*IsUnitStrided*/ true);
1343   case Intrinsic::riscv_vse:
1344   case Intrinsic::riscv_vse_mask:
1345     return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1346                                /*IsStore*/ true,
1347                                /*IsUnitStrided*/ true);
1348   case Intrinsic::riscv_vlse:
1349   case Intrinsic::riscv_vlse_mask:
1350   case Intrinsic::riscv_vloxei:
1351   case Intrinsic::riscv_vloxei_mask:
1352   case Intrinsic::riscv_vluxei:
1353   case Intrinsic::riscv_vluxei_mask:
1354     return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1355                                /*IsStore*/ false,
1356                                /*IsUnitStrided*/ false);
1357   case Intrinsic::riscv_vsse:
1358   case Intrinsic::riscv_vsse_mask:
1359   case Intrinsic::riscv_vsoxei:
1360   case Intrinsic::riscv_vsoxei_mask:
1361   case Intrinsic::riscv_vsuxei:
1362   case Intrinsic::riscv_vsuxei_mask:
1363     return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1364                                /*IsStore*/ true,
1365                                /*IsUnitStrided*/ false);
1366   case Intrinsic::riscv_vlseg2:
1367   case Intrinsic::riscv_vlseg3:
1368   case Intrinsic::riscv_vlseg4:
1369   case Intrinsic::riscv_vlseg5:
1370   case Intrinsic::riscv_vlseg6:
1371   case Intrinsic::riscv_vlseg7:
1372   case Intrinsic::riscv_vlseg8:
1373   case Intrinsic::riscv_vlseg2ff:
1374   case Intrinsic::riscv_vlseg3ff:
1375   case Intrinsic::riscv_vlseg4ff:
1376   case Intrinsic::riscv_vlseg5ff:
1377   case Intrinsic::riscv_vlseg6ff:
1378   case Intrinsic::riscv_vlseg7ff:
1379   case Intrinsic::riscv_vlseg8ff:
1380     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1381                                /*IsStore*/ false,
1382                                /*IsUnitStrided*/ false);
1383   case Intrinsic::riscv_vlseg2_mask:
1384   case Intrinsic::riscv_vlseg3_mask:
1385   case Intrinsic::riscv_vlseg4_mask:
1386   case Intrinsic::riscv_vlseg5_mask:
1387   case Intrinsic::riscv_vlseg6_mask:
1388   case Intrinsic::riscv_vlseg7_mask:
1389   case Intrinsic::riscv_vlseg8_mask:
1390   case Intrinsic::riscv_vlseg2ff_mask:
1391   case Intrinsic::riscv_vlseg3ff_mask:
1392   case Intrinsic::riscv_vlseg4ff_mask:
1393   case Intrinsic::riscv_vlseg5ff_mask:
1394   case Intrinsic::riscv_vlseg6ff_mask:
1395   case Intrinsic::riscv_vlseg7ff_mask:
1396   case Intrinsic::riscv_vlseg8ff_mask:
1397     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1398                                /*IsStore*/ false,
1399                                /*IsUnitStrided*/ false);
1400   case Intrinsic::riscv_vlsseg2:
1401   case Intrinsic::riscv_vlsseg3:
1402   case Intrinsic::riscv_vlsseg4:
1403   case Intrinsic::riscv_vlsseg5:
1404   case Intrinsic::riscv_vlsseg6:
1405   case Intrinsic::riscv_vlsseg7:
1406   case Intrinsic::riscv_vlsseg8:
1407   case Intrinsic::riscv_vloxseg2:
1408   case Intrinsic::riscv_vloxseg3:
1409   case Intrinsic::riscv_vloxseg4:
1410   case Intrinsic::riscv_vloxseg5:
1411   case Intrinsic::riscv_vloxseg6:
1412   case Intrinsic::riscv_vloxseg7:
1413   case Intrinsic::riscv_vloxseg8:
1414   case Intrinsic::riscv_vluxseg2:
1415   case Intrinsic::riscv_vluxseg3:
1416   case Intrinsic::riscv_vluxseg4:
1417   case Intrinsic::riscv_vluxseg5:
1418   case Intrinsic::riscv_vluxseg6:
1419   case Intrinsic::riscv_vluxseg7:
1420   case Intrinsic::riscv_vluxseg8:
1421     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1422                                /*IsStore*/ false,
1423                                /*IsUnitStrided*/ false);
1424   case Intrinsic::riscv_vlsseg2_mask:
1425   case Intrinsic::riscv_vlsseg3_mask:
1426   case Intrinsic::riscv_vlsseg4_mask:
1427   case Intrinsic::riscv_vlsseg5_mask:
1428   case Intrinsic::riscv_vlsseg6_mask:
1429   case Intrinsic::riscv_vlsseg7_mask:
1430   case Intrinsic::riscv_vlsseg8_mask:
1431   case Intrinsic::riscv_vloxseg2_mask:
1432   case Intrinsic::riscv_vloxseg3_mask:
1433   case Intrinsic::riscv_vloxseg4_mask:
1434   case Intrinsic::riscv_vloxseg5_mask:
1435   case Intrinsic::riscv_vloxseg6_mask:
1436   case Intrinsic::riscv_vloxseg7_mask:
1437   case Intrinsic::riscv_vloxseg8_mask:
1438   case Intrinsic::riscv_vluxseg2_mask:
1439   case Intrinsic::riscv_vluxseg3_mask:
1440   case Intrinsic::riscv_vluxseg4_mask:
1441   case Intrinsic::riscv_vluxseg5_mask:
1442   case Intrinsic::riscv_vluxseg6_mask:
1443   case Intrinsic::riscv_vluxseg7_mask:
1444   case Intrinsic::riscv_vluxseg8_mask:
1445     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1446                                /*IsStore*/ false,
1447                                /*IsUnitStrided*/ false);
1448   case Intrinsic::riscv_vsseg2:
1449   case Intrinsic::riscv_vsseg3:
1450   case Intrinsic::riscv_vsseg4:
1451   case Intrinsic::riscv_vsseg5:
1452   case Intrinsic::riscv_vsseg6:
1453   case Intrinsic::riscv_vsseg7:
1454   case Intrinsic::riscv_vsseg8:
1455     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1456                                /*IsStore*/ true,
1457                                /*IsUnitStrided*/ false);
1458   case Intrinsic::riscv_vsseg2_mask:
1459   case Intrinsic::riscv_vsseg3_mask:
1460   case Intrinsic::riscv_vsseg4_mask:
1461   case Intrinsic::riscv_vsseg5_mask:
1462   case Intrinsic::riscv_vsseg6_mask:
1463   case Intrinsic::riscv_vsseg7_mask:
1464   case Intrinsic::riscv_vsseg8_mask:
1465     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1466                                /*IsStore*/ true,
1467                                /*IsUnitStrided*/ false);
1468   case Intrinsic::riscv_vssseg2:
1469   case Intrinsic::riscv_vssseg3:
1470   case Intrinsic::riscv_vssseg4:
1471   case Intrinsic::riscv_vssseg5:
1472   case Intrinsic::riscv_vssseg6:
1473   case Intrinsic::riscv_vssseg7:
1474   case Intrinsic::riscv_vssseg8:
1475   case Intrinsic::riscv_vsoxseg2:
1476   case Intrinsic::riscv_vsoxseg3:
1477   case Intrinsic::riscv_vsoxseg4:
1478   case Intrinsic::riscv_vsoxseg5:
1479   case Intrinsic::riscv_vsoxseg6:
1480   case Intrinsic::riscv_vsoxseg7:
1481   case Intrinsic::riscv_vsoxseg8:
1482   case Intrinsic::riscv_vsuxseg2:
1483   case Intrinsic::riscv_vsuxseg3:
1484   case Intrinsic::riscv_vsuxseg4:
1485   case Intrinsic::riscv_vsuxseg5:
1486   case Intrinsic::riscv_vsuxseg6:
1487   case Intrinsic::riscv_vsuxseg7:
1488   case Intrinsic::riscv_vsuxseg8:
1489     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1490                                /*IsStore*/ true,
1491                                /*IsUnitStrided*/ false);
1492   case Intrinsic::riscv_vssseg2_mask:
1493   case Intrinsic::riscv_vssseg3_mask:
1494   case Intrinsic::riscv_vssseg4_mask:
1495   case Intrinsic::riscv_vssseg5_mask:
1496   case Intrinsic::riscv_vssseg6_mask:
1497   case Intrinsic::riscv_vssseg7_mask:
1498   case Intrinsic::riscv_vssseg8_mask:
1499   case Intrinsic::riscv_vsoxseg2_mask:
1500   case Intrinsic::riscv_vsoxseg3_mask:
1501   case Intrinsic::riscv_vsoxseg4_mask:
1502   case Intrinsic::riscv_vsoxseg5_mask:
1503   case Intrinsic::riscv_vsoxseg6_mask:
1504   case Intrinsic::riscv_vsoxseg7_mask:
1505   case Intrinsic::riscv_vsoxseg8_mask:
1506   case Intrinsic::riscv_vsuxseg2_mask:
1507   case Intrinsic::riscv_vsuxseg3_mask:
1508   case Intrinsic::riscv_vsuxseg4_mask:
1509   case Intrinsic::riscv_vsuxseg5_mask:
1510   case Intrinsic::riscv_vsuxseg6_mask:
1511   case Intrinsic::riscv_vsuxseg7_mask:
1512   case Intrinsic::riscv_vsuxseg8_mask:
1513     return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1514                                /*IsStore*/ true,
1515                                /*IsUnitStrided*/ false);
1516   }
1517 }
1518 
1519 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1520                                                 const AddrMode &AM, Type *Ty,
1521                                                 unsigned AS,
1522                                                 Instruction *I) const {
1523   // No global is ever allowed as a base.
1524   if (AM.BaseGV)
1525     return false;
1526 
1527   // RVV instructions only support register addressing.
1528   if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1529     return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1530 
1531   // Require a 12-bit signed offset.
1532   if (!isInt<12>(AM.BaseOffs))
1533     return false;
1534 
1535   switch (AM.Scale) {
1536   case 0: // "r+i" or just "i", depending on HasBaseReg.
1537     break;
1538   case 1:
1539     if (!AM.HasBaseReg) // allow "r+i".
1540       break;
1541     return false; // disallow "r+r" or "r+r+i".
1542   default:
1543     return false;
1544   }
1545 
1546   return true;
1547 }
1548 
1549 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1550   return isInt<12>(Imm);
1551 }
1552 
1553 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1554   return isInt<12>(Imm);
1555 }
1556 
1557 // On RV32, 64-bit integers are split into their high and low parts and held
1558 // in two different registers, so the trunc is free since the low register can
1559 // just be used.
1560 // FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1561 // isTruncateFree?
1562 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1563   if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1564     return false;
1565   unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1566   unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1567   return (SrcBits == 64 && DestBits == 32);
1568 }
1569 
1570 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1571   // We consider i64->i32 free on RV64 since we have good selection of W
1572   // instructions that make promoting operations back to i64 free in many cases.
1573   if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1574       !DstVT.isInteger())
1575     return false;
1576   unsigned SrcBits = SrcVT.getSizeInBits();
1577   unsigned DestBits = DstVT.getSizeInBits();
1578   return (SrcBits == 64 && DestBits == 32);
1579 }
1580 
1581 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1582   // Zexts are free if they can be combined with a load.
1583   // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1584   // poorly with type legalization of compares preferring sext.
1585   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1586     EVT MemVT = LD->getMemoryVT();
1587     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1588         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1589          LD->getExtensionType() == ISD::ZEXTLOAD))
1590       return true;
1591   }
1592 
1593   return TargetLowering::isZExtFree(Val, VT2);
1594 }
1595 
1596 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1597   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1598 }
1599 
1600 bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1601   return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1602 }
1603 
1604 bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1605   return Subtarget.hasStdExtZbb();
1606 }
1607 
1608 bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1609   return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
1610 }
1611 
1612 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
1613     const Instruction &AndI) const {
1614   // We expect to be able to match a bit extraction instruction if the Zbs
1615   // extension is supported and the mask is a power of two. However, we
1616   // conservatively return false if the mask would fit in an ANDI instruction,
1617   // on the basis that it's possible the sinking+duplication of the AND in
1618   // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1619   // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1620   if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1621     return false;
1622   ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1623   if (!Mask)
1624     return false;
1625   return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1626 }
1627 
1628 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1629   EVT VT = Y.getValueType();
1630 
1631   // FIXME: Support vectors once we have tests.
1632   if (VT.isVector())
1633     return false;
1634 
1635   return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1636          !isa<ConstantSDNode>(Y);
1637 }
1638 
1639 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1640   // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1641   if (Subtarget.hasStdExtZbs())
1642     return X.getValueType().isScalarInteger();
1643   auto *C = dyn_cast<ConstantSDNode>(Y);
1644   // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1645   if (Subtarget.hasVendorXTHeadBs())
1646     return C != nullptr;
1647   // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1648   return C && C->getAPIntValue().ule(10);
1649 }
1650 
1651 bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
1652                                                                EVT VT) const {
1653   // Only enable for rvv.
1654   if (!VT.isVector() || !Subtarget.hasVInstructions())
1655     return false;
1656 
1657   if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1658     return false;
1659 
1660   return true;
1661 }
1662 
1663 bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
1664                                                             Type *Ty) const {
1665   assert(Ty->isIntegerTy());
1666 
1667   unsigned BitSize = Ty->getIntegerBitWidth();
1668   if (BitSize > Subtarget.getXLen())
1669     return false;
1670 
1671   // Fast path, assume 32-bit immediates are cheap.
1672   int64_t Val = Imm.getSExtValue();
1673   if (isInt<32>(Val))
1674     return true;
1675 
1676   // A constant pool entry may be more aligned thant he load we're trying to
1677   // replace. If we don't support unaligned scalar mem, prefer the constant
1678   // pool.
1679   // TODO: Can the caller pass down the alignment?
1680   if (!Subtarget.enableUnalignedScalarMem())
1681     return true;
1682 
1683   // Prefer to keep the load if it would require many instructions.
1684   // This uses the same threshold we use for constant pools but doesn't
1685   // check useConstantPoolForLargeInts.
1686   // TODO: Should we keep the load only when we're definitely going to emit a
1687   // constant pool?
1688 
1689   RISCVMatInt::InstSeq Seq =
1690       RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
1691   return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1692 }
1693 
1694 bool RISCVTargetLowering::
1695     shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1696         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1697         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1698         SelectionDAG &DAG) const {
1699   // One interesting pattern that we'd want to form is 'bit extract':
1700   //   ((1 >> Y) & 1) ==/!= 0
1701   // But we also need to be careful not to try to reverse that fold.
1702 
1703   // Is this '((1 >> Y) & 1)'?
1704   if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1705     return false; // Keep the 'bit extract' pattern.
1706 
1707   // Will this be '((1 >> Y) & 1)' after the transform?
1708   if (NewShiftOpcode == ISD::SRL && CC->isOne())
1709     return true; // Do form the 'bit extract' pattern.
1710 
1711   // If 'X' is a constant, and we transform, then we will immediately
1712   // try to undo the fold, thus causing endless combine loop.
1713   // So only do the transform if X is not a constant. This matches the default
1714   // implementation of this function.
1715   return !XC;
1716 }
1717 
1718 bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1719   switch (Opcode) {
1720   case Instruction::Add:
1721   case Instruction::Sub:
1722   case Instruction::Mul:
1723   case Instruction::And:
1724   case Instruction::Or:
1725   case Instruction::Xor:
1726   case Instruction::FAdd:
1727   case Instruction::FSub:
1728   case Instruction::FMul:
1729   case Instruction::FDiv:
1730   case Instruction::ICmp:
1731   case Instruction::FCmp:
1732     return true;
1733   case Instruction::Shl:
1734   case Instruction::LShr:
1735   case Instruction::AShr:
1736   case Instruction::UDiv:
1737   case Instruction::SDiv:
1738   case Instruction::URem:
1739   case Instruction::SRem:
1740     return Operand == 1;
1741   default:
1742     return false;
1743   }
1744 }
1745 
1746 
1747 bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
1748   if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1749     return false;
1750 
1751   if (canSplatOperand(I->getOpcode(), Operand))
1752     return true;
1753 
1754   auto *II = dyn_cast<IntrinsicInst>(I);
1755   if (!II)
1756     return false;
1757 
1758   switch (II->getIntrinsicID()) {
1759   case Intrinsic::fma:
1760   case Intrinsic::vp_fma:
1761     return Operand == 0 || Operand == 1;
1762   case Intrinsic::vp_shl:
1763   case Intrinsic::vp_lshr:
1764   case Intrinsic::vp_ashr:
1765   case Intrinsic::vp_udiv:
1766   case Intrinsic::vp_sdiv:
1767   case Intrinsic::vp_urem:
1768   case Intrinsic::vp_srem:
1769     return Operand == 1;
1770     // These intrinsics are commutative.
1771   case Intrinsic::vp_add:
1772   case Intrinsic::vp_mul:
1773   case Intrinsic::vp_and:
1774   case Intrinsic::vp_or:
1775   case Intrinsic::vp_xor:
1776   case Intrinsic::vp_fadd:
1777   case Intrinsic::vp_fmul:
1778   case Intrinsic::vp_icmp:
1779   case Intrinsic::vp_fcmp:
1780     // These intrinsics have 'vr' versions.
1781   case Intrinsic::vp_sub:
1782   case Intrinsic::vp_fsub:
1783   case Intrinsic::vp_fdiv:
1784     return Operand == 0 || Operand == 1;
1785   default:
1786     return false;
1787   }
1788 }
1789 
1790 /// Check if sinking \p I's operands to I's basic block is profitable, because
1791 /// the operands can be folded into a target instruction, e.g.
1792 /// splats of scalars can fold into vector instructions.
1793 bool RISCVTargetLowering::shouldSinkOperands(
1794     Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1795   using namespace llvm::PatternMatch;
1796 
1797   if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1798     return false;
1799 
1800   for (auto OpIdx : enumerate(I->operands())) {
1801     if (!canSplatOperand(I, OpIdx.index()))
1802       continue;
1803 
1804     Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1805     // Make sure we are not already sinking this operand
1806     if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1807       continue;
1808 
1809     // We are looking for a splat that can be sunk.
1810     if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
1811                              m_Undef(), m_ZeroMask())))
1812       continue;
1813 
1814     // Don't sink i1 splats.
1815     if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
1816       continue;
1817 
1818     // All uses of the shuffle should be sunk to avoid duplicating it across gpr
1819     // and vector registers
1820     for (Use &U : Op->uses()) {
1821       Instruction *Insn = cast<Instruction>(U.getUser());
1822       if (!canSplatOperand(Insn, U.getOperandNo()))
1823         return false;
1824     }
1825 
1826     Ops.push_back(&Op->getOperandUse(0));
1827     Ops.push_back(&OpIdx.value());
1828   }
1829   return true;
1830 }
1831 
1832 bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
1833   unsigned Opc = VecOp.getOpcode();
1834 
1835   // Assume target opcodes can't be scalarized.
1836   // TODO - do we have any exceptions?
1837   if (Opc >= ISD::BUILTIN_OP_END)
1838     return false;
1839 
1840   // If the vector op is not supported, try to convert to scalar.
1841   EVT VecVT = VecOp.getValueType();
1842   if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
1843     return true;
1844 
1845   // If the vector op is supported, but the scalar op is not, the transform may
1846   // not be worthwhile.
1847   EVT ScalarVT = VecVT.getScalarType();
1848   return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
1849 }
1850 
1851 bool RISCVTargetLowering::isOffsetFoldingLegal(
1852     const GlobalAddressSDNode *GA) const {
1853   // In order to maximise the opportunity for common subexpression elimination,
1854   // keep a separate ADD node for the global address offset instead of folding
1855   // it in the global address node. Later peephole optimisations may choose to
1856   // fold it back in when profitable.
1857   return false;
1858 }
1859 
1860 // Returns 0-31 if the fli instruction is available for the type and this is
1861 // legal FP immediate for the type. Returns -1 otherwise.
1862 int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
1863   if (!Subtarget.hasStdExtZfa())
1864     return -1;
1865 
1866   bool IsSupportedVT = false;
1867   if (VT == MVT::f16) {
1868     IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
1869   } else if (VT == MVT::f32) {
1870     IsSupportedVT = true;
1871   } else if (VT == MVT::f64) {
1872     assert(Subtarget.hasStdExtD() && "Expect D extension");
1873     IsSupportedVT = true;
1874   }
1875 
1876   if (!IsSupportedVT)
1877     return -1;
1878 
1879   return RISCVLoadFPImm::getLoadFPImm(Imm);
1880 }
1881 
1882 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1883                                        bool ForCodeSize) const {
1884   bool IsLegalVT = false;
1885   if (VT == MVT::f16)
1886     IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin();
1887   else if (VT == MVT::f32)
1888     IsLegalVT = Subtarget.hasStdExtFOrZfinx();
1889   else if (VT == MVT::f64)
1890     IsLegalVT = Subtarget.hasStdExtDOrZdinx();
1891 
1892   if (!IsLegalVT)
1893     return false;
1894 
1895   if (getLegalZfaFPImm(Imm, VT) >= 0)
1896     return true;
1897 
1898   // Cannot create a 64 bit floating-point immediate value for rv32.
1899   if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
1900     // td can handle +0.0 or -0.0 already.
1901     // -0.0 can be created by fmv + fneg.
1902     return Imm.isZero();
1903   }
1904   // Special case: the cost for -0.0 is 1.
1905   int Cost = Imm.isNegZero()
1906                  ? 1
1907                  : RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
1908                                               Subtarget.getXLen(),
1909                                               Subtarget.getFeatureBits());
1910   // If the constantpool data is already in cache, only Cost 1 is cheaper.
1911   return Cost < FPImmCost;
1912 }
1913 
1914 // TODO: This is very conservative.
1915 bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1916                                                   unsigned Index) const {
1917   if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
1918     return false;
1919 
1920   // Only support extracting a fixed from a fixed vector for now.
1921   if (ResVT.isScalableVector() || SrcVT.isScalableVector())
1922     return false;
1923 
1924   unsigned ResElts = ResVT.getVectorNumElements();
1925   unsigned SrcElts = SrcVT.getVectorNumElements();
1926 
1927   // Convervatively only handle extracting half of a vector.
1928   // TODO: Relax this.
1929   if ((ResElts * 2) != SrcElts)
1930     return false;
1931 
1932   // The smallest type we can slide is i8.
1933   // TODO: We can extract index 0 from a mask vector without a slide.
1934   if (ResVT.getVectorElementType() == MVT::i1)
1935     return false;
1936 
1937   // Slide can support arbitrary index, but we only treat vslidedown.vi as
1938   // cheap.
1939   if (Index >= 32)
1940     return false;
1941 
1942   // TODO: We can do arbitrary slidedowns, but for now only support extracting
1943   // the upper half of a vector until we have more test coverage.
1944   return Index == 0 || Index == ResElts;
1945 }
1946 
1947 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
1948                                                       CallingConv::ID CC,
1949                                                       EVT VT) const {
1950   // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
1951   // We might still end up using a GPR but that will be decided based on ABI.
1952   if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
1953       !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
1954     return MVT::f32;
1955 
1956   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
1957 }
1958 
1959 unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
1960                                                            CallingConv::ID CC,
1961                                                            EVT VT) const {
1962   // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
1963   // We might still end up using a GPR but that will be decided based on ABI.
1964   if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
1965       !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
1966     return 1;
1967 
1968   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
1969 }
1970 
1971 // Changes the condition code and swaps operands if necessary, so the SetCC
1972 // operation matches one of the comparisons supported directly by branches
1973 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1974 // with 1/-1.
1975 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1976                                     ISD::CondCode &CC, SelectionDAG &DAG) {
1977   // If this is a single bit test that can't be handled by ANDI, shift the
1978   // bit to be tested to the MSB and perform a signed compare with 0.
1979   if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1980       LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1981       isa<ConstantSDNode>(LHS.getOperand(1))) {
1982     uint64_t Mask = LHS.getConstantOperandVal(1);
1983     if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1984       unsigned ShAmt = 0;
1985       if (isPowerOf2_64(Mask)) {
1986         CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1987         ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1988       } else {
1989         ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1990       }
1991 
1992       LHS = LHS.getOperand(0);
1993       if (ShAmt != 0)
1994         LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1995                           DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1996       return;
1997     }
1998   }
1999 
2000   if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2001     int64_t C = RHSC->getSExtValue();
2002     switch (CC) {
2003     default: break;
2004     case ISD::SETGT:
2005       // Convert X > -1 to X >= 0.
2006       if (C == -1) {
2007         RHS = DAG.getConstant(0, DL, RHS.getValueType());
2008         CC = ISD::SETGE;
2009         return;
2010       }
2011       break;
2012     case ISD::SETLT:
2013       // Convert X < 1 to 0 <= X.
2014       if (C == 1) {
2015         RHS = LHS;
2016         LHS = DAG.getConstant(0, DL, RHS.getValueType());
2017         CC = ISD::SETGE;
2018         return;
2019       }
2020       break;
2021     }
2022   }
2023 
2024   switch (CC) {
2025   default:
2026     break;
2027   case ISD::SETGT:
2028   case ISD::SETLE:
2029   case ISD::SETUGT:
2030   case ISD::SETULE:
2031     CC = ISD::getSetCCSwappedOperands(CC);
2032     std::swap(LHS, RHS);
2033     break;
2034   }
2035 }
2036 
2037 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2038   assert(VT.isScalableVector() && "Expecting a scalable vector type");
2039   unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2040   if (VT.getVectorElementType() == MVT::i1)
2041     KnownSize *= 8;
2042 
2043   switch (KnownSize) {
2044   default:
2045     llvm_unreachable("Invalid LMUL.");
2046   case 8:
2047     return RISCVII::VLMUL::LMUL_F8;
2048   case 16:
2049     return RISCVII::VLMUL::LMUL_F4;
2050   case 32:
2051     return RISCVII::VLMUL::LMUL_F2;
2052   case 64:
2053     return RISCVII::VLMUL::LMUL_1;
2054   case 128:
2055     return RISCVII::VLMUL::LMUL_2;
2056   case 256:
2057     return RISCVII::VLMUL::LMUL_4;
2058   case 512:
2059     return RISCVII::VLMUL::LMUL_8;
2060   }
2061 }
2062 
2063 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2064   switch (LMul) {
2065   default:
2066     llvm_unreachable("Invalid LMUL.");
2067   case RISCVII::VLMUL::LMUL_F8:
2068   case RISCVII::VLMUL::LMUL_F4:
2069   case RISCVII::VLMUL::LMUL_F2:
2070   case RISCVII::VLMUL::LMUL_1:
2071     return RISCV::VRRegClassID;
2072   case RISCVII::VLMUL::LMUL_2:
2073     return RISCV::VRM2RegClassID;
2074   case RISCVII::VLMUL::LMUL_4:
2075     return RISCV::VRM4RegClassID;
2076   case RISCVII::VLMUL::LMUL_8:
2077     return RISCV::VRM8RegClassID;
2078   }
2079 }
2080 
2081 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2082   RISCVII::VLMUL LMUL = getLMUL(VT);
2083   if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2084       LMUL == RISCVII::VLMUL::LMUL_F4 ||
2085       LMUL == RISCVII::VLMUL::LMUL_F2 ||
2086       LMUL == RISCVII::VLMUL::LMUL_1) {
2087     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2088                   "Unexpected subreg numbering");
2089     return RISCV::sub_vrm1_0 + Index;
2090   }
2091   if (LMUL == RISCVII::VLMUL::LMUL_2) {
2092     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2093                   "Unexpected subreg numbering");
2094     return RISCV::sub_vrm2_0 + Index;
2095   }
2096   if (LMUL == RISCVII::VLMUL::LMUL_4) {
2097     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2098                   "Unexpected subreg numbering");
2099     return RISCV::sub_vrm4_0 + Index;
2100   }
2101   llvm_unreachable("Invalid vector type.");
2102 }
2103 
2104 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2105   if (VT.getVectorElementType() == MVT::i1)
2106     return RISCV::VRRegClassID;
2107   return getRegClassIDForLMUL(getLMUL(VT));
2108 }
2109 
2110 // Attempt to decompose a subvector insert/extract between VecVT and
2111 // SubVecVT via subregister indices. Returns the subregister index that
2112 // can perform the subvector insert/extract with the given element index, as
2113 // well as the index corresponding to any leftover subvectors that must be
2114 // further inserted/extracted within the register class for SubVecVT.
2115 std::pair<unsigned, unsigned>
2116 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2117     MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2118     const RISCVRegisterInfo *TRI) {
2119   static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2120                  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2121                  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2122                 "Register classes not ordered");
2123   unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2124   unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2125   // Try to compose a subregister index that takes us from the incoming
2126   // LMUL>1 register class down to the outgoing one. At each step we half
2127   // the LMUL:
2128   //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2129   // Note that this is not guaranteed to find a subregister index, such as
2130   // when we are extracting from one VR type to another.
2131   unsigned SubRegIdx = RISCV::NoSubRegister;
2132   for (const unsigned RCID :
2133        {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2134     if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2135       VecVT = VecVT.getHalfNumVectorElementsVT();
2136       bool IsHi =
2137           InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2138       SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2139                                             getSubregIndexByMVT(VecVT, IsHi));
2140       if (IsHi)
2141         InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2142     }
2143   return {SubRegIdx, InsertExtractIdx};
2144 }
2145 
2146 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2147 // stores for those types.
2148 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2149   return !Subtarget.useRVVForFixedLengthVectors() ||
2150          (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2151 }
2152 
2153 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2154   if (!ScalarTy.isSimple())
2155     return false;
2156   switch (ScalarTy.getSimpleVT().SimpleTy) {
2157   case MVT::iPTR:
2158     return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2159   case MVT::i8:
2160   case MVT::i16:
2161   case MVT::i32:
2162     return true;
2163   case MVT::i64:
2164     return Subtarget.hasVInstructionsI64();
2165   case MVT::f16:
2166     return Subtarget.hasVInstructionsF16();
2167   case MVT::f32:
2168     return Subtarget.hasVInstructionsF32();
2169   case MVT::f64:
2170     return Subtarget.hasVInstructionsF64();
2171   default:
2172     return false;
2173   }
2174 }
2175 
2176 
2177 unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2178   return NumRepeatedDivisors;
2179 }
2180 
2181 static SDValue getVLOperand(SDValue Op) {
2182   assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2183           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2184          "Unexpected opcode");
2185   bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2186   unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2187   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2188       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2189   if (!II)
2190     return SDValue();
2191   return Op.getOperand(II->VLOperand + 1 + HasChain);
2192 }
2193 
2194 static bool useRVVForFixedLengthVectorVT(MVT VT,
2195                                          const RISCVSubtarget &Subtarget) {
2196   assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2197   if (!Subtarget.useRVVForFixedLengthVectors())
2198     return false;
2199 
2200   // We only support a set of vector types with a consistent maximum fixed size
2201   // across all supported vector element types to avoid legalization issues.
2202   // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2203   // fixed-length vector type we support is 1024 bytes.
2204   if (VT.getFixedSizeInBits() > 1024 * 8)
2205     return false;
2206 
2207   unsigned MinVLen = Subtarget.getRealMinVLen();
2208 
2209   MVT EltVT = VT.getVectorElementType();
2210 
2211   // Don't use RVV for vectors we cannot scalarize if required.
2212   switch (EltVT.SimpleTy) {
2213   // i1 is supported but has different rules.
2214   default:
2215     return false;
2216   case MVT::i1:
2217     // Masks can only use a single register.
2218     if (VT.getVectorNumElements() > MinVLen)
2219       return false;
2220     MinVLen /= 8;
2221     break;
2222   case MVT::i8:
2223   case MVT::i16:
2224   case MVT::i32:
2225     break;
2226   case MVT::i64:
2227     if (!Subtarget.hasVInstructionsI64())
2228       return false;
2229     break;
2230   case MVT::f16:
2231     if (!Subtarget.hasVInstructionsF16())
2232       return false;
2233     break;
2234   case MVT::f32:
2235     if (!Subtarget.hasVInstructionsF32())
2236       return false;
2237     break;
2238   case MVT::f64:
2239     if (!Subtarget.hasVInstructionsF64())
2240       return false;
2241     break;
2242   }
2243 
2244   // Reject elements larger than ELEN.
2245   if (EltVT.getSizeInBits() > Subtarget.getELEN())
2246     return false;
2247 
2248   unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2249   // Don't use RVV for types that don't fit.
2250   if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2251     return false;
2252 
2253   // TODO: Perhaps an artificial restriction, but worth having whilst getting
2254   // the base fixed length RVV support in place.
2255   if (!VT.isPow2VectorType())
2256     return false;
2257 
2258   return true;
2259 }
2260 
2261 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2262   return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2263 }
2264 
2265 // Return the largest legal scalable vector type that matches VT's element type.
2266 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2267                                             const RISCVSubtarget &Subtarget) {
2268   // This may be called before legal types are setup.
2269   assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2270           useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2271          "Expected legal fixed length vector!");
2272 
2273   unsigned MinVLen = Subtarget.getRealMinVLen();
2274   unsigned MaxELen = Subtarget.getELEN();
2275 
2276   MVT EltVT = VT.getVectorElementType();
2277   switch (EltVT.SimpleTy) {
2278   default:
2279     llvm_unreachable("unexpected element type for RVV container");
2280   case MVT::i1:
2281   case MVT::i8:
2282   case MVT::i16:
2283   case MVT::i32:
2284   case MVT::i64:
2285   case MVT::f16:
2286   case MVT::f32:
2287   case MVT::f64: {
2288     // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2289     // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2290     // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2291     unsigned NumElts =
2292         (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2293     NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2294     assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2295     return MVT::getScalableVectorVT(EltVT, NumElts);
2296   }
2297   }
2298 }
2299 
2300 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2301                                             const RISCVSubtarget &Subtarget) {
2302   return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2303                                           Subtarget);
2304 }
2305 
2306 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2307   return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2308 }
2309 
2310 // Grow V to consume an entire RVV register.
2311 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2312                                        const RISCVSubtarget &Subtarget) {
2313   assert(VT.isScalableVector() &&
2314          "Expected to convert into a scalable vector!");
2315   assert(V.getValueType().isFixedLengthVector() &&
2316          "Expected a fixed length vector operand!");
2317   SDLoc DL(V);
2318   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2319   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2320 }
2321 
2322 // Shrink V so it's just big enough to maintain a VT's worth of data.
2323 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
2324                                          const RISCVSubtarget &Subtarget) {
2325   assert(VT.isFixedLengthVector() &&
2326          "Expected to convert into a fixed length vector!");
2327   assert(V.getValueType().isScalableVector() &&
2328          "Expected a scalable vector operand!");
2329   SDLoc DL(V);
2330   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2331   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2332 }
2333 
2334 /// Return the type of the mask type suitable for masking the provided
2335 /// vector type.  This is simply an i1 element type vector of the same
2336 /// (possibly scalable) length.
2337 static MVT getMaskTypeFor(MVT VecVT) {
2338   assert(VecVT.isVector());
2339   ElementCount EC = VecVT.getVectorElementCount();
2340   return MVT::getVectorVT(MVT::i1, EC);
2341 }
2342 
2343 /// Creates an all ones mask suitable for masking a vector of type VecTy with
2344 /// vector length VL.  .
2345 static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2346                               SelectionDAG &DAG) {
2347   MVT MaskVT = getMaskTypeFor(VecVT);
2348   return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2349 }
2350 
2351 static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG,
2352                        const RISCVSubtarget &Subtarget) {
2353   return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2354 }
2355 
2356 static std::pair<SDValue, SDValue>
2357 getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2358                 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2359   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2360   SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
2361   SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2362   return {Mask, VL};
2363 }
2364 
2365 // Gets the two common "VL" operands: an all-ones mask and the vector length.
2366 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2367 // the vector type that the fixed-length vector is contained in. Otherwise if
2368 // VecVT is scalable, then ContainerVT should be the same as VecVT.
2369 static std::pair<SDValue, SDValue>
2370 getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2371                 const RISCVSubtarget &Subtarget) {
2372   if (VecVT.isFixedLengthVector())
2373     return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2374                            Subtarget);
2375   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2376   MVT XLenVT = Subtarget.getXLenVT();
2377   SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
2378   SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2379   return {Mask, VL};
2380 }
2381 
2382 // As above but assuming the given type is a scalable vector type.
2383 static std::pair<SDValue, SDValue>
2384 getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2385                         const RISCVSubtarget &Subtarget) {
2386   assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2387   return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
2388 }
2389 
2390 SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2391                                           SelectionDAG &DAG) const {
2392   assert(VecVT.isScalableVector() && "Expected scalable vector");
2393   return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2394                              VecVT.getVectorElementCount());
2395 }
2396 
2397 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2398 // of either is (currently) supported. This can get us into an infinite loop
2399 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2400 // as a ..., etc.
2401 // Until either (or both) of these can reliably lower any node, reporting that
2402 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2403 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2404 // which is not desirable.
2405 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2406     EVT VT, unsigned DefinedValues) const {
2407   return false;
2408 }
2409 
2410 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2411                                   const RISCVSubtarget &Subtarget) {
2412   // RISC-V FP-to-int conversions saturate to the destination register size, but
2413   // don't produce 0 for nan. We can use a conversion instruction and fix the
2414   // nan case with a compare and a select.
2415   SDValue Src = Op.getOperand(0);
2416 
2417   MVT DstVT = Op.getSimpleValueType();
2418   EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2419 
2420   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2421 
2422   if (!DstVT.isVector()) {
2423     // In absense of Zfh, promote f16 to f32, then saturate the result.
2424     if (Src.getSimpleValueType() == MVT::f16 &&
2425         !Subtarget.hasStdExtZfhOrZhinx()) {
2426       Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2427     }
2428 
2429     unsigned Opc;
2430     if (SatVT == DstVT)
2431       Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2432     else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2433       Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2434     else
2435       return SDValue();
2436     // FIXME: Support other SatVTs by clamping before or after the conversion.
2437 
2438     SDLoc DL(Op);
2439     SDValue FpToInt = DAG.getNode(
2440         Opc, DL, DstVT, Src,
2441         DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2442 
2443     if (Opc == RISCVISD::FCVT_WU_RV64)
2444       FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2445 
2446     SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2447     return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2448                            ISD::CondCode::SETUO);
2449   }
2450 
2451   // Vectors.
2452 
2453   MVT DstEltVT = DstVT.getVectorElementType();
2454   MVT SrcVT = Src.getSimpleValueType();
2455   MVT SrcEltVT = SrcVT.getVectorElementType();
2456   unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2457   unsigned DstEltSize = DstEltVT.getSizeInBits();
2458 
2459   // Only handle saturating to the destination type.
2460   if (SatVT != DstEltVT)
2461     return SDValue();
2462 
2463   // FIXME: Don't support narrowing by more than 1 steps for now.
2464   if (SrcEltSize > (2 * DstEltSize))
2465     return SDValue();
2466 
2467   MVT DstContainerVT = DstVT;
2468   MVT SrcContainerVT = SrcVT;
2469   if (DstVT.isFixedLengthVector()) {
2470     DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2471     SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2472     assert(DstContainerVT.getVectorElementCount() ==
2473                SrcContainerVT.getVectorElementCount() &&
2474            "Expected same element count");
2475     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2476   }
2477 
2478   SDLoc DL(Op);
2479 
2480   auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2481 
2482   SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2483                               {Src, Src, DAG.getCondCode(ISD::SETNE),
2484                                DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2485 
2486   // Need to widen by more than 1 step, promote the FP type, then do a widening
2487   // convert.
2488   if (DstEltSize > (2 * SrcEltSize)) {
2489     assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2490     MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2491     Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2492   }
2493 
2494   unsigned RVVOpc =
2495       IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2496   SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2497 
2498   SDValue SplatZero = DAG.getNode(
2499       RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2500       DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2501   Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
2502                     Res, VL);
2503 
2504   if (DstVT.isFixedLengthVector())
2505     Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2506 
2507   return Res;
2508 }
2509 
2510 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2511   switch (Opc) {
2512   case ISD::FROUNDEVEN:
2513   case ISD::STRICT_FROUNDEVEN:
2514   case ISD::VP_FROUNDEVEN:
2515     return RISCVFPRndMode::RNE;
2516   case ISD::FTRUNC:
2517   case ISD::STRICT_FTRUNC:
2518   case ISD::VP_FROUNDTOZERO:
2519     return RISCVFPRndMode::RTZ;
2520   case ISD::FFLOOR:
2521   case ISD::STRICT_FFLOOR:
2522   case ISD::VP_FFLOOR:
2523     return RISCVFPRndMode::RDN;
2524   case ISD::FCEIL:
2525   case ISD::STRICT_FCEIL:
2526   case ISD::VP_FCEIL:
2527     return RISCVFPRndMode::RUP;
2528   case ISD::FROUND:
2529   case ISD::STRICT_FROUND:
2530   case ISD::VP_FROUND:
2531     return RISCVFPRndMode::RMM;
2532   case ISD::FRINT:
2533     return RISCVFPRndMode::DYN;
2534   }
2535 
2536   return RISCVFPRndMode::Invalid;
2537 }
2538 
2539 // Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2540 // VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2541 // the integer domain and back. Taking care to avoid converting values that are
2542 // nan or already correct.
2543 static SDValue
2544 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2545                                       const RISCVSubtarget &Subtarget) {
2546   MVT VT = Op.getSimpleValueType();
2547   assert(VT.isVector() && "Unexpected type");
2548 
2549   SDLoc DL(Op);
2550 
2551   SDValue Src = Op.getOperand(0);
2552 
2553   MVT ContainerVT = VT;
2554   if (VT.isFixedLengthVector()) {
2555     ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2556     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2557   }
2558 
2559   SDValue Mask, VL;
2560   if (Op->isVPOpcode()) {
2561     Mask = Op.getOperand(1);
2562     if (VT.isFixedLengthVector())
2563       Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2564                                      Subtarget);
2565     VL = Op.getOperand(2);
2566   } else {
2567     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2568   }
2569 
2570   // Freeze the source since we are increasing the number of uses.
2571   Src = DAG.getFreeze(Src);
2572 
2573   // We do the conversion on the absolute value and fix the sign at the end.
2574   SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2575 
2576   // Determine the largest integer that can be represented exactly. This and
2577   // values larger than it don't have any fractional bits so don't need to
2578   // be converted.
2579   const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2580   unsigned Precision = APFloat::semanticsPrecision(FltSem);
2581   APFloat MaxVal = APFloat(FltSem);
2582   MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2583                           /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2584   SDValue MaxValNode =
2585       DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2586   SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2587                                     DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2588 
2589   // If abs(Src) was larger than MaxVal or nan, keep it.
2590   MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2591   Mask =
2592       DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2593                   {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2594                    Mask, Mask, VL});
2595 
2596   // Truncate to integer and convert back to FP.
2597   MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2598   MVT XLenVT = Subtarget.getXLenVT();
2599   SDValue Truncated;
2600 
2601   switch (Op.getOpcode()) {
2602   default:
2603     llvm_unreachable("Unexpected opcode");
2604   case ISD::FCEIL:
2605   case ISD::VP_FCEIL:
2606   case ISD::FFLOOR:
2607   case ISD::VP_FFLOOR:
2608   case ISD::FROUND:
2609   case ISD::FROUNDEVEN:
2610   case ISD::VP_FROUND:
2611   case ISD::VP_FROUNDEVEN:
2612   case ISD::VP_FROUNDTOZERO: {
2613     RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2614     assert(FRM != RISCVFPRndMode::Invalid);
2615     Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
2616                             DAG.getTargetConstant(FRM, DL, XLenVT), VL);
2617     break;
2618   }
2619   case ISD::FTRUNC:
2620     Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
2621                             Mask, VL);
2622     break;
2623   case ISD::FRINT:
2624   case ISD::VP_FRINT:
2625     Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
2626     break;
2627   case ISD::FNEARBYINT:
2628   case ISD::VP_FNEARBYINT:
2629     Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
2630                             Mask, VL);
2631     break;
2632   }
2633 
2634   // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2635   if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
2636     Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
2637                             Mask, VL);
2638 
2639   // Restore the original sign so that -0.0 is preserved.
2640   Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2641                           Src, Src, Mask, VL);
2642 
2643   if (!VT.isFixedLengthVector())
2644     return Truncated;
2645 
2646   return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2647 }
2648 
2649 // Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
2650 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
2651 // qNan and coverting the new source to integer and back to FP.
2652 static SDValue
2653 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2654                                             const RISCVSubtarget &Subtarget) {
2655   SDLoc DL(Op);
2656   MVT VT = Op.getSimpleValueType();
2657   SDValue Chain = Op.getOperand(0);
2658   SDValue Src = Op.getOperand(1);
2659 
2660   MVT ContainerVT = VT;
2661   if (VT.isFixedLengthVector()) {
2662     ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2663     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2664   }
2665 
2666   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2667 
2668   // Freeze the source since we are increasing the number of uses.
2669   Src = DAG.getFreeze(Src);
2670 
2671   // Covert sNan to qNan by executing x + x for all unordered elemenet x in Src.
2672   MVT MaskVT = Mask.getSimpleValueType();
2673   SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
2674                                 DAG.getVTList(MaskVT, MVT::Other),
2675                                 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
2676                                  DAG.getUNDEF(MaskVT), Mask, VL});
2677   Chain = Unorder.getValue(1);
2678   Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
2679                     DAG.getVTList(ContainerVT, MVT::Other),
2680                     {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
2681   Chain = Src.getValue(1);
2682 
2683   // We do the conversion on the absolute value and fix the sign at the end.
2684   SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2685 
2686   // Determine the largest integer that can be represented exactly. This and
2687   // values larger than it don't have any fractional bits so don't need to
2688   // be converted.
2689   const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2690   unsigned Precision = APFloat::semanticsPrecision(FltSem);
2691   APFloat MaxVal = APFloat(FltSem);
2692   MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2693                           /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2694   SDValue MaxValNode =
2695       DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2696   SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2697                                     DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2698 
2699   // If abs(Src) was larger than MaxVal or nan, keep it.
2700   Mask = DAG.getNode(
2701       RISCVISD::SETCC_VL, DL, MaskVT,
2702       {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
2703 
2704   // Truncate to integer and convert back to FP.
2705   MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2706   MVT XLenVT = Subtarget.getXLenVT();
2707   SDValue Truncated;
2708 
2709   switch (Op.getOpcode()) {
2710   default:
2711     llvm_unreachable("Unexpected opcode");
2712   case ISD::STRICT_FCEIL:
2713   case ISD::STRICT_FFLOOR:
2714   case ISD::STRICT_FROUND:
2715   case ISD::STRICT_FROUNDEVEN: {
2716     RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2717     assert(FRM != RISCVFPRndMode::Invalid);
2718     Truncated = DAG.getNode(
2719         RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
2720         {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
2721     break;
2722   }
2723   case ISD::STRICT_FTRUNC:
2724     Truncated =
2725         DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
2726                     DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
2727     break;
2728   case ISD::STRICT_FNEARBYINT:
2729     Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
2730                             DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
2731                             Mask, VL);
2732     break;
2733   }
2734   Chain = Truncated.getValue(1);
2735 
2736   // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2737   if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
2738     Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
2739                             DAG.getVTList(ContainerVT, MVT::Other), Chain,
2740                             Truncated, Mask, VL);
2741     Chain = Truncated.getValue(1);
2742   }
2743 
2744   // Restore the original sign so that -0.0 is preserved.
2745   Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2746                           Src, Src, Mask, VL);
2747 
2748   if (VT.isFixedLengthVector())
2749     Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2750   return DAG.getMergeValues({Truncated, Chain}, DL);
2751 }
2752 
2753 static SDValue
2754 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2755                                 const RISCVSubtarget &Subtarget) {
2756   MVT VT = Op.getSimpleValueType();
2757   if (VT.isVector())
2758     return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
2759 
2760   if (DAG.shouldOptForSize())
2761     return SDValue();
2762 
2763   SDLoc DL(Op);
2764   SDValue Src = Op.getOperand(0);
2765 
2766   // Create an integer the size of the mantissa with the MSB set. This and all
2767   // values larger than it don't have any fractional bits so don't need to be
2768   // converted.
2769   const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
2770   unsigned Precision = APFloat::semanticsPrecision(FltSem);
2771   APFloat MaxVal = APFloat(FltSem);
2772   MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2773                           /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2774   SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
2775 
2776   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2777   return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
2778                      DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
2779 }
2780 
2781 static SDValue
2782 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
2783               const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
2784               SDValue Offset, SDValue Mask, SDValue VL,
2785               unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
2786   if (Merge.isUndef())
2787     Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
2788   SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
2789   SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
2790   return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
2791 }
2792 
2793 static SDValue
2794 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
2795             EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
2796             SDValue VL,
2797             unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
2798   if (Merge.isUndef())
2799     Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
2800   SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
2801   SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
2802   return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
2803 }
2804 
2805 struct VIDSequence {
2806   int64_t StepNumerator;
2807   unsigned StepDenominator;
2808   int64_t Addend;
2809 };
2810 
2811 static std::optional<uint64_t> getExactInteger(const APFloat &APF,
2812                                                uint32_t BitWidth) {
2813   APSInt ValInt(BitWidth, !APF.isNegative());
2814   // We use an arbitrary rounding mode here. If a floating-point is an exact
2815   // integer (e.g., 1.0), the rounding mode does not affect the output value. If
2816   // the rounding mode changes the output value, then it is not an exact
2817   // integer.
2818   RoundingMode ArbitraryRM = RoundingMode::TowardZero;
2819   bool IsExact;
2820   // If it is out of signed integer range, it will return an invalid operation.
2821   // If it is not an exact integer, IsExact is false.
2822   if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
2823        APFloatBase::opInvalidOp) ||
2824       !IsExact)
2825     return std::nullopt;
2826   return ValInt.extractBitsAsZExtValue(BitWidth, 0);
2827 }
2828 
2829 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
2830 // to the (non-zero) step S and start value X. This can be then lowered as the
2831 // RVV sequence (VID * S) + X, for example.
2832 // The step S is represented as an integer numerator divided by a positive
2833 // denominator. Note that the implementation currently only identifies
2834 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
2835 // cannot detect 2/3, for example.
2836 // Note that this method will also match potentially unappealing index
2837 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to
2838 // determine whether this is worth generating code for.
2839 static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
2840   unsigned NumElts = Op.getNumOperands();
2841   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
2842   bool IsInteger = Op.getValueType().isInteger();
2843 
2844   std::optional<unsigned> SeqStepDenom;
2845   std::optional<int64_t> SeqStepNum, SeqAddend;
2846   std::optional<std::pair<uint64_t, unsigned>> PrevElt;
2847   unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
2848   for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2849     // Assume undef elements match the sequence; we just have to be careful
2850     // when interpolating across them.
2851     if (Op.getOperand(Idx).isUndef())
2852       continue;
2853 
2854     uint64_t Val;
2855     if (IsInteger) {
2856       // The BUILD_VECTOR must be all constants.
2857       if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
2858         return std::nullopt;
2859       Val = Op.getConstantOperandVal(Idx) &
2860             maskTrailingOnes<uint64_t>(EltSizeInBits);
2861     } else {
2862       // The BUILD_VECTOR must be all constants.
2863       if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
2864         return std::nullopt;
2865       if (auto ExactInteger = getExactInteger(
2866               cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
2867               EltSizeInBits))
2868         Val = *ExactInteger;
2869       else
2870         return std::nullopt;
2871     }
2872 
2873     if (PrevElt) {
2874       // Calculate the step since the last non-undef element, and ensure
2875       // it's consistent across the entire sequence.
2876       unsigned IdxDiff = Idx - PrevElt->second;
2877       int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
2878 
2879       // A zero-value value difference means that we're somewhere in the middle
2880       // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
2881       // step change before evaluating the sequence.
2882       if (ValDiff == 0)
2883         continue;
2884 
2885       int64_t Remainder = ValDiff % IdxDiff;
2886       // Normalize the step if it's greater than 1.
2887       if (Remainder != ValDiff) {
2888         // The difference must cleanly divide the element span.
2889         if (Remainder != 0)
2890           return std::nullopt;
2891         ValDiff /= IdxDiff;
2892         IdxDiff = 1;
2893       }
2894 
2895       if (!SeqStepNum)
2896         SeqStepNum = ValDiff;
2897       else if (ValDiff != SeqStepNum)
2898         return std::nullopt;
2899 
2900       if (!SeqStepDenom)
2901         SeqStepDenom = IdxDiff;
2902       else if (IdxDiff != *SeqStepDenom)
2903         return std::nullopt;
2904     }
2905 
2906     // Record this non-undef element for later.
2907     if (!PrevElt || PrevElt->first != Val)
2908       PrevElt = std::make_pair(Val, Idx);
2909   }
2910 
2911   // We need to have logged a step for this to count as a legal index sequence.
2912   if (!SeqStepNum || !SeqStepDenom)
2913     return std::nullopt;
2914 
2915   // Loop back through the sequence and validate elements we might have skipped
2916   // while waiting for a valid step. While doing this, log any sequence addend.
2917   for (unsigned Idx = 0; Idx < NumElts; Idx++) {
2918     if (Op.getOperand(Idx).isUndef())
2919       continue;
2920     uint64_t Val;
2921     if (IsInteger) {
2922       Val = Op.getConstantOperandVal(Idx) &
2923             maskTrailingOnes<uint64_t>(EltSizeInBits);
2924     } else {
2925       Val = *getExactInteger(
2926           cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
2927           EltSizeInBits);
2928     }
2929     uint64_t ExpectedVal =
2930         (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
2931     int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
2932     if (!SeqAddend)
2933       SeqAddend = Addend;
2934     else if (Addend != SeqAddend)
2935       return std::nullopt;
2936   }
2937 
2938   assert(SeqAddend && "Must have an addend if we have a step");
2939 
2940   return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
2941 }
2942 
2943 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
2944 // and lower it as a VRGATHER_VX_VL from the source vector.
2945 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
2946                                   SelectionDAG &DAG,
2947                                   const RISCVSubtarget &Subtarget) {
2948   if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2949     return SDValue();
2950   SDValue Vec = SplatVal.getOperand(0);
2951   // Only perform this optimization on vectors of the same size for simplicity.
2952   // Don't perform this optimization for i1 vectors.
2953   // FIXME: Support i1 vectors, maybe by promoting to i8?
2954   if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
2955     return SDValue();
2956   SDValue Idx = SplatVal.getOperand(1);
2957   // The index must be a legal type.
2958   if (Idx.getValueType() != Subtarget.getXLenVT())
2959     return SDValue();
2960 
2961   MVT ContainerVT = VT;
2962   if (VT.isFixedLengthVector()) {
2963     ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2964     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
2965   }
2966 
2967   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2968 
2969   SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
2970                                Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
2971 
2972   if (!VT.isFixedLengthVector())
2973     return Gather;
2974 
2975   return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2976 }
2977 
2978 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
2979                                  const RISCVSubtarget &Subtarget) {
2980   MVT VT = Op.getSimpleValueType();
2981   assert(VT.isFixedLengthVector() && "Unexpected vector!");
2982 
2983   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2984 
2985   SDLoc DL(Op);
2986   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2987 
2988   MVT XLenVT = Subtarget.getXLenVT();
2989   unsigned NumElts = Op.getNumOperands();
2990 
2991   if (VT.getVectorElementType() == MVT::i1) {
2992     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
2993       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
2994       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
2995     }
2996 
2997     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
2998       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
2999       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3000     }
3001 
3002     // Lower constant mask BUILD_VECTORs via an integer vector type, in
3003     // scalar integer chunks whose bit-width depends on the number of mask
3004     // bits and XLEN.
3005     // First, determine the most appropriate scalar integer type to use. This
3006     // is at most XLenVT, but may be shrunk to a smaller vector element type
3007     // according to the size of the final vector - use i8 chunks rather than
3008     // XLenVT if we're producing a v8i1. This results in more consistent
3009     // codegen across RV32 and RV64.
3010     unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3011     NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
3012     if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3013       // If we have to use more than one INSERT_VECTOR_ELT then this
3014       // optimization is likely to increase code size; avoid peforming it in
3015       // such a case. We can use a load from a constant pool in this case.
3016       if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3017         return SDValue();
3018       // Now we can create our integer vector type. Note that it may be larger
3019       // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3020       unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3021       MVT IntegerViaVecVT =
3022           MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3023                            IntegerViaVecElts);
3024 
3025       uint64_t Bits = 0;
3026       unsigned BitPos = 0, IntegerEltIdx = 0;
3027       SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3028 
3029       for (unsigned I = 0; I < NumElts;) {
3030         SDValue V = Op.getOperand(I);
3031         bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
3032         Bits |= ((uint64_t)BitValue << BitPos);
3033         ++BitPos;
3034         ++I;
3035 
3036         // Once we accumulate enough bits to fill our scalar type or process the
3037         // last element, insert into our vector and clear our accumulated data.
3038         if (I % NumViaIntegerBits == 0 || I == NumElts) {
3039           if (NumViaIntegerBits <= 32)
3040             Bits = SignExtend64<32>(Bits);
3041           SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3042           Elts[IntegerEltIdx] = Elt;
3043           Bits = 0;
3044           BitPos = 0;
3045           IntegerEltIdx++;
3046         }
3047       }
3048 
3049       SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3050 
3051       if (NumElts < NumViaIntegerBits) {
3052         // If we're producing a smaller vector than our minimum legal integer
3053         // type, bitcast to the equivalent (known-legal) mask type, and extract
3054         // our final mask.
3055         assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3056         Vec = DAG.getBitcast(MVT::v8i1, Vec);
3057         Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3058                           DAG.getConstant(0, DL, XLenVT));
3059       } else {
3060         // Else we must have produced an integer type with the same size as the
3061         // mask type; bitcast for the final result.
3062         assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3063         Vec = DAG.getBitcast(VT, Vec);
3064       }
3065 
3066       return Vec;
3067     }
3068 
3069     // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3070     // vector type, we have a legal equivalently-sized i8 type, so we can use
3071     // that.
3072     MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3073     SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3074 
3075     SDValue WideVec;
3076     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3077       // For a splat, perform a scalar truncate before creating the wider
3078       // vector.
3079       assert(Splat.getValueType() == XLenVT &&
3080              "Unexpected type for i1 splat value");
3081       Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
3082                           DAG.getConstant(1, DL, XLenVT));
3083       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3084     } else {
3085       SmallVector<SDValue, 8> Ops(Op->op_values());
3086       WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3087       SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3088       WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3089     }
3090 
3091     return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3092   }
3093 
3094   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3095     if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3096       return Gather;
3097     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3098                                         : RISCVISD::VMV_V_X_VL;
3099     Splat =
3100         DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3101     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3102   }
3103 
3104   // Try and match index sequences, which we can lower to the vid instruction
3105   // with optional modifications. An all-undef vector is matched by
3106   // getSplatValue, above.
3107   if (auto SimpleVID = isSimpleVIDSequence(Op)) {
3108     int64_t StepNumerator = SimpleVID->StepNumerator;
3109     unsigned StepDenominator = SimpleVID->StepDenominator;
3110     int64_t Addend = SimpleVID->Addend;
3111 
3112     assert(StepNumerator != 0 && "Invalid step");
3113     bool Negate = false;
3114     int64_t SplatStepVal = StepNumerator;
3115     unsigned StepOpcode = ISD::MUL;
3116     // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3117     // anyway as the shift of 63 won't fit in uimm5.
3118     if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3119         isPowerOf2_64(std::abs(StepNumerator))) {
3120       Negate = StepNumerator < 0;
3121       StepOpcode = ISD::SHL;
3122       SplatStepVal = Log2_64(std::abs(StepNumerator));
3123     }
3124 
3125     // Only emit VIDs with suitably-small steps/addends. We use imm5 is a
3126     // threshold since it's the immediate value many RVV instructions accept.
3127     // There is no vmul.vi instruction so ensure multiply constant can fit in
3128     // a single addi instruction.
3129     if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3130          (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3131         isPowerOf2_32(StepDenominator) &&
3132         (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3133       MVT VIDVT =
3134           VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3135       MVT VIDContainerVT =
3136           getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3137       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3138       // Convert right out of the scalable type so we can use standard ISD
3139       // nodes for the rest of the computation. If we used scalable types with
3140       // these, we'd lose the fixed-length vector info and generate worse
3141       // vsetvli code.
3142       VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3143       if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3144           (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3145         SDValue SplatStep = DAG.getSplatBuildVector(
3146             VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
3147         VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3148       }
3149       if (StepDenominator != 1) {
3150         SDValue SplatStep = DAG.getSplatBuildVector(
3151             VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
3152         VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3153       }
3154       if (Addend != 0 || Negate) {
3155         SDValue SplatAddend = DAG.getSplatBuildVector(
3156             VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
3157         VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3158                           VID);
3159       }
3160       if (VT.isFloatingPoint()) {
3161         // TODO: Use vfwcvt to reduce register pressure.
3162         VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3163       }
3164       return VID;
3165     }
3166   }
3167 
3168   // Attempt to detect "hidden" splats, which only reveal themselves as splats
3169   // when re-interpreted as a vector with a larger element type. For example,
3170   //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3171   // could be instead splat as
3172   //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
3173   // TODO: This optimization could also work on non-constant splats, but it
3174   // would require bit-manipulation instructions to construct the splat value.
3175   SmallVector<SDValue> Sequence;
3176   unsigned EltBitSize = VT.getScalarSizeInBits();
3177   const auto *BV = cast<BuildVectorSDNode>(Op);
3178   if (VT.isInteger() && EltBitSize < 64 &&
3179       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3180       BV->getRepeatedSequence(Sequence) &&
3181       (Sequence.size() * EltBitSize) <= 64) {
3182     unsigned SeqLen = Sequence.size();
3183     MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3184     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
3185     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3186             ViaIntVT == MVT::i64) &&
3187            "Unexpected sequence type");
3188 
3189     unsigned EltIdx = 0;
3190     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3191     uint64_t SplatValue = 0;
3192     // Construct the amalgamated value which can be splatted as this larger
3193     // vector type.
3194     for (const auto &SeqV : Sequence) {
3195       if (!SeqV.isUndef())
3196         SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3197                        << (EltIdx * EltBitSize));
3198       EltIdx++;
3199     }
3200 
3201     // On RV64, sign-extend from 32 to 64 bits where possible in order to
3202     // achieve better constant materializion.
3203     if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3204       SplatValue = SignExtend64<32>(SplatValue);
3205 
3206     // Since we can't introduce illegal i64 types at this stage, we can only
3207     // perform an i64 splat on RV32 if it is its own sign-extended value. That
3208     // way we can use RVV instructions to splat.
3209     assert((ViaIntVT.bitsLE(XLenVT) ||
3210             (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3211            "Unexpected bitcast sequence");
3212     if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3213       SDValue ViaVL =
3214           DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3215       MVT ViaContainerVT =
3216           getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3217       SDValue Splat =
3218           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3219                       DAG.getUNDEF(ViaContainerVT),
3220                       DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3221       Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3222       return DAG.getBitcast(VT, Splat);
3223     }
3224   }
3225 
3226   // Try and optimize BUILD_VECTORs with "dominant values" - these are values
3227   // which constitute a large proportion of the elements. In such cases we can
3228   // splat a vector with the dominant element and make up the shortfall with
3229   // INSERT_VECTOR_ELTs.
3230   // Note that this includes vectors of 2 elements by association. The
3231   // upper-most element is the "dominant" one, allowing us to use a splat to
3232   // "insert" the upper element, and an insert of the lower element at position
3233   // 0, which improves codegen.
3234   SDValue DominantValue;
3235   unsigned MostCommonCount = 0;
3236   DenseMap<SDValue, unsigned> ValueCounts;
3237   unsigned NumUndefElts =
3238       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3239 
3240   // Track the number of scalar loads we know we'd be inserting, estimated as
3241   // any non-zero floating-point constant. Other kinds of element are either
3242   // already in registers or are materialized on demand. The threshold at which
3243   // a vector load is more desirable than several scalar materializion and
3244   // vector-insertion instructions is not known.
3245   unsigned NumScalarLoads = 0;
3246 
3247   for (SDValue V : Op->op_values()) {
3248     if (V.isUndef())
3249       continue;
3250 
3251     ValueCounts.insert(std::make_pair(V, 0));
3252     unsigned &Count = ValueCounts[V];
3253     if (0 == Count)
3254       if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3255         NumScalarLoads += !CFP->isExactlyValue(+0.0);
3256 
3257     // Is this value dominant? In case of a tie, prefer the highest element as
3258     // it's cheaper to insert near the beginning of a vector than it is at the
3259     // end.
3260     if (++Count >= MostCommonCount) {
3261       DominantValue = V;
3262       MostCommonCount = Count;
3263     }
3264   }
3265 
3266   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3267   unsigned NumDefElts = NumElts - NumUndefElts;
3268   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3269 
3270   // Don't perform this optimization when optimizing for size, since
3271   // materializing elements and inserting them tends to cause code bloat.
3272   if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3273       (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3274       ((MostCommonCount > DominantValueCountThreshold) ||
3275        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3276     // Start by splatting the most common element.
3277     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3278 
3279     DenseSet<SDValue> Processed{DominantValue};
3280     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3281     for (const auto &OpIdx : enumerate(Op->ops())) {
3282       const SDValue &V = OpIdx.value();
3283       if (V.isUndef() || !Processed.insert(V).second)
3284         continue;
3285       if (ValueCounts[V] == 1) {
3286         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3287                           DAG.getConstant(OpIdx.index(), DL, XLenVT));
3288       } else {
3289         // Blend in all instances of this value using a VSELECT, using a
3290         // mask where each bit signals whether that element is the one
3291         // we're after.
3292         SmallVector<SDValue> Ops;
3293         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3294           return DAG.getConstant(V == V1, DL, XLenVT);
3295         });
3296         Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3297                           DAG.getBuildVector(SelMaskTy, DL, Ops),
3298                           DAG.getSplatBuildVector(VT, DL, V), Vec);
3299       }
3300     }
3301 
3302     return Vec;
3303   }
3304 
3305   // For constant vectors, use generic constant pool lowering.  Otherwise,
3306   // we'd have to materialize constants in GPRs just to move them into the
3307   // vector.
3308   if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3309       ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3310     return SDValue();
3311 
3312   assert((!VT.isFloatingPoint() ||
3313           VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3314          "Illegal type which will result in reserved encoding");
3315 
3316   const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3317 
3318   SDValue Vec = DAG.getUNDEF(ContainerVT);
3319   unsigned UndefCount = 0;
3320   for (const SDValue &V : Op->ops()) {
3321     if (V.isUndef()) {
3322       UndefCount++;
3323       continue;
3324     }
3325     if (UndefCount) {
3326       const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3327       Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3328                           Vec, Offset, Mask, VL, Policy);
3329       UndefCount = 0;
3330     }
3331     auto OpCode =
3332       VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3333     Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3334                       V, Mask, VL);
3335   }
3336   if (UndefCount) {
3337     const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3338     Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3339                         Vec, Offset, Mask, VL, Policy);
3340   }
3341   return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3342 }
3343 
3344 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3345                                    SDValue Lo, SDValue Hi, SDValue VL,
3346                                    SelectionDAG &DAG) {
3347   if (!Passthru)
3348     Passthru = DAG.getUNDEF(VT);
3349   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3350     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3351     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3352     // If Hi constant is all the same sign bit as Lo, lower this as a custom
3353     // node in order to try and match RVV vector/scalar instructions.
3354     if ((LoC >> 31) == HiC)
3355       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3356 
3357     // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use
3358     // vmv.v.x whose EEW = 32 to lower it.
3359     if (LoC == HiC && isAllOnesConstant(VL)) {
3360       MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3361       // TODO: if vl <= min(VLMAX), we can also do this. But we could not
3362       // access the subtarget here now.
3363       auto InterVec = DAG.getNode(
3364           RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
3365                                   DAG.getRegister(RISCV::X0, MVT::i32));
3366       return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
3367     }
3368   }
3369 
3370   // Fall back to a stack store and stride x0 vector load.
3371   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
3372                      Hi, VL);
3373 }
3374 
3375 // Called by type legalization to handle splat of i64 on RV32.
3376 // FIXME: We can optimize this when the type has sign or zero bits in one
3377 // of the halves.
3378 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3379                                    SDValue Scalar, SDValue VL,
3380                                    SelectionDAG &DAG) {
3381   assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
3382   SDValue Lo, Hi;
3383   std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
3384   return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
3385 }
3386 
3387 // This function lowers a splat of a scalar operand Splat with the vector
3388 // length VL. It ensures the final sequence is type legal, which is useful when
3389 // lowering a splat after type legalization.
3390 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
3391                                 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
3392                                 const RISCVSubtarget &Subtarget) {
3393   bool HasPassthru = Passthru && !Passthru.isUndef();
3394   if (!HasPassthru && !Passthru)
3395     Passthru = DAG.getUNDEF(VT);
3396   if (VT.isFloatingPoint()) {
3397     // If VL is 1, we could use vfmv.s.f.
3398     if (isOneConstant(VL))
3399       return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
3400     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
3401   }
3402 
3403   MVT XLenVT = Subtarget.getXLenVT();
3404 
3405   // Simplest case is that the operand needs to be promoted to XLenVT.
3406   if (Scalar.getValueType().bitsLE(XLenVT)) {
3407     // If the operand is a constant, sign extend to increase our chances
3408     // of being able to use a .vi instruction. ANY_EXTEND would become a
3409     // a zero extend and the simm5 check in isel would fail.
3410     // FIXME: Should we ignore the upper bits in isel instead?
3411     unsigned ExtOpc =
3412         isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3413     Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3414     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
3415     // If VL is 1 and the scalar value won't benefit from immediate, we could
3416     // use vmv.s.x.
3417     if (isOneConstant(VL) &&
3418         (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
3419       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
3420     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
3421   }
3422 
3423   assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
3424          "Unexpected scalar for splat lowering!");
3425 
3426   if (isOneConstant(VL) && isNullConstant(Scalar))
3427     return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
3428                        DAG.getConstant(0, DL, XLenVT), VL);
3429 
3430   // Otherwise use the more complicated splatting algorithm.
3431   return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
3432 }
3433 
3434 static MVT getLMUL1VT(MVT VT) {
3435   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3436          "Unexpected vector MVT");
3437   return MVT::getScalableVectorVT(
3438       VT.getVectorElementType(),
3439       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3440 }
3441 
3442 // This function lowers an insert of a scalar operand Scalar into lane
3443 // 0 of the vector regardless of the value of VL.  The contents of the
3444 // remaining lanes of the result vector are unspecified.  VL is assumed
3445 // to be non-zero.
3446 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
3447                                  const SDLoc &DL, SelectionDAG &DAG,
3448                                  const RISCVSubtarget &Subtarget) {
3449   const MVT XLenVT = Subtarget.getXLenVT();
3450 
3451   SDValue Passthru = DAG.getUNDEF(VT);
3452   if (VT.isFloatingPoint()) {
3453     // TODO: Use vmv.v.i for appropriate constants
3454     // Use M1 or smaller to avoid over constraining register allocation
3455     const MVT M1VT = getLMUL1VT(VT);
3456     auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
3457     SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
3458                                  DAG.getUNDEF(InnerVT), Scalar, VL);
3459     if (VT != InnerVT)
3460       Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3461                            DAG.getUNDEF(VT),
3462                            Result, DAG.getConstant(0, DL, XLenVT));
3463     return Result;
3464   }
3465 
3466 
3467   // Avoid the tricky legalization cases by falling back to using the
3468   // splat code which already handles it gracefully.
3469   if (!Scalar.getValueType().bitsLE(XLenVT))
3470     return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
3471                             DAG.getConstant(1, DL, XLenVT),
3472                             VT, DL, DAG, Subtarget);
3473 
3474   // If the operand is a constant, sign extend to increase our chances
3475   // of being able to use a .vi instruction. ANY_EXTEND would become a
3476   // a zero extend and the simm5 check in isel would fail.
3477   // FIXME: Should we ignore the upper bits in isel instead?
3478   unsigned ExtOpc =
3479     isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3480   Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3481   // We use a vmv.v.i if possible.  We limit this to LMUL1.  LMUL2 or
3482   // higher would involve overly constraining the register allocator for
3483   // no purpose.
3484   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
3485     if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
3486         VT.bitsLE(getLMUL1VT(VT)))
3487       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
3488   }
3489   // Use M1 or smaller to avoid over constraining register allocation
3490   const MVT M1VT = getLMUL1VT(VT);
3491   auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
3492   SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
3493                                DAG.getUNDEF(InnerVT), Scalar, VL);
3494   if (VT != InnerVT)
3495     Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3496                          DAG.getUNDEF(VT),
3497                          Result, DAG.getConstant(0, DL, XLenVT));
3498   return Result;
3499 }
3500 
3501 // Is this a shuffle extracts either the even or odd elements of a vector?
3502 // That is, specifically, either (a) or (b) below.
3503 // t34: v8i8 = extract_subvector t11, Constant:i64<0>
3504 // t33: v8i8 = extract_subvector t11, Constant:i64<8>
3505 // a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
3506 // b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
3507 // Returns {Src Vector, Even Elements} om success
3508 static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
3509                                   SDValue V2, ArrayRef<int> Mask,
3510                                   const RISCVSubtarget &Subtarget) {
3511   // Need to be able to widen the vector.
3512   if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
3513     return false;
3514 
3515   // Both input must be extracts.
3516   if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
3517       V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3518     return false;
3519 
3520   // Extracting from the same source.
3521   SDValue Src = V1.getOperand(0);
3522   if (Src != V2.getOperand(0))
3523     return false;
3524 
3525   // Src needs to have twice the number of elements.
3526   if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
3527     return false;
3528 
3529   // The extracts must extract the two halves of the source.
3530   if (V1.getConstantOperandVal(1) != 0 ||
3531       V2.getConstantOperandVal(1) != Mask.size())
3532     return false;
3533 
3534   // First index must be the first even or odd element from V1.
3535   if (Mask[0] != 0 && Mask[0] != 1)
3536     return false;
3537 
3538   // The others must increase by 2 each time.
3539   // TODO: Support undef elements?
3540   for (unsigned i = 1; i != Mask.size(); ++i)
3541     if (Mask[i] != Mask[i - 1] + 2)
3542       return false;
3543 
3544   return true;
3545 }
3546 
3547 /// Is this shuffle interleaving contiguous elements from one vector into the
3548 /// even elements and contiguous elements from another vector into the odd
3549 /// elements. \p EvenSrc will contain the element that should be in the first
3550 /// even element. \p OddSrc will contain the element that should be in the first
3551 /// odd element. These can be the first element in a source or the element half
3552 /// way through the source.
3553 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
3554                                 int &OddSrc, const RISCVSubtarget &Subtarget) {
3555   // We need to be able to widen elements to the next larger integer type.
3556   if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
3557     return false;
3558 
3559   int Size = Mask.size();
3560   int NumElts = VT.getVectorNumElements();
3561   assert(Size == (int)NumElts && "Unexpected mask size");
3562 
3563   SmallVector<unsigned, 2> StartIndexes;
3564   if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
3565     return false;
3566 
3567   EvenSrc = StartIndexes[0];
3568   OddSrc = StartIndexes[1];
3569 
3570   // One source should be low half of first vector.
3571   if (EvenSrc != 0 && OddSrc != 0)
3572     return false;
3573 
3574   // Subvectors will be subtracted from either at the start of the two input
3575   // vectors, or at the start and middle of the first vector if it's an unary
3576   // interleave.
3577   // In both cases, HalfNumElts will be extracted.
3578   // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
3579   // we'll create an illegal extract_subvector.
3580   // FIXME: We could support other values using a slidedown first.
3581   int HalfNumElts = NumElts / 2;
3582   return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
3583 }
3584 
3585 /// Match shuffles that concatenate two vectors, rotate the concatenation,
3586 /// and then extract the original number of elements from the rotated result.
3587 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
3588 /// returned rotation amount is for a rotate right, where elements move from
3589 /// higher elements to lower elements. \p LoSrc indicates the first source
3590 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
3591 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
3592 /// 0 or 1 if a rotation is found.
3593 ///
3594 /// NOTE: We talk about rotate to the right which matches how bit shift and
3595 /// rotate instructions are described where LSBs are on the right, but LLVM IR
3596 /// and the table below write vectors with the lowest elements on the left.
3597 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
3598   int Size = Mask.size();
3599 
3600   // We need to detect various ways of spelling a rotation:
3601   //   [11, 12, 13, 14, 15,  0,  1,  2]
3602   //   [-1, 12, 13, 14, -1, -1,  1, -1]
3603   //   [-1, -1, -1, -1, -1, -1,  1,  2]
3604   //   [ 3,  4,  5,  6,  7,  8,  9, 10]
3605   //   [-1,  4,  5,  6, -1, -1,  9, -1]
3606   //   [-1,  4,  5,  6, -1, -1, -1, -1]
3607   int Rotation = 0;
3608   LoSrc = -1;
3609   HiSrc = -1;
3610   for (int i = 0; i != Size; ++i) {
3611     int M = Mask[i];
3612     if (M < 0)
3613       continue;
3614 
3615     // Determine where a rotate vector would have started.
3616     int StartIdx = i - (M % Size);
3617     // The identity rotation isn't interesting, stop.
3618     if (StartIdx == 0)
3619       return -1;
3620 
3621     // If we found the tail of a vector the rotation must be the missing
3622     // front. If we found the head of a vector, it must be how much of the
3623     // head.
3624     int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
3625 
3626     if (Rotation == 0)
3627       Rotation = CandidateRotation;
3628     else if (Rotation != CandidateRotation)
3629       // The rotations don't match, so we can't match this mask.
3630       return -1;
3631 
3632     // Compute which value this mask is pointing at.
3633     int MaskSrc = M < Size ? 0 : 1;
3634 
3635     // Compute which of the two target values this index should be assigned to.
3636     // This reflects whether the high elements are remaining or the low elemnts
3637     // are remaining.
3638     int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
3639 
3640     // Either set up this value if we've not encountered it before, or check
3641     // that it remains consistent.
3642     if (TargetSrc < 0)
3643       TargetSrc = MaskSrc;
3644     else if (TargetSrc != MaskSrc)
3645       // This may be a rotation, but it pulls from the inputs in some
3646       // unsupported interleaving.
3647       return -1;
3648   }
3649 
3650   // Check that we successfully analyzed the mask, and normalize the results.
3651   assert(Rotation != 0 && "Failed to locate a viable rotation!");
3652   assert((LoSrc >= 0 || HiSrc >= 0) &&
3653          "Failed to find a rotated input vector!");
3654 
3655   return Rotation;
3656 }
3657 
3658 // Lower a deinterleave shuffle to vnsrl.
3659 // [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
3660 //                          -> [p, q, r, s] (EvenElts == false)
3661 // VT is the type of the vector to return, <[vscale x ]n x ty>
3662 // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
3663 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
3664                                        bool EvenElts,
3665                                        const RISCVSubtarget &Subtarget,
3666                                        SelectionDAG &DAG) {
3667   // The result is a vector of type <m x n x ty>
3668   MVT ContainerVT = VT;
3669   // Convert fixed vectors to scalable if needed
3670   if (ContainerVT.isFixedLengthVector()) {
3671     assert(Src.getSimpleValueType().isFixedLengthVector());
3672     ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
3673 
3674     // The source is a vector of type <m x n*2 x ty>
3675     MVT SrcContainerVT =
3676         MVT::getVectorVT(ContainerVT.getVectorElementType(),
3677                          ContainerVT.getVectorElementCount() * 2);
3678     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3679   }
3680 
3681   auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3682 
3683   // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
3684   // This also converts FP to int.
3685   unsigned EltBits = ContainerVT.getScalarSizeInBits();
3686   MVT WideSrcContainerVT = MVT::getVectorVT(
3687       MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
3688   Src = DAG.getBitcast(WideSrcContainerVT, Src);
3689 
3690   // The integer version of the container type.
3691   MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
3692 
3693   // If we want even elements, then the shift amount is 0. Otherwise, shift by
3694   // the original element size.
3695   unsigned Shift = EvenElts ? 0 : EltBits;
3696   SDValue SplatShift = DAG.getNode(
3697       RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
3698       DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
3699   SDValue Res =
3700       DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
3701                   DAG.getUNDEF(IntContainerVT), TrueMask, VL);
3702   // Cast back to FP if needed.
3703   Res = DAG.getBitcast(ContainerVT, Res);
3704 
3705   if (VT.isFixedLengthVector())
3706     Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
3707   return Res;
3708 }
3709 
3710 // Lower the following shuffle to vslidedown.
3711 // a)
3712 // t49: v8i8 = extract_subvector t13, Constant:i64<0>
3713 // t109: v8i8 = extract_subvector t13, Constant:i64<8>
3714 // t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
3715 // b)
3716 // t69: v16i16 = extract_subvector t68, Constant:i64<0>
3717 // t23: v8i16 = extract_subvector t69, Constant:i64<0>
3718 // t29: v4i16 = extract_subvector t23, Constant:i64<4>
3719 // t26: v8i16 = extract_subvector t69, Constant:i64<8>
3720 // t30: v4i16 = extract_subvector t26, Constant:i64<0>
3721 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
3722 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
3723                                                SDValue V1, SDValue V2,
3724                                                ArrayRef<int> Mask,
3725                                                const RISCVSubtarget &Subtarget,
3726                                                SelectionDAG &DAG) {
3727   auto findNonEXTRACT_SUBVECTORParent =
3728       [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
3729     uint64_t Offset = 0;
3730     while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
3731            // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
3732            // a scalable vector. But we don't want to match the case.
3733            Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
3734       Offset += Parent.getConstantOperandVal(1);
3735       Parent = Parent.getOperand(0);
3736     }
3737     return std::make_pair(Parent, Offset);
3738   };
3739 
3740   auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
3741   auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
3742 
3743   // Extracting from the same source.
3744   SDValue Src = V1Src;
3745   if (Src != V2Src)
3746     return SDValue();
3747 
3748   // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
3749   SmallVector<int, 16> NewMask(Mask);
3750   for (size_t i = 0; i != NewMask.size(); ++i) {
3751     if (NewMask[i] == -1)
3752       continue;
3753 
3754     if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
3755       NewMask[i] = NewMask[i] + V1IndexOffset;
3756     } else {
3757       // Minus NewMask.size() is needed. Otherwise, the b case would be
3758       // <5,6,7,12> instead of <5,6,7,8>.
3759       NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
3760     }
3761   }
3762 
3763   // First index must be known and non-zero. It will be used as the slidedown
3764   // amount.
3765   if (NewMask[0] <= 0)
3766     return SDValue();
3767 
3768   // NewMask is also continuous.
3769   for (unsigned i = 1; i != NewMask.size(); ++i)
3770     if (NewMask[i - 1] + 1 != NewMask[i])
3771       return SDValue();
3772 
3773   MVT XLenVT = Subtarget.getXLenVT();
3774   MVT SrcVT = Src.getSimpleValueType();
3775   MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3776   auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
3777   SDValue Slidedown =
3778       getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3779                     convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
3780                     DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
3781   return DAG.getNode(
3782       ISD::EXTRACT_SUBVECTOR, DL, VT,
3783       convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
3784       DAG.getConstant(0, DL, XLenVT));
3785 }
3786 
3787 // Because vslideup leaves the destination elements at the start intact, we can
3788 // use it to perform shuffles that insert subvectors:
3789 //
3790 // vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
3791 // ->
3792 // vsetvli zero, 8, e8, mf2, ta, ma
3793 // vslideup.vi v8, v9, 4
3794 //
3795 // vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
3796 // ->
3797 // vsetvli zero, 5, e8, mf2, tu, ma
3798 // vslideup.v1 v8, v9, 2
3799 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
3800                                              SDValue V1, SDValue V2,
3801                                              ArrayRef<int> Mask,
3802                                              const RISCVSubtarget &Subtarget,
3803                                              SelectionDAG &DAG) {
3804   unsigned NumElts = VT.getVectorNumElements();
3805   int NumSubElts, Index;
3806   if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
3807                                                 Index))
3808     return SDValue();
3809 
3810   bool OpsSwapped = Mask[Index] < (int)NumElts;
3811   SDValue InPlace = OpsSwapped ? V2 : V1;
3812   SDValue ToInsert = OpsSwapped ? V1 : V2;
3813 
3814   MVT XLenVT = Subtarget.getXLenVT();
3815   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3816   auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
3817   // We slide up by the index that the subvector is being inserted at, and set
3818   // VL to the index + the number of elements being inserted.
3819   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
3820   // If the we're adding a suffix to the in place vector, i.e. inserting right
3821   // up to the very end of it, then we don't actually care about the tail.
3822   if (NumSubElts + Index >= (int)NumElts)
3823     Policy |= RISCVII::TAIL_AGNOSTIC;
3824 
3825   InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
3826   ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
3827   SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
3828 
3829   SDValue Res;
3830   // If we're inserting into the lowest elements, use a tail undisturbed
3831   // vmv.v.v.
3832   if (Index == 0)
3833     Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
3834                       VL);
3835   else
3836     Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
3837                       DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
3838   return convertFromScalableVector(VT, Res, DAG, Subtarget);
3839 }
3840 
3841 /// Match v(f)slide1up/down idioms.  These operations involve sliding
3842 /// N-1 elements to make room for an inserted scalar at one end.
3843 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
3844                                             SDValue V1, SDValue V2,
3845                                             ArrayRef<int> Mask,
3846                                             const RISCVSubtarget &Subtarget,
3847                                             SelectionDAG &DAG) {
3848   bool OpsSwapped = false;
3849   if (!isa<BuildVectorSDNode>(V1)) {
3850     if (!isa<BuildVectorSDNode>(V2))
3851       return SDValue();
3852     std::swap(V1, V2);
3853     OpsSwapped = true;
3854   }
3855   SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
3856   if (!Splat)
3857     return SDValue();
3858 
3859   // Return true if the mask could describe a slide of Mask.size() - 1
3860   // elements from concat_vector(V1, V2)[Base:] to [Offset:].
3861   auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
3862     const unsigned S = (Offset > 0) ? 0 : -Offset;
3863     const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
3864     for (unsigned i = S; i != E; ++i)
3865       if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
3866         return false;
3867     return true;
3868   };
3869 
3870   const unsigned NumElts = VT.getVectorNumElements();
3871   bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
3872   if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
3873     return SDValue();
3874 
3875   const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
3876   // Inserted lane must come from splat, undef scalar is legal but not profitable.
3877   if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
3878     return SDValue();
3879 
3880   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3881   auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3882   auto OpCode = IsVSlidedown ?
3883     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
3884     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
3885   auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
3886                          DAG.getUNDEF(ContainerVT),
3887                          convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
3888                          Splat, TrueMask, VL);
3889   return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3890 }
3891 
3892 // Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
3893 // to create an interleaved vector of <[vscale x] n*2 x ty>.
3894 // This requires that the size of ty is less than the subtarget's maximum ELEN.
3895 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
3896                                      const SDLoc &DL, SelectionDAG &DAG,
3897                                      const RISCVSubtarget &Subtarget) {
3898   MVT VecVT = EvenV.getSimpleValueType();
3899   MVT VecContainerVT = VecVT; // <vscale x n x ty>
3900   // Convert fixed vectors to scalable if needed
3901   if (VecContainerVT.isFixedLengthVector()) {
3902     VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
3903     EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
3904     OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
3905   }
3906 
3907   assert(VecVT.getScalarSizeInBits() < Subtarget.getELEN());
3908 
3909   // We're working with a vector of the same size as the resulting
3910   // interleaved vector, but with half the number of elements and
3911   // twice the SEW (Hence the restriction on not using the maximum
3912   // ELEN)
3913   MVT WideVT =
3914       MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
3915                        VecVT.getVectorElementCount());
3916   MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
3917   if (WideContainerVT.isFixedLengthVector())
3918     WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
3919 
3920   // Bitcast the input vectors to integers in case they are FP
3921   VecContainerVT = VecContainerVT.changeTypeToInteger();
3922   EvenV = DAG.getBitcast(VecContainerVT, EvenV);
3923   OddV = DAG.getBitcast(VecContainerVT, OddV);
3924 
3925   auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
3926   SDValue Passthru = DAG.getUNDEF(WideContainerVT);
3927 
3928   // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
3929   // vwaddu.vv
3930   SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT,
3931                                     EvenV, OddV, Passthru, Mask, VL);
3932 
3933   // Then get OddV * by 2^(VecVT.getScalarSizeInBits() - 1)
3934   SDValue AllOnesVec = DAG.getSplatVector(
3935       VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
3936   SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, OddV,
3937                                 AllOnesVec, Passthru, Mask, VL);
3938 
3939   // Add the two together so we get
3940   //   (OddV * 0xff...ff) + (OddV + EvenV)
3941   // = (OddV * 0x100...00) + EvenV
3942   // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
3943   // Note the ADD_VL and VLMULU_VL should get selected as vwmaccu.vx
3944   Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, Interleaved,
3945                             OddsMul, Passthru, Mask, VL);
3946 
3947   // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
3948   MVT ResultContainerVT = MVT::getVectorVT(
3949       VecVT.getVectorElementType(), // Make sure to use original type
3950       VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
3951   Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
3952 
3953   // Convert back to a fixed vector if needed
3954   MVT ResultVT =
3955       MVT::getVectorVT(VecVT.getVectorElementType(),
3956                        VecVT.getVectorElementCount().multiplyCoefficientBy(2));
3957   if (ResultVT.isFixedLengthVector())
3958     Interleaved =
3959         convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
3960 
3961   return Interleaved;
3962 }
3963 
3964 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
3965                                    const RISCVSubtarget &Subtarget) {
3966   SDValue V1 = Op.getOperand(0);
3967   SDValue V2 = Op.getOperand(1);
3968   SDLoc DL(Op);
3969   MVT XLenVT = Subtarget.getXLenVT();
3970   MVT VT = Op.getSimpleValueType();
3971   unsigned NumElts = VT.getVectorNumElements();
3972   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3973 
3974   // Promote i1 shuffle to i8 shuffle.
3975   if (VT.getVectorElementType() == MVT::i1) {
3976     MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
3977     V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
3978     V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
3979                       : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
3980     SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
3981     return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
3982                         ISD::SETNE);
3983   }
3984 
3985   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3986 
3987   auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3988 
3989   if (SVN->isSplat()) {
3990     const int Lane = SVN->getSplatIndex();
3991     if (Lane >= 0) {
3992       MVT SVT = VT.getVectorElementType();
3993 
3994       // Turn splatted vector load into a strided load with an X0 stride.
3995       SDValue V = V1;
3996       // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
3997       // with undef.
3998       // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
3999       int Offset = Lane;
4000       if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4001         int OpElements =
4002             V.getOperand(0).getSimpleValueType().getVectorNumElements();
4003         V = V.getOperand(Offset / OpElements);
4004         Offset %= OpElements;
4005       }
4006 
4007       // We need to ensure the load isn't atomic or volatile.
4008       if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4009         auto *Ld = cast<LoadSDNode>(V);
4010         Offset *= SVT.getStoreSize();
4011         SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
4012                                                    TypeSize::Fixed(Offset), DL);
4013 
4014         // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4015         if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4016           SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4017           SDValue IntID =
4018               DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4019           SDValue Ops[] = {Ld->getChain(),
4020                            IntID,
4021                            DAG.getUNDEF(ContainerVT),
4022                            NewAddr,
4023                            DAG.getRegister(RISCV::X0, XLenVT),
4024                            VL};
4025           SDValue NewLoad = DAG.getMemIntrinsicNode(
4026               ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4027               DAG.getMachineFunction().getMachineMemOperand(
4028                   Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4029           DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4030           return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4031         }
4032 
4033         // Otherwise use a scalar load and splat. This will give the best
4034         // opportunity to fold a splat into the operation. ISel can turn it into
4035         // the x0 strided load if we aren't able to fold away the select.
4036         if (SVT.isFloatingPoint())
4037           V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4038                           Ld->getPointerInfo().getWithOffset(Offset),
4039                           Ld->getOriginalAlign(),
4040                           Ld->getMemOperand()->getFlags());
4041         else
4042           V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4043                              Ld->getPointerInfo().getWithOffset(Offset), SVT,
4044                              Ld->getOriginalAlign(),
4045                              Ld->getMemOperand()->getFlags());
4046         DAG.makeEquivalentMemoryOrdering(Ld, V);
4047 
4048         unsigned Opc =
4049             VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4050         SDValue Splat =
4051             DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4052         return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4053       }
4054 
4055       V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4056       assert(Lane < (int)NumElts && "Unexpected lane!");
4057       SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4058                                    V1, DAG.getConstant(Lane, DL, XLenVT),
4059                                    DAG.getUNDEF(ContainerVT), TrueMask, VL);
4060       return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4061     }
4062   }
4063 
4064   ArrayRef<int> Mask = SVN->getMask();
4065 
4066   if (SDValue V =
4067           lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
4068     return V;
4069 
4070   if (SDValue V =
4071           lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
4072     return V;
4073 
4074   // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
4075   // be undef which can be handled with a single SLIDEDOWN/UP.
4076   int LoSrc, HiSrc;
4077   int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
4078   if (Rotation > 0) {
4079     SDValue LoV, HiV;
4080     if (LoSrc >= 0) {
4081       LoV = LoSrc == 0 ? V1 : V2;
4082       LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
4083     }
4084     if (HiSrc >= 0) {
4085       HiV = HiSrc == 0 ? V1 : V2;
4086       HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
4087     }
4088 
4089     // We found a rotation. We need to slide HiV down by Rotation. Then we need
4090     // to slide LoV up by (NumElts - Rotation).
4091     unsigned InvRotate = NumElts - Rotation;
4092 
4093     SDValue Res = DAG.getUNDEF(ContainerVT);
4094     if (HiV) {
4095       // Even though we could use a smaller VL, don't to avoid a vsetivli
4096       // toggle.
4097       Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
4098                           DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
4099     }
4100     if (LoV)
4101       Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
4102                         DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
4103                         RISCVII::TAIL_AGNOSTIC);
4104 
4105     return convertFromScalableVector(VT, Res, DAG, Subtarget);
4106   }
4107 
4108   // If this is a deinterleave and we can widen the vector, then we can use
4109   // vnsrl to deinterleave.
4110   if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
4111     return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
4112                                    Subtarget, DAG);
4113   }
4114 
4115   if (SDValue V =
4116           lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
4117     return V;
4118 
4119   // Detect an interleave shuffle and lower to
4120   // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
4121   int EvenSrc, OddSrc;
4122   if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
4123     // Extract the halves of the vectors.
4124     MVT HalfVT = VT.getHalfNumVectorElementsVT();
4125 
4126     int Size = Mask.size();
4127     SDValue EvenV, OddV;
4128     assert(EvenSrc >= 0 && "Undef source?");
4129     EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
4130     EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
4131                         DAG.getConstant(EvenSrc % Size, DL, XLenVT));
4132 
4133     assert(OddSrc >= 0 && "Undef source?");
4134     OddV = (OddSrc / Size) == 0 ? V1 : V2;
4135     OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
4136                        DAG.getConstant(OddSrc % Size, DL, XLenVT));
4137 
4138     return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
4139   }
4140 
4141   // Detect shuffles which can be re-expressed as vector selects; these are
4142   // shuffles in which each element in the destination is taken from an element
4143   // at the corresponding index in either source vectors.
4144   bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
4145     int MaskIndex = MaskIdx.value();
4146     return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
4147   });
4148 
4149   assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
4150 
4151   SmallVector<SDValue> MaskVals;
4152   // As a backup, shuffles can be lowered via a vrgather instruction, possibly
4153   // merged with a second vrgather.
4154   SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
4155 
4156   // By default we preserve the original operand order, and use a mask to
4157   // select LHS as true and RHS as false. However, since RVV vector selects may
4158   // feature splats but only on the LHS, we may choose to invert our mask and
4159   // instead select between RHS and LHS.
4160   bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
4161   bool InvertMask = IsSelect == SwapOps;
4162 
4163   // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
4164   // half.
4165   DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
4166 
4167   // Now construct the mask that will be used by the vselect or blended
4168   // vrgather operation. For vrgathers, construct the appropriate indices into
4169   // each vector.
4170   for (int MaskIndex : Mask) {
4171     bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
4172     MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4173     if (!IsSelect) {
4174       bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
4175       GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
4176                                      ? DAG.getConstant(MaskIndex, DL, XLenVT)
4177                                      : DAG.getUNDEF(XLenVT));
4178       GatherIndicesRHS.push_back(
4179           IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
4180                             : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
4181       if (IsLHSOrUndefIndex && MaskIndex >= 0)
4182         ++LHSIndexCounts[MaskIndex];
4183       if (!IsLHSOrUndefIndex)
4184         ++RHSIndexCounts[MaskIndex - NumElts];
4185     }
4186   }
4187 
4188   if (SwapOps) {
4189     std::swap(V1, V2);
4190     std::swap(GatherIndicesLHS, GatherIndicesRHS);
4191   }
4192 
4193   assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
4194   MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4195   SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4196 
4197   if (IsSelect)
4198     return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
4199 
4200   if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
4201     // On such a large vector we're unable to use i8 as the index type.
4202     // FIXME: We could promote the index to i16 and use vrgatherei16, but that
4203     // may involve vector splitting if we're already at LMUL=8, or our
4204     // user-supplied maximum fixed-length LMUL.
4205     return SDValue();
4206   }
4207 
4208   unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
4209   unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
4210   MVT IndexVT = VT.changeTypeToInteger();
4211   // Since we can't introduce illegal index types at this stage, use i16 and
4212   // vrgatherei16 if the corresponding index type for plain vrgather is greater
4213   // than XLenVT.
4214   if (IndexVT.getScalarType().bitsGT(XLenVT)) {
4215     GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
4216     IndexVT = IndexVT.changeVectorElementType(MVT::i16);
4217   }
4218 
4219   MVT IndexContainerVT =
4220       ContainerVT.changeVectorElementType(IndexVT.getScalarType());
4221 
4222   SDValue Gather;
4223   // TODO: This doesn't trigger for i64 vectors on RV32, since there we
4224   // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
4225   if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
4226     Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
4227                               Subtarget);
4228   } else {
4229     V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4230     // If only one index is used, we can use a "splat" vrgather.
4231     // TODO: We can splat the most-common index and fix-up any stragglers, if
4232     // that's beneficial.
4233     if (LHSIndexCounts.size() == 1) {
4234       int SplatIndex = LHSIndexCounts.begin()->getFirst();
4235       Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
4236                            DAG.getConstant(SplatIndex, DL, XLenVT),
4237                            DAG.getUNDEF(ContainerVT), TrueMask, VL);
4238     } else {
4239       SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
4240       LHSIndices =
4241           convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
4242 
4243       Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
4244                            DAG.getUNDEF(ContainerVT), TrueMask, VL);
4245     }
4246   }
4247 
4248   // If a second vector operand is used by this shuffle, blend it in with an
4249   // additional vrgather.
4250   if (!V2.isUndef()) {
4251     V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4252 
4253     MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
4254     SelectMask =
4255         convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
4256 
4257     // If only one index is used, we can use a "splat" vrgather.
4258     // TODO: We can splat the most-common index and fix-up any stragglers, if
4259     // that's beneficial.
4260     if (RHSIndexCounts.size() == 1) {
4261       int SplatIndex = RHSIndexCounts.begin()->getFirst();
4262       Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
4263                            DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
4264                            SelectMask, VL);
4265     } else {
4266       SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
4267       RHSIndices =
4268           convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
4269       Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
4270                            SelectMask, VL);
4271     }
4272   }
4273 
4274   return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4275 }
4276 
4277 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
4278   // Support splats for any type. These should type legalize well.
4279   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
4280     return true;
4281 
4282   // Only support legal VTs for other shuffles for now.
4283   if (!isTypeLegal(VT))
4284     return false;
4285 
4286   MVT SVT = VT.getSimpleVT();
4287 
4288   // Not for i1 vectors.
4289   if (SVT.getScalarType() == MVT::i1)
4290     return false;
4291 
4292   int Dummy1, Dummy2;
4293   return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
4294          isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
4295 }
4296 
4297 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
4298 // the exponent.
4299 SDValue
4300 RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
4301                                                SelectionDAG &DAG) const {
4302   MVT VT = Op.getSimpleValueType();
4303   unsigned EltSize = VT.getScalarSizeInBits();
4304   SDValue Src = Op.getOperand(0);
4305   SDLoc DL(Op);
4306   MVT ContainerVT = VT;
4307 
4308   SDValue Mask, VL;
4309   if (Op->isVPOpcode()) {
4310     Mask = Op.getOperand(1);
4311     if (VT.isFixedLengthVector())
4312       Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
4313                                      Subtarget);
4314     VL = Op.getOperand(2);
4315   }
4316 
4317   // We choose FP type that can represent the value if possible. Otherwise, we
4318   // use rounding to zero conversion for correct exponent of the result.
4319   // TODO: Use f16 for i8 when possible?
4320   MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
4321   if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
4322     FloatEltVT = MVT::f32;
4323   MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
4324 
4325   // Legal types should have been checked in the RISCVTargetLowering
4326   // constructor.
4327   // TODO: Splitting may make sense in some cases.
4328   assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
4329          "Expected legal float type!");
4330 
4331   // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
4332   // The trailing zero count is equal to log2 of this single bit value.
4333   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
4334     SDValue Neg = DAG.getNegative(Src, DL, VT);
4335     Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
4336   } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
4337     SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
4338                               Src, Mask, VL);
4339     Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
4340   }
4341 
4342   // We have a legal FP type, convert to it.
4343   SDValue FloatVal;
4344   if (FloatVT.bitsGT(VT)) {
4345     if (Op->isVPOpcode())
4346       FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
4347     else
4348       FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
4349   } else {
4350     // Use RTZ to avoid rounding influencing exponent of FloatVal.
4351     if (VT.isFixedLengthVector()) {
4352       ContainerVT = getContainerForFixedLengthVector(VT);
4353       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
4354     }
4355     if (!Op->isVPOpcode())
4356       std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4357     SDValue RTZRM =
4358         DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
4359     MVT ContainerFloatVT =
4360         MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
4361     FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
4362                            Src, Mask, RTZRM, VL);
4363     if (VT.isFixedLengthVector())
4364       FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
4365   }
4366   // Bitcast to integer and shift the exponent to the LSB.
4367   EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
4368   SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
4369   unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
4370 
4371   SDValue Exp;
4372   // Restore back to original type. Truncation after SRL is to generate vnsrl.
4373   if (Op->isVPOpcode()) {
4374     Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
4375                       DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
4376     Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
4377   } else {
4378     Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
4379                       DAG.getConstant(ShiftAmt, DL, IntVT));
4380     if (IntVT.bitsLT(VT))
4381       Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
4382     else if (IntVT.bitsGT(VT))
4383       Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
4384   }
4385 
4386   // The exponent contains log2 of the value in biased form.
4387   unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
4388   // For trailing zeros, we just need to subtract the bias.
4389   if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
4390     return DAG.getNode(ISD::SUB, DL, VT, Exp,
4391                        DAG.getConstant(ExponentBias, DL, VT));
4392   if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
4393     return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
4394                        DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
4395 
4396   // For leading zeros, we need to remove the bias and convert from log2 to
4397   // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
4398   unsigned Adjust = ExponentBias + (EltSize - 1);
4399   SDValue Res;
4400   if (Op->isVPOpcode())
4401     Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
4402                       Mask, VL);
4403   else
4404     Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
4405 
4406   // The above result with zero input equals to Adjust which is greater than
4407   // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
4408   if (Op.getOpcode() == ISD::CTLZ)
4409     Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
4410   else if (Op.getOpcode() == ISD::VP_CTLZ)
4411     Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
4412                       DAG.getConstant(EltSize, DL, VT), Mask, VL);
4413   return Res;
4414 }
4415 
4416 // While RVV has alignment restrictions, we should always be able to load as a
4417 // legal equivalently-sized byte-typed vector instead. This method is
4418 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
4419 // the load is already correctly-aligned, it returns SDValue().
4420 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
4421                                                     SelectionDAG &DAG) const {
4422   auto *Load = cast<LoadSDNode>(Op);
4423   assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
4424 
4425   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
4426                                      Load->getMemoryVT(),
4427                                      *Load->getMemOperand()))
4428     return SDValue();
4429 
4430   SDLoc DL(Op);
4431   MVT VT = Op.getSimpleValueType();
4432   unsigned EltSizeBits = VT.getScalarSizeInBits();
4433   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
4434          "Unexpected unaligned RVV load type");
4435   MVT NewVT =
4436       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
4437   assert(NewVT.isValid() &&
4438          "Expecting equally-sized RVV vector types to be legal");
4439   SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
4440                           Load->getPointerInfo(), Load->getOriginalAlign(),
4441                           Load->getMemOperand()->getFlags());
4442   return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
4443 }
4444 
4445 // While RVV has alignment restrictions, we should always be able to store as a
4446 // legal equivalently-sized byte-typed vector instead. This method is
4447 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
4448 // returns SDValue() if the store is already correctly aligned.
4449 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
4450                                                      SelectionDAG &DAG) const {
4451   auto *Store = cast<StoreSDNode>(Op);
4452   assert(Store && Store->getValue().getValueType().isVector() &&
4453          "Expected vector store");
4454 
4455   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
4456                                      Store->getMemoryVT(),
4457                                      *Store->getMemOperand()))
4458     return SDValue();
4459 
4460   SDLoc DL(Op);
4461   SDValue StoredVal = Store->getValue();
4462   MVT VT = StoredVal.getSimpleValueType();
4463   unsigned EltSizeBits = VT.getScalarSizeInBits();
4464   assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
4465          "Unexpected unaligned RVV store type");
4466   MVT NewVT =
4467       MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
4468   assert(NewVT.isValid() &&
4469          "Expecting equally-sized RVV vector types to be legal");
4470   StoredVal = DAG.getBitcast(NewVT, StoredVal);
4471   return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
4472                       Store->getPointerInfo(), Store->getOriginalAlign(),
4473                       Store->getMemOperand()->getFlags());
4474 }
4475 
4476 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
4477                              const RISCVSubtarget &Subtarget) {
4478   assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
4479 
4480   int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
4481 
4482   // All simm32 constants should be handled by isel.
4483   // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
4484   // this check redundant, but small immediates are common so this check
4485   // should have better compile time.
4486   if (isInt<32>(Imm))
4487     return Op;
4488 
4489   // We only need to cost the immediate, if constant pool lowering is enabled.
4490   if (!Subtarget.useConstantPoolForLargeInts())
4491     return Op;
4492 
4493   RISCVMatInt::InstSeq Seq =
4494       RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
4495   if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
4496     return Op;
4497 
4498   // Special case. See if we can build the constant as (ADD (SLLI X, 32), X) do
4499   // that if it will avoid a constant pool.
4500   // It will require an extra temporary register though.
4501   if (!DAG.shouldOptForSize()) {
4502     int64_t LoVal = SignExtend64<32>(Imm);
4503     int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
4504     if (LoVal == HiVal) {
4505       RISCVMatInt::InstSeq SeqLo =
4506           RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
4507       if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
4508         return Op;
4509     }
4510   }
4511 
4512   // Expand to a constant pool using the default expansion code.
4513   return SDValue();
4514 }
4515 
4516 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
4517                                  const RISCVSubtarget &Subtarget) {
4518   SDLoc dl(Op);
4519   AtomicOrdering FenceOrdering =
4520       static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4521   SyncScope::ID FenceSSID =
4522       static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4523 
4524   if (Subtarget.hasStdExtZtso()) {
4525     // The only fence that needs an instruction is a sequentially-consistent
4526     // cross-thread fence.
4527     if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4528         FenceSSID == SyncScope::System)
4529       return Op;
4530 
4531     // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4532     return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
4533   }
4534 
4535   // singlethread fences only synchronize with signal handlers on the same
4536   // thread and thus only need to preserve instruction order, not actually
4537   // enforce memory ordering.
4538   if (FenceSSID == SyncScope::SingleThread)
4539     // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4540     return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
4541 
4542   return Op;
4543 }
4544 
4545 SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
4546                                              SelectionDAG &DAG) const {
4547   SDLoc DL(Op);
4548   MVT VT = Op.getSimpleValueType();
4549   MVT XLenVT = Subtarget.getXLenVT();
4550   auto CNode = cast<ConstantSDNode>(Op.getOperand(1));
4551   unsigned Check = CNode->getZExtValue();
4552   unsigned TDCMask = 0;
4553   if (Check & fcSNan)
4554     TDCMask |= RISCV::FPMASK_Signaling_NaN;
4555   if (Check & fcQNan)
4556     TDCMask |= RISCV::FPMASK_Quiet_NaN;
4557   if (Check & fcPosInf)
4558     TDCMask |= RISCV::FPMASK_Positive_Infinity;
4559   if (Check & fcNegInf)
4560     TDCMask |= RISCV::FPMASK_Negative_Infinity;
4561   if (Check & fcPosNormal)
4562     TDCMask |= RISCV::FPMASK_Positive_Normal;
4563   if (Check & fcNegNormal)
4564     TDCMask |= RISCV::FPMASK_Negative_Normal;
4565   if (Check & fcPosSubnormal)
4566     TDCMask |= RISCV::FPMASK_Positive_Subnormal;
4567   if (Check & fcNegSubnormal)
4568     TDCMask |= RISCV::FPMASK_Negative_Subnormal;
4569   if (Check & fcPosZero)
4570     TDCMask |= RISCV::FPMASK_Positive_Zero;
4571   if (Check & fcNegZero)
4572     TDCMask |= RISCV::FPMASK_Negative_Zero;
4573 
4574   bool IsOneBitMask = isPowerOf2_32(TDCMask);
4575 
4576   SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
4577 
4578   if (VT.isVector()) {
4579     SDValue Op0 = Op.getOperand(0);
4580     MVT VT0 = Op.getOperand(0).getSimpleValueType();
4581 
4582     if (VT.isScalableVector()) {
4583       MVT DstVT = VT0.changeVectorElementTypeToInteger();
4584       auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
4585       SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
4586                                     VL, Op->getFlags());
4587       if (IsOneBitMask)
4588         return DAG.getSetCC(DL, VT, FPCLASS,
4589                             DAG.getConstant(TDCMask, DL, DstVT),
4590                             ISD::CondCode::SETEQ);
4591       SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
4592                                 DAG.getConstant(TDCMask, DL, DstVT));
4593       return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
4594                           ISD::SETNE);
4595     }
4596 
4597     MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
4598     MVT ContainerVT = getContainerForFixedLengthVector(VT);
4599     MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
4600     auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
4601 
4602     Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
4603 
4604     SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
4605                                   Mask, VL, Op->getFlags());
4606 
4607     TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
4608                            DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
4609     if (IsOneBitMask) {
4610       SDValue VMSEQ =
4611           DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
4612                       {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
4613                        DAG.getUNDEF(ContainerVT), Mask, VL});
4614       return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
4615     }
4616     SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
4617                               TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
4618 
4619     SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
4620     SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
4621                             DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
4622 
4623     SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
4624                                 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
4625                                  DAG.getUNDEF(ContainerVT), Mask, VL});
4626     return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
4627   }
4628 
4629   SDValue FPCLASS = DAG.getNode(RISCVISD::FPCLASS, DL, VT, Op.getOperand(0));
4630   SDValue AND = DAG.getNode(ISD::AND, DL, VT, FPCLASS, TDCMaskV);
4631   return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, XLenVT),
4632                       ISD::CondCode::SETNE);
4633 }
4634 
4635 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
4636 // operations propagate nans.
4637 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
4638                                       const RISCVSubtarget &Subtarget) {
4639   SDLoc DL(Op);
4640   EVT VT = Op.getValueType();
4641 
4642   SDValue X = Op.getOperand(0);
4643   SDValue Y = Op.getOperand(1);
4644 
4645   MVT XLenVT = Subtarget.getXLenVT();
4646 
4647   // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
4648   // ensures that when one input is a nan, the other will also be a nan allowing
4649   // the nan to propagate. If both inputs are nan, this will swap the inputs
4650   // which is harmless.
4651   // FIXME: Handle nonans FMF and use isKnownNeverNaN.
4652   SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
4653   SDValue NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
4654 
4655   SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
4656   SDValue NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
4657 
4658   unsigned Opc =
4659       Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
4660   return DAG.getNode(Opc, DL, VT, NewX, NewY);
4661 }
4662 
4663 /// Get a RISCV target specified VL op for a given SDNode.
4664 static unsigned getRISCVVLOp(SDValue Op) {
4665 #define OP_CASE(NODE)                                                          \
4666   case ISD::NODE:                                                              \
4667     return RISCVISD::NODE##_VL;
4668   switch (Op.getOpcode()) {
4669   default:
4670     llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
4671     // clang-format off
4672   OP_CASE(ADD)
4673   OP_CASE(SUB)
4674   OP_CASE(MUL)
4675   OP_CASE(MULHS)
4676   OP_CASE(MULHU)
4677   OP_CASE(SDIV)
4678   OP_CASE(SREM)
4679   OP_CASE(UDIV)
4680   OP_CASE(UREM)
4681   OP_CASE(SHL)
4682   OP_CASE(SRA)
4683   OP_CASE(SRL)
4684   OP_CASE(SADDSAT)
4685   OP_CASE(UADDSAT)
4686   OP_CASE(SSUBSAT)
4687   OP_CASE(USUBSAT)
4688   OP_CASE(FADD)
4689   OP_CASE(FSUB)
4690   OP_CASE(FMUL)
4691   OP_CASE(FDIV)
4692   OP_CASE(FNEG)
4693   OP_CASE(FABS)
4694   OP_CASE(FSQRT)
4695   OP_CASE(SMIN)
4696   OP_CASE(SMAX)
4697   OP_CASE(UMIN)
4698   OP_CASE(UMAX)
4699   OP_CASE(FMINNUM)
4700   OP_CASE(FMAXNUM)
4701   OP_CASE(STRICT_FADD)
4702   OP_CASE(STRICT_FSUB)
4703   OP_CASE(STRICT_FMUL)
4704   OP_CASE(STRICT_FDIV)
4705   OP_CASE(STRICT_FSQRT)
4706     // clang-format on
4707 #undef OP_CASE
4708   case ISD::FMA:
4709     return RISCVISD::VFMADD_VL;
4710   case ISD::STRICT_FMA:
4711     return RISCVISD::STRICT_VFMADD_VL;
4712   case ISD::AND:
4713     if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
4714       return RISCVISD::VMAND_VL;
4715     return RISCVISD::AND_VL;
4716   case ISD::OR:
4717     if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
4718       return RISCVISD::VMOR_VL;
4719     return RISCVISD::OR_VL;
4720   case ISD::XOR:
4721     if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
4722       return RISCVISD::VMXOR_VL;
4723     return RISCVISD::XOR_VL;
4724   }
4725 }
4726 
4727 /// Return true if a RISC-V target specified op has a merge operand.
4728 static bool hasMergeOp(unsigned Opcode) {
4729   assert(Opcode > RISCVISD::FIRST_NUMBER &&
4730          Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL &&
4731          "not a RISC-V target specific op");
4732   assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 &&
4733          "adding target specific op should update this function");
4734   if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::FMAXNUM_VL)
4735     return true;
4736   if (Opcode == RISCVISD::FCOPYSIGN_VL)
4737     return true;
4738   if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
4739     return true;
4740   if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
4741     return true;
4742   return false;
4743 }
4744 
4745 /// Return true if a RISC-V target specified op has a mask operand.
4746 static bool hasMaskOp(unsigned Opcode) {
4747   assert(Opcode > RISCVISD::FIRST_NUMBER &&
4748          Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL &&
4749          "not a RISC-V target specific op");
4750   assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 &&
4751          "adding target specific op should update this function");
4752   if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
4753     return true;
4754   if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
4755     return true;
4756   if (Opcode >= RISCVISD::STRICT_FADD_VL &&
4757       Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
4758     return true;
4759   return false;
4760 }
4761 
4762 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
4763                                             SelectionDAG &DAG) const {
4764   switch (Op.getOpcode()) {
4765   default:
4766     report_fatal_error("unimplemented operand");
4767   case ISD::ATOMIC_FENCE:
4768     return LowerATOMIC_FENCE(Op, DAG, Subtarget);
4769   case ISD::GlobalAddress:
4770     return lowerGlobalAddress(Op, DAG);
4771   case ISD::BlockAddress:
4772     return lowerBlockAddress(Op, DAG);
4773   case ISD::ConstantPool:
4774     return lowerConstantPool(Op, DAG);
4775   case ISD::JumpTable:
4776     return lowerJumpTable(Op, DAG);
4777   case ISD::GlobalTLSAddress:
4778     return lowerGlobalTLSAddress(Op, DAG);
4779   case ISD::Constant:
4780     return lowerConstant(Op, DAG, Subtarget);
4781   case ISD::SELECT:
4782     return lowerSELECT(Op, DAG);
4783   case ISD::BRCOND:
4784     return lowerBRCOND(Op, DAG);
4785   case ISD::VASTART:
4786     return lowerVASTART(Op, DAG);
4787   case ISD::FRAMEADDR:
4788     return lowerFRAMEADDR(Op, DAG);
4789   case ISD::RETURNADDR:
4790     return lowerRETURNADDR(Op, DAG);
4791   case ISD::SHL_PARTS:
4792     return lowerShiftLeftParts(Op, DAG);
4793   case ISD::SRA_PARTS:
4794     return lowerShiftRightParts(Op, DAG, true);
4795   case ISD::SRL_PARTS:
4796     return lowerShiftRightParts(Op, DAG, false);
4797   case ISD::ROTL:
4798   case ISD::ROTR:
4799     assert(Subtarget.hasVendorXTHeadBb() &&
4800            !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
4801            "Unexpected custom legalization");
4802     // XTHeadBb only supports rotate by constant.
4803     if (!isa<ConstantSDNode>(Op.getOperand(1)))
4804       return SDValue();
4805     return Op;
4806   case ISD::BITCAST: {
4807     SDLoc DL(Op);
4808     EVT VT = Op.getValueType();
4809     SDValue Op0 = Op.getOperand(0);
4810     EVT Op0VT = Op0.getValueType();
4811     MVT XLenVT = Subtarget.getXLenVT();
4812     if (VT == MVT::f16 && Op0VT == MVT::i16 &&
4813         Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
4814       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
4815       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
4816       return FPConv;
4817     }
4818     if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
4819         Subtarget.hasStdExtZfbfmin()) {
4820       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
4821       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
4822       return FPConv;
4823     }
4824     if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
4825         Subtarget.hasStdExtFOrZfinx()) {
4826       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4827       SDValue FPConv =
4828           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
4829       return FPConv;
4830     }
4831     if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
4832         Subtarget.hasStdExtZfa()) {
4833       SDValue Lo, Hi;
4834       std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
4835       SDValue RetReg =
4836           DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
4837       return RetReg;
4838     }
4839 
4840     // Consider other scalar<->scalar casts as legal if the types are legal.
4841     // Otherwise expand them.
4842     if (!VT.isVector() && !Op0VT.isVector()) {
4843       if (isTypeLegal(VT) && isTypeLegal(Op0VT))
4844         return Op;
4845       return SDValue();
4846     }
4847 
4848     assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
4849            "Unexpected types");
4850 
4851     if (VT.isFixedLengthVector()) {
4852       // We can handle fixed length vector bitcasts with a simple replacement
4853       // in isel.
4854       if (Op0VT.isFixedLengthVector())
4855         return Op;
4856       // When bitcasting from scalar to fixed-length vector, insert the scalar
4857       // into a one-element vector of the result type, and perform a vector
4858       // bitcast.
4859       if (!Op0VT.isVector()) {
4860         EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
4861         if (!isTypeLegal(BVT))
4862           return SDValue();
4863         return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
4864                                               DAG.getUNDEF(BVT), Op0,
4865                                               DAG.getConstant(0, DL, XLenVT)));
4866       }
4867       return SDValue();
4868     }
4869     // Custom-legalize bitcasts from fixed-length vector types to scalar types
4870     // thus: bitcast the vector to a one-element vector type whose element type
4871     // is the same as the result type, and extract the first element.
4872     if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
4873       EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
4874       if (!isTypeLegal(BVT))
4875         return SDValue();
4876       SDValue BVec = DAG.getBitcast(BVT, Op0);
4877       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
4878                          DAG.getConstant(0, DL, XLenVT));
4879     }
4880     return SDValue();
4881   }
4882   case ISD::INTRINSIC_WO_CHAIN:
4883     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4884   case ISD::INTRINSIC_W_CHAIN:
4885     return LowerINTRINSIC_W_CHAIN(Op, DAG);
4886   case ISD::INTRINSIC_VOID:
4887     return LowerINTRINSIC_VOID(Op, DAG);
4888   case ISD::IS_FPCLASS:
4889     return LowerIS_FPCLASS(Op, DAG);
4890   case ISD::BITREVERSE: {
4891     MVT VT = Op.getSimpleValueType();
4892     SDLoc DL(Op);
4893     assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
4894     assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
4895     // Expand bitreverse to a bswap(rev8) followed by brev8.
4896     SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
4897     return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
4898   }
4899   case ISD::TRUNCATE:
4900     // Only custom-lower vector truncates
4901     if (!Op.getSimpleValueType().isVector())
4902       return Op;
4903     return lowerVectorTruncLike(Op, DAG);
4904   case ISD::ANY_EXTEND:
4905   case ISD::ZERO_EXTEND:
4906     if (Op.getOperand(0).getValueType().isVector() &&
4907         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
4908       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
4909     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
4910   case ISD::SIGN_EXTEND:
4911     if (Op.getOperand(0).getValueType().isVector() &&
4912         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
4913       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
4914     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
4915   case ISD::SPLAT_VECTOR_PARTS:
4916     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
4917   case ISD::INSERT_VECTOR_ELT:
4918     return lowerINSERT_VECTOR_ELT(Op, DAG);
4919   case ISD::EXTRACT_VECTOR_ELT:
4920     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
4921   case ISD::SCALAR_TO_VECTOR: {
4922     MVT VT = Op.getSimpleValueType();
4923     SDLoc DL(Op);
4924     SDValue Scalar = Op.getOperand(0);
4925     if (VT.getVectorElementType() == MVT::i1) {
4926       MVT WideVT = VT.changeVectorElementType(MVT::i8);
4927       SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
4928       return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
4929     }
4930     MVT ContainerVT = VT;
4931     if (VT.isFixedLengthVector())
4932       ContainerVT = getContainerForFixedLengthVector(VT);
4933     SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
4934     SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
4935                             DAG.getUNDEF(ContainerVT), Scalar, VL);
4936     if (VT.isFixedLengthVector())
4937       V = convertFromScalableVector(VT, V, DAG, Subtarget);
4938     return V;
4939   }
4940   case ISD::VSCALE: {
4941     MVT VT = Op.getSimpleValueType();
4942     SDLoc DL(Op);
4943     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
4944     // We define our scalable vector types for lmul=1 to use a 64 bit known
4945     // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
4946     // vscale as VLENB / 8.
4947     static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
4948     if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
4949       report_fatal_error("Support for VLEN==32 is incomplete.");
4950     // We assume VLENB is a multiple of 8. We manually choose the best shift
4951     // here because SimplifyDemandedBits isn't always able to simplify it.
4952     uint64_t Val = Op.getConstantOperandVal(0);
4953     if (isPowerOf2_64(Val)) {
4954       uint64_t Log2 = Log2_64(Val);
4955       if (Log2 < 3)
4956         return DAG.getNode(ISD::SRL, DL, VT, VLENB,
4957                            DAG.getConstant(3 - Log2, DL, VT));
4958       if (Log2 > 3)
4959         return DAG.getNode(ISD::SHL, DL, VT, VLENB,
4960                            DAG.getConstant(Log2 - 3, DL, VT));
4961       return VLENB;
4962     }
4963     // If the multiplier is a multiple of 8, scale it down to avoid needing
4964     // to shift the VLENB value.
4965     if ((Val % 8) == 0)
4966       return DAG.getNode(ISD::MUL, DL, VT, VLENB,
4967                          DAG.getConstant(Val / 8, DL, VT));
4968 
4969     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
4970                                  DAG.getConstant(3, DL, VT));
4971     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
4972   }
4973   case ISD::FPOWI: {
4974     // Custom promote f16 powi with illegal i32 integer type on RV64. Once
4975     // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
4976     if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
4977         Op.getOperand(1).getValueType() == MVT::i32) {
4978       SDLoc DL(Op);
4979       SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
4980       SDValue Powi =
4981           DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
4982       return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
4983                          DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
4984     }
4985     return SDValue();
4986   }
4987   case ISD::FMAXIMUM:
4988   case ISD::FMINIMUM:
4989     return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
4990   case ISD::FP_EXTEND: {
4991     SDLoc DL(Op);
4992     EVT VT = Op.getValueType();
4993     SDValue Op0 = Op.getOperand(0);
4994     EVT Op0VT = Op0.getValueType();
4995     if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
4996       return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
4997     if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
4998       SDValue FloatVal =
4999           DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
5000       return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
5001     }
5002 
5003     if (!Op.getValueType().isVector())
5004       return Op;
5005     return lowerVectorFPExtendOrRoundLike(Op, DAG);
5006   }
5007   case ISD::FP_ROUND: {
5008     SDLoc DL(Op);
5009     EVT VT = Op.getValueType();
5010     SDValue Op0 = Op.getOperand(0);
5011     EVT Op0VT = Op0.getValueType();
5012     if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
5013       return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
5014     if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
5015         Subtarget.hasStdExtDOrZdinx()) {
5016       SDValue FloatVal =
5017           DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
5018                       DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
5019       return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
5020     }
5021 
5022     if (!Op.getValueType().isVector())
5023       return Op;
5024     return lowerVectorFPExtendOrRoundLike(Op, DAG);
5025   }
5026   case ISD::STRICT_FP_ROUND:
5027   case ISD::STRICT_FP_EXTEND:
5028     return lowerStrictFPExtendOrRoundLike(Op, DAG);
5029   case ISD::FP_TO_SINT:
5030   case ISD::FP_TO_UINT:
5031   case ISD::SINT_TO_FP:
5032   case ISD::UINT_TO_FP:
5033   case ISD::STRICT_FP_TO_SINT:
5034   case ISD::STRICT_FP_TO_UINT:
5035   case ISD::STRICT_SINT_TO_FP:
5036   case ISD::STRICT_UINT_TO_FP: {
5037     // RVV can only do fp<->int conversions to types half/double the size as
5038     // the source. We custom-lower any conversions that do two hops into
5039     // sequences.
5040     MVT VT = Op.getSimpleValueType();
5041     if (!VT.isVector())
5042       return Op;
5043     SDLoc DL(Op);
5044     bool IsStrict = Op->isStrictFPOpcode();
5045     SDValue Src = Op.getOperand(0 + IsStrict);
5046     MVT EltVT = VT.getVectorElementType();
5047     MVT SrcVT = Src.getSimpleValueType();
5048     MVT SrcEltVT = SrcVT.getVectorElementType();
5049     unsigned EltSize = EltVT.getSizeInBits();
5050     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
5051     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
5052            "Unexpected vector element types");
5053 
5054     bool IsInt2FP = SrcEltVT.isInteger();
5055     // Widening conversions
5056     if (EltSize > (2 * SrcEltSize)) {
5057       if (IsInt2FP) {
5058         // Do a regular integer sign/zero extension then convert to float.
5059         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
5060                                       VT.getVectorElementCount());
5061         unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
5062                               Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
5063                                  ? ISD::ZERO_EXTEND
5064                                  : ISD::SIGN_EXTEND;
5065         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
5066         if (IsStrict)
5067           return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
5068                              Op.getOperand(0), Ext);
5069         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
5070       }
5071       // FP2Int
5072       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
5073       // Do one doubling fp_extend then complete the operation by converting
5074       // to int.
5075       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
5076       if (IsStrict) {
5077         auto [FExt, Chain] =
5078             DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
5079         return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
5080       }
5081       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
5082       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
5083     }
5084 
5085     // Narrowing conversions
5086     if (SrcEltSize > (2 * EltSize)) {
5087       if (IsInt2FP) {
5088         // One narrowing int_to_fp, then an fp_round.
5089         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
5090         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
5091         if (IsStrict) {
5092           SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
5093                                        DAG.getVTList(InterimFVT, MVT::Other),
5094                                        Op.getOperand(0), Src);
5095           SDValue Chain = Int2FP.getValue(1);
5096           return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
5097         }
5098         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
5099         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
5100       }
5101       // FP2Int
5102       // One narrowing fp_to_int, then truncate the integer. If the float isn't
5103       // representable by the integer, the result is poison.
5104       MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
5105                                     VT.getVectorElementCount());
5106       if (IsStrict) {
5107         SDValue FP2Int =
5108             DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
5109                         Op.getOperand(0), Src);
5110         SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
5111         return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
5112       }
5113       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
5114       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
5115     }
5116 
5117     // Scalable vectors can exit here. Patterns will handle equally-sized
5118     // conversions halving/doubling ones.
5119     if (!VT.isFixedLengthVector())
5120       return Op;
5121 
5122     // For fixed-length vectors we lower to a custom "VL" node.
5123     unsigned RVVOpc = 0;
5124     switch (Op.getOpcode()) {
5125     default:
5126       llvm_unreachable("Impossible opcode");
5127     case ISD::FP_TO_SINT:
5128       RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
5129       break;
5130     case ISD::FP_TO_UINT:
5131       RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
5132       break;
5133     case ISD::SINT_TO_FP:
5134       RVVOpc = RISCVISD::SINT_TO_FP_VL;
5135       break;
5136     case ISD::UINT_TO_FP:
5137       RVVOpc = RISCVISD::UINT_TO_FP_VL;
5138       break;
5139     case ISD::STRICT_FP_TO_SINT:
5140       RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
5141       break;
5142     case ISD::STRICT_FP_TO_UINT:
5143       RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
5144       break;
5145     case ISD::STRICT_SINT_TO_FP:
5146       RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
5147       break;
5148     case ISD::STRICT_UINT_TO_FP:
5149       RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
5150       break;
5151     }
5152 
5153     MVT ContainerVT = getContainerForFixedLengthVector(VT);
5154     MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
5155     assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
5156            "Expected same element count");
5157 
5158     auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5159 
5160     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
5161     if (IsStrict) {
5162       Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
5163                         Op.getOperand(0), Src, Mask, VL);
5164       SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
5165       return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
5166     }
5167     Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
5168     return convertFromScalableVector(VT, Src, DAG, Subtarget);
5169   }
5170   case ISD::FP_TO_SINT_SAT:
5171   case ISD::FP_TO_UINT_SAT:
5172     return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
5173   case ISD::FP_TO_BF16: {
5174     // Custom lower to ensure the libcall return is passed in an FPR on hard
5175     // float ABIs.
5176     assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
5177     SDLoc DL(Op);
5178     MakeLibCallOptions CallOptions;
5179     RTLIB::Libcall LC =
5180         RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
5181     SDValue Res =
5182         makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
5183     if (Subtarget.is64Bit())
5184       return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
5185     return DAG.getBitcast(MVT::i32, Res);
5186   }
5187   case ISD::BF16_TO_FP: {
5188     assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
5189     MVT VT = Op.getSimpleValueType();
5190     SDLoc DL(Op);
5191     Op = DAG.getNode(
5192         ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
5193         DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
5194     SDValue Res = Subtarget.is64Bit()
5195                       ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
5196                       : DAG.getBitcast(MVT::f32, Op);
5197     // fp_extend if the target VT is bigger than f32.
5198     if (VT != MVT::f32)
5199       return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
5200     return Res;
5201   }
5202   case ISD::FP_TO_FP16: {
5203     // Custom lower to ensure the libcall return is passed in an FPR on hard
5204     // float ABIs.
5205     assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
5206     SDLoc DL(Op);
5207     MakeLibCallOptions CallOptions;
5208     RTLIB::Libcall LC =
5209         RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
5210     SDValue Res =
5211         makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
5212     if (Subtarget.is64Bit())
5213       return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
5214     return DAG.getBitcast(MVT::i32, Res);
5215   }
5216   case ISD::FP16_TO_FP: {
5217     // Custom lower to ensure the libcall argument is passed in an FPR on hard
5218     // float ABIs.
5219     assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
5220     SDLoc DL(Op);
5221     MakeLibCallOptions CallOptions;
5222     SDValue Arg = Subtarget.is64Bit()
5223                       ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
5224                                     Op.getOperand(0))
5225                       : DAG.getBitcast(MVT::f32, Op.getOperand(0));
5226     SDValue Res =
5227         makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
5228             .first;
5229     return Res;
5230   }
5231   case ISD::FTRUNC:
5232   case ISD::FCEIL:
5233   case ISD::FFLOOR:
5234   case ISD::FNEARBYINT:
5235   case ISD::FRINT:
5236   case ISD::FROUND:
5237   case ISD::FROUNDEVEN:
5238     return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
5239   case ISD::VECREDUCE_ADD:
5240   case ISD::VECREDUCE_UMAX:
5241   case ISD::VECREDUCE_SMAX:
5242   case ISD::VECREDUCE_UMIN:
5243   case ISD::VECREDUCE_SMIN:
5244     return lowerVECREDUCE(Op, DAG);
5245   case ISD::VECREDUCE_AND:
5246   case ISD::VECREDUCE_OR:
5247   case ISD::VECREDUCE_XOR:
5248     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
5249       return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
5250     return lowerVECREDUCE(Op, DAG);
5251   case ISD::VECREDUCE_FADD:
5252   case ISD::VECREDUCE_SEQ_FADD:
5253   case ISD::VECREDUCE_FMIN:
5254   case ISD::VECREDUCE_FMAX:
5255     return lowerFPVECREDUCE(Op, DAG);
5256   case ISD::VP_REDUCE_ADD:
5257   case ISD::VP_REDUCE_UMAX:
5258   case ISD::VP_REDUCE_SMAX:
5259   case ISD::VP_REDUCE_UMIN:
5260   case ISD::VP_REDUCE_SMIN:
5261   case ISD::VP_REDUCE_FADD:
5262   case ISD::VP_REDUCE_SEQ_FADD:
5263   case ISD::VP_REDUCE_FMIN:
5264   case ISD::VP_REDUCE_FMAX:
5265     return lowerVPREDUCE(Op, DAG);
5266   case ISD::VP_REDUCE_AND:
5267   case ISD::VP_REDUCE_OR:
5268   case ISD::VP_REDUCE_XOR:
5269     if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
5270       return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
5271     return lowerVPREDUCE(Op, DAG);
5272   case ISD::UNDEF: {
5273     MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
5274     return convertFromScalableVector(Op.getSimpleValueType(),
5275                                      DAG.getUNDEF(ContainerVT), DAG, Subtarget);
5276   }
5277   case ISD::INSERT_SUBVECTOR:
5278     return lowerINSERT_SUBVECTOR(Op, DAG);
5279   case ISD::EXTRACT_SUBVECTOR:
5280     return lowerEXTRACT_SUBVECTOR(Op, DAG);
5281   case ISD::VECTOR_DEINTERLEAVE:
5282     return lowerVECTOR_DEINTERLEAVE(Op, DAG);
5283   case ISD::VECTOR_INTERLEAVE:
5284     return lowerVECTOR_INTERLEAVE(Op, DAG);
5285   case ISD::STEP_VECTOR:
5286     return lowerSTEP_VECTOR(Op, DAG);
5287   case ISD::VECTOR_REVERSE:
5288     return lowerVECTOR_REVERSE(Op, DAG);
5289   case ISD::VECTOR_SPLICE:
5290     return lowerVECTOR_SPLICE(Op, DAG);
5291   case ISD::BUILD_VECTOR:
5292     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
5293   case ISD::SPLAT_VECTOR:
5294     if (Op.getValueType().getVectorElementType() == MVT::i1)
5295       return lowerVectorMaskSplat(Op, DAG);
5296     return SDValue();
5297   case ISD::VECTOR_SHUFFLE:
5298     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
5299   case ISD::CONCAT_VECTORS: {
5300     // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
5301     // better than going through the stack, as the default expansion does.
5302     SDLoc DL(Op);
5303     MVT VT = Op.getSimpleValueType();
5304     unsigned NumOpElts =
5305         Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
5306     SDValue Vec = DAG.getUNDEF(VT);
5307     for (const auto &OpIdx : enumerate(Op->ops())) {
5308       SDValue SubVec = OpIdx.value();
5309       // Don't insert undef subvectors.
5310       if (SubVec.isUndef())
5311         continue;
5312       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
5313                         DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
5314     }
5315     return Vec;
5316   }
5317   case ISD::LOAD:
5318     if (auto V = expandUnalignedRVVLoad(Op, DAG))
5319       return V;
5320     if (Op.getValueType().isFixedLengthVector())
5321       return lowerFixedLengthVectorLoadToRVV(Op, DAG);
5322     return Op;
5323   case ISD::STORE:
5324     if (auto V = expandUnalignedRVVStore(Op, DAG))
5325       return V;
5326     if (Op.getOperand(1).getValueType().isFixedLengthVector())
5327       return lowerFixedLengthVectorStoreToRVV(Op, DAG);
5328     return Op;
5329   case ISD::MLOAD:
5330   case ISD::VP_LOAD:
5331     return lowerMaskedLoad(Op, DAG);
5332   case ISD::MSTORE:
5333   case ISD::VP_STORE:
5334     return lowerMaskedStore(Op, DAG);
5335   case ISD::SELECT_CC: {
5336     // This occurs because we custom legalize SETGT and SETUGT for setcc. That
5337     // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
5338     // into separate SETCC+SELECT just like LegalizeDAG.
5339     SDValue Tmp1 = Op.getOperand(0);
5340     SDValue Tmp2 = Op.getOperand(1);
5341     SDValue True = Op.getOperand(2);
5342     SDValue False = Op.getOperand(3);
5343     EVT VT = Op.getValueType();
5344     SDValue CC = Op.getOperand(4);
5345     EVT CmpVT = Tmp1.getValueType();
5346     EVT CCVT =
5347         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
5348     SDLoc DL(Op);
5349     SDValue Cond =
5350         DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
5351     return DAG.getSelect(DL, VT, Cond, True, False);
5352   }
5353   case ISD::SETCC: {
5354     MVT OpVT = Op.getOperand(0).getSimpleValueType();
5355     if (OpVT.isScalarInteger()) {
5356       MVT VT = Op.getSimpleValueType();
5357       SDValue LHS = Op.getOperand(0);
5358       SDValue RHS = Op.getOperand(1);
5359       ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
5360       assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
5361              "Unexpected CondCode");
5362 
5363       SDLoc DL(Op);
5364 
5365       // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
5366       // convert this to the equivalent of (set(u)ge X, C+1) by using
5367       // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
5368       // in a register.
5369       if (isa<ConstantSDNode>(RHS)) {
5370         int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
5371         if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
5372           // If this is an unsigned compare and the constant is -1, incrementing
5373           // the constant would change behavior. The result should be false.
5374           if (CCVal == ISD::SETUGT && Imm == -1)
5375             return DAG.getConstant(0, DL, VT);
5376           // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
5377           CCVal = ISD::getSetCCSwappedOperands(CCVal);
5378           SDValue SetCC = DAG.getSetCC(
5379               DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
5380           return DAG.getLogicalNOT(DL, SetCC, VT);
5381         }
5382       }
5383 
5384       // Not a constant we could handle, swap the operands and condition code to
5385       // SETLT/SETULT.
5386       CCVal = ISD::getSetCCSwappedOperands(CCVal);
5387       return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
5388     }
5389 
5390     return lowerFixedLengthVectorSetccToRVV(Op, DAG);
5391   }
5392   case ISD::ADD:
5393   case ISD::SUB:
5394   case ISD::MUL:
5395   case ISD::MULHS:
5396   case ISD::MULHU:
5397   case ISD::AND:
5398   case ISD::OR:
5399   case ISD::XOR:
5400   case ISD::SDIV:
5401   case ISD::SREM:
5402   case ISD::UDIV:
5403   case ISD::UREM:
5404     return lowerToScalableOp(Op, DAG);
5405   case ISD::SHL:
5406   case ISD::SRA:
5407   case ISD::SRL:
5408     if (Op.getSimpleValueType().isFixedLengthVector())
5409       return lowerToScalableOp(Op, DAG);
5410     // This can be called for an i32 shift amount that needs to be promoted.
5411     assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
5412            "Unexpected custom legalisation");
5413     return SDValue();
5414   case ISD::SADDSAT:
5415   case ISD::UADDSAT:
5416   case ISD::SSUBSAT:
5417   case ISD::USUBSAT:
5418   case ISD::FADD:
5419   case ISD::FSUB:
5420   case ISD::FMUL:
5421   case ISD::FDIV:
5422   case ISD::FNEG:
5423   case ISD::FABS:
5424   case ISD::FSQRT:
5425   case ISD::FMA:
5426   case ISD::SMIN:
5427   case ISD::SMAX:
5428   case ISD::UMIN:
5429   case ISD::UMAX:
5430   case ISD::FMINNUM:
5431   case ISD::FMAXNUM:
5432     return lowerToScalableOp(Op, DAG);
5433   case ISD::ABS:
5434   case ISD::VP_ABS:
5435     return lowerABS(Op, DAG);
5436   case ISD::CTLZ:
5437   case ISD::CTLZ_ZERO_UNDEF:
5438   case ISD::CTTZ_ZERO_UNDEF:
5439     return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
5440   case ISD::VSELECT:
5441     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
5442   case ISD::FCOPYSIGN:
5443     return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
5444   case ISD::STRICT_FADD:
5445   case ISD::STRICT_FSUB:
5446   case ISD::STRICT_FMUL:
5447   case ISD::STRICT_FDIV:
5448   case ISD::STRICT_FSQRT:
5449   case ISD::STRICT_FMA:
5450     return lowerToScalableOp(Op, DAG);
5451   case ISD::STRICT_FSETCC:
5452   case ISD::STRICT_FSETCCS:
5453     return lowerVectorStrictFSetcc(Op, DAG);
5454   case ISD::STRICT_FCEIL:
5455   case ISD::STRICT_FRINT:
5456   case ISD::STRICT_FFLOOR:
5457   case ISD::STRICT_FTRUNC:
5458   case ISD::STRICT_FNEARBYINT:
5459   case ISD::STRICT_FROUND:
5460   case ISD::STRICT_FROUNDEVEN:
5461     return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
5462   case ISD::MGATHER:
5463   case ISD::VP_GATHER:
5464     return lowerMaskedGather(Op, DAG);
5465   case ISD::MSCATTER:
5466   case ISD::VP_SCATTER:
5467     return lowerMaskedScatter(Op, DAG);
5468   case ISD::GET_ROUNDING:
5469     return lowerGET_ROUNDING(Op, DAG);
5470   case ISD::SET_ROUNDING:
5471     return lowerSET_ROUNDING(Op, DAG);
5472   case ISD::EH_DWARF_CFA:
5473     return lowerEH_DWARF_CFA(Op, DAG);
5474   case ISD::VP_SELECT:
5475     return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
5476   case ISD::VP_MERGE:
5477     return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
5478   case ISD::VP_ADD:
5479     return lowerVPOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true);
5480   case ISD::VP_SUB:
5481     return lowerVPOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true);
5482   case ISD::VP_MUL:
5483     return lowerVPOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true);
5484   case ISD::VP_SDIV:
5485     return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true);
5486   case ISD::VP_UDIV:
5487     return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true);
5488   case ISD::VP_SREM:
5489     return lowerVPOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true);
5490   case ISD::VP_UREM:
5491     return lowerVPOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true);
5492   case ISD::VP_AND:
5493     return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
5494   case ISD::VP_OR:
5495     return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
5496   case ISD::VP_XOR:
5497     return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
5498   case ISD::VP_ASHR:
5499     return lowerVPOp(Op, DAG, RISCVISD::SRA_VL, /*HasMergeOp*/ true);
5500   case ISD::VP_LSHR:
5501     return lowerVPOp(Op, DAG, RISCVISD::SRL_VL, /*HasMergeOp*/ true);
5502   case ISD::VP_SHL:
5503     return lowerVPOp(Op, DAG, RISCVISD::SHL_VL, /*HasMergeOp*/ true);
5504   case ISD::VP_FADD:
5505     return lowerVPOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true);
5506   case ISD::VP_FSUB:
5507     return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true);
5508   case ISD::VP_FMUL:
5509     return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true);
5510   case ISD::VP_FDIV:
5511     return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true);
5512   case ISD::VP_FNEG:
5513     return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
5514   case ISD::VP_FABS:
5515     return lowerVPOp(Op, DAG, RISCVISD::FABS_VL);
5516   case ISD::VP_SQRT:
5517     return lowerVPOp(Op, DAG, RISCVISD::FSQRT_VL);
5518   case ISD::VP_FMA:
5519     return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL);
5520   case ISD::VP_FMINNUM:
5521     return lowerVPOp(Op, DAG, RISCVISD::FMINNUM_VL, /*HasMergeOp*/ true);
5522   case ISD::VP_FMAXNUM:
5523     return lowerVPOp(Op, DAG, RISCVISD::FMAXNUM_VL, /*HasMergeOp*/ true);
5524   case ISD::VP_FCOPYSIGN:
5525     return lowerVPOp(Op, DAG, RISCVISD::FCOPYSIGN_VL, /*HasMergeOp*/ true);
5526   case ISD::VP_SIGN_EXTEND:
5527   case ISD::VP_ZERO_EXTEND:
5528     if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
5529       return lowerVPExtMaskOp(Op, DAG);
5530     return lowerVPOp(Op, DAG,
5531                      Op.getOpcode() == ISD::VP_SIGN_EXTEND
5532                          ? RISCVISD::VSEXT_VL
5533                          : RISCVISD::VZEXT_VL);
5534   case ISD::VP_TRUNCATE:
5535     return lowerVectorTruncLike(Op, DAG);
5536   case ISD::VP_FP_EXTEND:
5537   case ISD::VP_FP_ROUND:
5538     return lowerVectorFPExtendOrRoundLike(Op, DAG);
5539   case ISD::VP_FP_TO_SINT:
5540     return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_X_F_VL);
5541   case ISD::VP_FP_TO_UINT:
5542     return lowerVPFPIntConvOp(Op, DAG, RISCVISD::VFCVT_RTZ_XU_F_VL);
5543   case ISD::VP_SINT_TO_FP:
5544     return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL);
5545   case ISD::VP_UINT_TO_FP:
5546     return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL);
5547   case ISD::VP_SETCC:
5548     if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
5549       return lowerVPSetCCMaskOp(Op, DAG);
5550     return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL, /*HasMergeOp*/ true);
5551   case ISD::VP_SMIN:
5552     return lowerVPOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true);
5553   case ISD::VP_SMAX:
5554     return lowerVPOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true);
5555   case ISD::VP_UMIN:
5556     return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true);
5557   case ISD::VP_UMAX:
5558     return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true);
5559   case ISD::VP_BITREVERSE:
5560     return lowerVPOp(Op, DAG, RISCVISD::BITREVERSE_VL, /*HasMergeOp*/ true);
5561   case ISD::VP_BSWAP:
5562     return lowerVPOp(Op, DAG, RISCVISD::BSWAP_VL, /*HasMergeOp*/ true);
5563   case ISD::VP_CTLZ:
5564   case ISD::VP_CTLZ_ZERO_UNDEF:
5565     if (Subtarget.hasStdExtZvbb())
5566       return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true);
5567     return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
5568   case ISD::VP_CTTZ:
5569   case ISD::VP_CTTZ_ZERO_UNDEF:
5570     if (Subtarget.hasStdExtZvbb())
5571       return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true);
5572     return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
5573   case ISD::VP_CTPOP:
5574     return lowerVPOp(Op, DAG, RISCVISD::CTPOP_VL, /*HasMergeOp*/ true);
5575   case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
5576     return lowerVPStridedLoad(Op, DAG);
5577   case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
5578     return lowerVPStridedStore(Op, DAG);
5579   case ISD::VP_FCEIL:
5580   case ISD::VP_FFLOOR:
5581   case ISD::VP_FRINT:
5582   case ISD::VP_FNEARBYINT:
5583   case ISD::VP_FROUND:
5584   case ISD::VP_FROUNDEVEN:
5585   case ISD::VP_FROUNDTOZERO:
5586     return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
5587   }
5588 }
5589 
5590 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
5591                              SelectionDAG &DAG, unsigned Flags) {
5592   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
5593 }
5594 
5595 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
5596                              SelectionDAG &DAG, unsigned Flags) {
5597   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
5598                                    Flags);
5599 }
5600 
5601 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
5602                              SelectionDAG &DAG, unsigned Flags) {
5603   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
5604                                    N->getOffset(), Flags);
5605 }
5606 
5607 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
5608                              SelectionDAG &DAG, unsigned Flags) {
5609   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
5610 }
5611 
5612 template <class NodeTy>
5613 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
5614                                      bool IsLocal, bool IsExternWeak) const {
5615   SDLoc DL(N);
5616   EVT Ty = getPointerTy(DAG.getDataLayout());
5617 
5618   // When HWASAN is used and tagging of global variables is enabled
5619   // they should be accessed via the GOT, since the tagged address of a global
5620   // is incompatible with existing code models. This also applies to non-pic
5621   // mode.
5622   if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
5623     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
5624     if (IsLocal && !Subtarget.allowTaggedGlobals())
5625       // Use PC-relative addressing to access the symbol. This generates the
5626       // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
5627       // %pcrel_lo(auipc)).
5628       return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
5629 
5630     // Use PC-relative addressing to access the GOT for this symbol, then load
5631     // the address from the GOT. This generates the pattern (PseudoLGA sym),
5632     // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
5633     MachineFunction &MF = DAG.getMachineFunction();
5634     MachineMemOperand *MemOp = MF.getMachineMemOperand(
5635         MachinePointerInfo::getGOT(MF),
5636         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
5637             MachineMemOperand::MOInvariant,
5638         LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
5639     SDValue Load =
5640         DAG.getMemIntrinsicNode(RISCVISD::LGA, DL, DAG.getVTList(Ty, MVT::Other),
5641                                 {DAG.getEntryNode(), Addr}, Ty, MemOp);
5642     return Load;
5643   }
5644 
5645   switch (getTargetMachine().getCodeModel()) {
5646   default:
5647     report_fatal_error("Unsupported code model for lowering");
5648   case CodeModel::Small: {
5649     // Generate a sequence for accessing addresses within the first 2 GiB of
5650     // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
5651     SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
5652     SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
5653     SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
5654     return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
5655   }
5656   case CodeModel::Medium: {
5657     SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
5658     if (IsExternWeak) {
5659       // An extern weak symbol may be undefined, i.e. have value 0, which may
5660       // not be within 2GiB of PC, so use GOT-indirect addressing to access the
5661       // symbol. This generates the pattern (PseudoLGA sym), which expands to
5662       // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
5663       MachineFunction &MF = DAG.getMachineFunction();
5664       MachineMemOperand *MemOp = MF.getMachineMemOperand(
5665           MachinePointerInfo::getGOT(MF),
5666           MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
5667               MachineMemOperand::MOInvariant,
5668           LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
5669       SDValue Load =
5670           DAG.getMemIntrinsicNode(RISCVISD::LGA, DL,
5671                                   DAG.getVTList(Ty, MVT::Other),
5672                                   {DAG.getEntryNode(), Addr}, Ty, MemOp);
5673       return Load;
5674     }
5675 
5676     // Generate a sequence for accessing addresses within any 2GiB range within
5677     // the address space. This generates the pattern (PseudoLLA sym), which
5678     // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
5679     return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
5680   }
5681   }
5682 }
5683 
5684 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
5685                                                 SelectionDAG &DAG) const {
5686   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
5687   assert(N->getOffset() == 0 && "unexpected offset in global node");
5688   const GlobalValue *GV = N->getGlobal();
5689   return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
5690 }
5691 
5692 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
5693                                                SelectionDAG &DAG) const {
5694   BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
5695 
5696   return getAddr(N, DAG);
5697 }
5698 
5699 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
5700                                                SelectionDAG &DAG) const {
5701   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
5702 
5703   return getAddr(N, DAG);
5704 }
5705 
5706 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
5707                                             SelectionDAG &DAG) const {
5708   JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
5709 
5710   return getAddr(N, DAG);
5711 }
5712 
5713 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
5714                                               SelectionDAG &DAG,
5715                                               bool UseGOT) const {
5716   SDLoc DL(N);
5717   EVT Ty = getPointerTy(DAG.getDataLayout());
5718   const GlobalValue *GV = N->getGlobal();
5719   MVT XLenVT = Subtarget.getXLenVT();
5720 
5721   if (UseGOT) {
5722     // Use PC-relative addressing to access the GOT for this TLS symbol, then
5723     // load the address from the GOT and add the thread pointer. This generates
5724     // the pattern (PseudoLA_TLS_IE sym), which expands to
5725     // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
5726     SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
5727     MachineFunction &MF = DAG.getMachineFunction();
5728     MachineMemOperand *MemOp = MF.getMachineMemOperand(
5729         MachinePointerInfo::getGOT(MF),
5730         MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
5731             MachineMemOperand::MOInvariant,
5732         LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
5733     SDValue Load = DAG.getMemIntrinsicNode(
5734         RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other),
5735         {DAG.getEntryNode(), Addr}, Ty, MemOp);
5736 
5737     // Add the thread pointer.
5738     SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
5739     return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
5740   }
5741 
5742   // Generate a sequence for accessing the address relative to the thread
5743   // pointer, with the appropriate adjustment for the thread pointer offset.
5744   // This generates the pattern
5745   // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
5746   SDValue AddrHi =
5747       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
5748   SDValue AddrAdd =
5749       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
5750   SDValue AddrLo =
5751       DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
5752 
5753   SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
5754   SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
5755   SDValue MNAdd =
5756       DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
5757   return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
5758 }
5759 
5760 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
5761                                                SelectionDAG &DAG) const {
5762   SDLoc DL(N);
5763   EVT Ty = getPointerTy(DAG.getDataLayout());
5764   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
5765   const GlobalValue *GV = N->getGlobal();
5766 
5767   // Use a PC-relative addressing mode to access the global dynamic GOT address.
5768   // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
5769   // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
5770   SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
5771   SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr);
5772 
5773   // Prepare argument list to generate call.
5774   ArgListTy Args;
5775   ArgListEntry Entry;
5776   Entry.Node = Load;
5777   Entry.Ty = CallTy;
5778   Args.push_back(Entry);
5779 
5780   // Setup call to __tls_get_addr.
5781   TargetLowering::CallLoweringInfo CLI(DAG);
5782   CLI.setDebugLoc(DL)
5783       .setChain(DAG.getEntryNode())
5784       .setLibCallee(CallingConv::C, CallTy,
5785                     DAG.getExternalSymbol("__tls_get_addr", Ty),
5786                     std::move(Args));
5787 
5788   return LowerCallTo(CLI).first;
5789 }
5790 
5791 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
5792                                                    SelectionDAG &DAG) const {
5793   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
5794   assert(N->getOffset() == 0 && "unexpected offset in global node");
5795 
5796   if (DAG.getTarget().useEmulatedTLS())
5797     return LowerToTLSEmulatedModel(N, DAG);
5798 
5799   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
5800 
5801   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
5802       CallingConv::GHC)
5803     report_fatal_error("In GHC calling convention TLS is not supported");
5804 
5805   SDValue Addr;
5806   switch (Model) {
5807   case TLSModel::LocalExec:
5808     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
5809     break;
5810   case TLSModel::InitialExec:
5811     Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
5812     break;
5813   case TLSModel::LocalDynamic:
5814   case TLSModel::GeneralDynamic:
5815     Addr = getDynamicTLSAddr(N, DAG);
5816     break;
5817   }
5818 
5819   return Addr;
5820 }
5821 
5822 // Return true if Val is equal to (setcc LHS, RHS, CC).
5823 // Return false if Val is the inverse of (setcc LHS, RHS, CC).
5824 // Otherwise, return std::nullopt.
5825 static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
5826                                       ISD::CondCode CC, SDValue Val) {
5827   assert(Val->getOpcode() == ISD::SETCC);
5828   SDValue LHS2 = Val.getOperand(0);
5829   SDValue RHS2 = Val.getOperand(1);
5830   ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
5831 
5832   if (LHS == LHS2 && RHS == RHS2) {
5833     if (CC == CC2)
5834       return true;
5835     if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
5836       return false;
5837   } else if (LHS == RHS2 && RHS == LHS2) {
5838     CC2 = ISD::getSetCCSwappedOperands(CC2);
5839     if (CC == CC2)
5840       return true;
5841     if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
5842       return false;
5843   }
5844 
5845   return std::nullopt;
5846 }
5847 
5848 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
5849                                     const RISCVSubtarget &Subtarget) {
5850   SDValue CondV = N->getOperand(0);
5851   SDValue TrueV = N->getOperand(1);
5852   SDValue FalseV = N->getOperand(2);
5853   MVT VT = N->getSimpleValueType(0);
5854   SDLoc DL(N);
5855 
5856   if (!Subtarget.hasShortForwardBranchOpt()) {
5857     // (select c, -1, y) -> -c | y
5858     if (isAllOnesConstant(TrueV)) {
5859       SDValue Neg = DAG.getNegative(CondV, DL, VT);
5860       return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
5861     }
5862     // (select c, y, -1) -> (c-1) | y
5863     if (isAllOnesConstant(FalseV)) {
5864       SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
5865                                 DAG.getAllOnesConstant(DL, VT));
5866       return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
5867     }
5868 
5869     // (select c, 0, y) -> (c-1) & y
5870     if (isNullConstant(TrueV)) {
5871       SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
5872                                 DAG.getAllOnesConstant(DL, VT));
5873       return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
5874     }
5875     // (select c, y, 0) -> -c & y
5876     if (isNullConstant(FalseV)) {
5877       SDValue Neg = DAG.getNegative(CondV, DL, VT);
5878       return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
5879     }
5880   }
5881 
5882   // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
5883   // when both truev and falsev are also setcc.
5884   if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
5885       FalseV.getOpcode() == ISD::SETCC) {
5886     SDValue LHS = CondV.getOperand(0);
5887     SDValue RHS = CondV.getOperand(1);
5888     ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
5889 
5890     // (select x, x, y) -> x | y
5891     // (select !x, x, y) -> x & y
5892     if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
5893       return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
5894                          FalseV);
5895     }
5896     // (select x, y, x) -> x & y
5897     // (select !x, y, x) -> x | y
5898     if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
5899       return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
5900                          FalseV);
5901     }
5902   }
5903 
5904   return SDValue();
5905 }
5906 
5907 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can
5908 /// check for equality with 0. This function emits nodes that convert the
5909 /// seteq/setne into something that can be compared with 0.
5910 /// Based on RISCVDAGToDAGISel::selectSETCC but modified to produce
5911 /// target-independent SelectionDAG nodes rather than machine nodes.
5912 static SDValue selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
5913                            SelectionDAG &DAG) {
5914   assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
5915          "Unexpected condition code!");
5916 
5917   // We're looking for a setcc.
5918   if (N->getOpcode() != ISD::SETCC)
5919     return SDValue();
5920 
5921   // Must be an equality comparison.
5922   ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
5923   if (CCVal != ExpectedCCVal)
5924     return SDValue();
5925 
5926   SDValue LHS = N->getOperand(0);
5927   SDValue RHS = N->getOperand(1);
5928 
5929   if (!LHS.getValueType().isScalarInteger())
5930     return SDValue();
5931 
5932   // If the RHS side is 0, we don't need any extra instructions, return the LHS.
5933   if (isNullConstant(RHS))
5934     return LHS;
5935 
5936   SDLoc DL(N);
5937 
5938   if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
5939     int64_t CVal = C->getSExtValue();
5940     // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
5941     // non-zero otherwise.
5942     if (CVal == -2048)
5943       return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS,
5944                          DAG.getConstant(CVal, DL, N->getValueType(0)));
5945     // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
5946     // LHS is equal to the RHS and non-zero otherwise.
5947     if (isInt<12>(CVal) || CVal == 2048)
5948       return DAG.getNode(ISD::ADD, DL, N->getValueType(0), LHS,
5949                          DAG.getConstant(-CVal, DL, N->getValueType(0)));
5950   }
5951 
5952   // If nothing else we can XOR the LHS and RHS to produce zero if they are
5953   // equal and a non-zero value if they aren't.
5954   return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS, RHS);
5955 }
5956 
5957 // Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
5958 // into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
5959 // For now we only consider transformation profitable if `binOp(c0, c1)` ends up
5960 // being `0` or `-1`. In such cases we can replace `select` with `and`.
5961 // TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
5962 // than `c0`?
5963 static SDValue
5964 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
5965                                 const RISCVSubtarget &Subtarget) {
5966   if (Subtarget.hasShortForwardBranchOpt())
5967     return SDValue();
5968 
5969   unsigned SelOpNo = 0;
5970   SDValue Sel = BO->getOperand(0);
5971   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
5972     SelOpNo = 1;
5973     Sel = BO->getOperand(1);
5974   }
5975 
5976   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
5977     return SDValue();
5978 
5979   unsigned ConstSelOpNo = 1;
5980   unsigned OtherSelOpNo = 2;
5981   if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
5982     ConstSelOpNo = 2;
5983     OtherSelOpNo = 1;
5984   }
5985   SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
5986   ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
5987   if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
5988     return SDValue();
5989 
5990   SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
5991   ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
5992   if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
5993     return SDValue();
5994 
5995   SDLoc DL(Sel);
5996   EVT VT = BO->getValueType(0);
5997 
5998   SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
5999   if (SelOpNo == 1)
6000     std::swap(NewConstOps[0], NewConstOps[1]);
6001 
6002   SDValue NewConstOp =
6003       DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
6004   if (!NewConstOp)
6005     return SDValue();
6006 
6007   const APInt &NewConstAPInt =
6008       cast<ConstantSDNode>(NewConstOp)->getAPIntValue();
6009   if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
6010     return SDValue();
6011 
6012   SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
6013   SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
6014   if (SelOpNo == 1)
6015     std::swap(NewNonConstOps[0], NewNonConstOps[1]);
6016   SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
6017 
6018   SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
6019   SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
6020   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
6021 }
6022 
6023 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
6024   SDValue CondV = Op.getOperand(0);
6025   SDValue TrueV = Op.getOperand(1);
6026   SDValue FalseV = Op.getOperand(2);
6027   SDLoc DL(Op);
6028   MVT VT = Op.getSimpleValueType();
6029   MVT XLenVT = Subtarget.getXLenVT();
6030 
6031   // Lower vector SELECTs to VSELECTs by splatting the condition.
6032   if (VT.isVector()) {
6033     MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
6034     SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
6035     return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
6036   }
6037 
6038   // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
6039   // nodes to implement the SELECT. Performing the lowering here allows for
6040   // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
6041   // sequence or RISCVISD::SELECT_CC node (branch-based select).
6042   if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
6043       VT.isScalarInteger()) {
6044     if (SDValue NewCondV = selectSETCC(CondV, ISD::SETNE, DAG)) {
6045       // (select (riscv_setne c), t, 0) -> (czero_eqz t, c)
6046       if (isNullConstant(FalseV))
6047         return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV);
6048       // (select (riscv_setne c), 0, f) -> (czero_nez f, c)
6049       if (isNullConstant(TrueV))
6050         return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV);
6051       // (select (riscv_setne c), t, f) -> (or (czero_eqz t, c), (czero_nez f,
6052       // c)
6053       return DAG.getNode(
6054           ISD::OR, DL, VT,
6055           DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV),
6056           DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV));
6057     }
6058     if (SDValue NewCondV =  selectSETCC(CondV, ISD::SETEQ, DAG)) {
6059       // (select (riscv_seteq c), t, 0) -> (czero_nez t, c)
6060       if (isNullConstant(FalseV))
6061         return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV);
6062       // (select (riscv_seteq c), 0, f) -> (czero_eqz f, c)
6063       if (isNullConstant(TrueV))
6064         return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV);
6065       // (select (riscv_seteq c), t, f) -> (or (czero_eqz f, c), (czero_nez t,
6066       // c)
6067       return DAG.getNode(
6068           ISD::OR, DL, VT,
6069           DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV),
6070           DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV));
6071     }
6072 
6073     // (select c, t, 0) -> (czero_eqz t, c)
6074     if (isNullConstant(FalseV))
6075       return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
6076     // (select c, 0, f) -> (czero_nez f, c)
6077     if (isNullConstant(TrueV))
6078       return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
6079 
6080     // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
6081     if (TrueV.getOpcode() == ISD::AND &&
6082         (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
6083       return DAG.getNode(
6084           ISD::OR, DL, VT, TrueV,
6085           DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
6086     // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
6087     if (FalseV.getOpcode() == ISD::AND &&
6088         (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
6089       return DAG.getNode(
6090           ISD::OR, DL, VT, FalseV,
6091           DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
6092 
6093     // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
6094     return DAG.getNode(ISD::OR, DL, VT,
6095                        DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
6096                        DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
6097   }
6098 
6099   if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
6100     return V;
6101 
6102   if (Op.hasOneUse()) {
6103     unsigned UseOpc = Op->use_begin()->getOpcode();
6104     if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
6105       SDNode *BinOp = *Op->use_begin();
6106       if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
6107                                                            DAG, Subtarget)) {
6108         DAG.ReplaceAllUsesWith(BinOp, &NewSel);
6109         return lowerSELECT(NewSel, DAG);
6110       }
6111     }
6112   }
6113 
6114   // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
6115   // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
6116   const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
6117   const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
6118   if (FPTV && FPFV) {
6119     if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
6120       return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
6121     if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
6122       SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
6123                                 DAG.getConstant(1, DL, XLenVT));
6124       return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
6125     }
6126   }
6127 
6128   // If the condition is not an integer SETCC which operates on XLenVT, we need
6129   // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
6130   // (select condv, truev, falsev)
6131   // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
6132   if (CondV.getOpcode() != ISD::SETCC ||
6133       CondV.getOperand(0).getSimpleValueType() != XLenVT) {
6134     SDValue Zero = DAG.getConstant(0, DL, XLenVT);
6135     SDValue SetNE = DAG.getCondCode(ISD::SETNE);
6136 
6137     SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
6138 
6139     return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
6140   }
6141 
6142   // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
6143   // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
6144   // advantage of the integer compare+branch instructions. i.e.:
6145   // (select (setcc lhs, rhs, cc), truev, falsev)
6146   // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
6147   SDValue LHS = CondV.getOperand(0);
6148   SDValue RHS = CondV.getOperand(1);
6149   ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
6150 
6151   // Special case for a select of 2 constants that have a diffence of 1.
6152   // Normally this is done by DAGCombine, but if the select is introduced by
6153   // type legalization or op legalization, we miss it. Restricting to SETLT
6154   // case for now because that is what signed saturating add/sub need.
6155   // FIXME: We don't need the condition to be SETLT or even a SETCC,
6156   // but we would probably want to swap the true/false values if the condition
6157   // is SETGE/SETLE to avoid an XORI.
6158   if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6159       CCVal == ISD::SETLT) {
6160     const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
6161     const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
6162     if (TrueVal - 1 == FalseVal)
6163       return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
6164     if (TrueVal + 1 == FalseVal)
6165       return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
6166   }
6167 
6168   translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6169   // 1 < x ? x : 1 -> 0 < x ? x : 1
6170   if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
6171       RHS == TrueV && LHS == FalseV) {
6172     LHS = DAG.getConstant(0, DL, VT);
6173     // 0 <u x is the same as x != 0.
6174     if (CCVal == ISD::SETULT) {
6175       std::swap(LHS, RHS);
6176       CCVal = ISD::SETNE;
6177     }
6178   }
6179 
6180   // x <s -1 ? x : -1 -> x <s 0 ? x : -1
6181   if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
6182       RHS == FalseV) {
6183     RHS = DAG.getConstant(0, DL, VT);
6184   }
6185 
6186   SDValue TargetCC = DAG.getCondCode(CCVal);
6187 
6188   if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
6189     // (select (setcc lhs, rhs, CC), constant, falsev)
6190     // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
6191     std::swap(TrueV, FalseV);
6192     TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
6193   }
6194 
6195   SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
6196   return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
6197 }
6198 
6199 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
6200   SDValue CondV = Op.getOperand(1);
6201   SDLoc DL(Op);
6202   MVT XLenVT = Subtarget.getXLenVT();
6203 
6204   if (CondV.getOpcode() == ISD::SETCC &&
6205       CondV.getOperand(0).getValueType() == XLenVT) {
6206     SDValue LHS = CondV.getOperand(0);
6207     SDValue RHS = CondV.getOperand(1);
6208     ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
6209 
6210     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6211 
6212     SDValue TargetCC = DAG.getCondCode(CCVal);
6213     return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
6214                        LHS, RHS, TargetCC, Op.getOperand(2));
6215   }
6216 
6217   return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
6218                      CondV, DAG.getConstant(0, DL, XLenVT),
6219                      DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
6220 }
6221 
6222 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
6223   MachineFunction &MF = DAG.getMachineFunction();
6224   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
6225 
6226   SDLoc DL(Op);
6227   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
6228                                  getPointerTy(MF.getDataLayout()));
6229 
6230   // vastart just stores the address of the VarArgsFrameIndex slot into the
6231   // memory location argument.
6232   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
6233   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
6234                       MachinePointerInfo(SV));
6235 }
6236 
6237 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
6238                                             SelectionDAG &DAG) const {
6239   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
6240   MachineFunction &MF = DAG.getMachineFunction();
6241   MachineFrameInfo &MFI = MF.getFrameInfo();
6242   MFI.setFrameAddressIsTaken(true);
6243   Register FrameReg = RI.getFrameRegister(MF);
6244   int XLenInBytes = Subtarget.getXLen() / 8;
6245 
6246   EVT VT = Op.getValueType();
6247   SDLoc DL(Op);
6248   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
6249   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6250   while (Depth--) {
6251     int Offset = -(XLenInBytes * 2);
6252     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
6253                               DAG.getIntPtrConstant(Offset, DL));
6254     FrameAddr =
6255         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
6256   }
6257   return FrameAddr;
6258 }
6259 
6260 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
6261                                              SelectionDAG &DAG) const {
6262   const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
6263   MachineFunction &MF = DAG.getMachineFunction();
6264   MachineFrameInfo &MFI = MF.getFrameInfo();
6265   MFI.setReturnAddressIsTaken(true);
6266   MVT XLenVT = Subtarget.getXLenVT();
6267   int XLenInBytes = Subtarget.getXLen() / 8;
6268 
6269   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6270     return SDValue();
6271 
6272   EVT VT = Op.getValueType();
6273   SDLoc DL(Op);
6274   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6275   if (Depth) {
6276     int Off = -XLenInBytes;
6277     SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
6278     SDValue Offset = DAG.getConstant(Off, DL, VT);
6279     return DAG.getLoad(VT, DL, DAG.getEntryNode(),
6280                        DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
6281                        MachinePointerInfo());
6282   }
6283 
6284   // Return the value of the return address register, marking it an implicit
6285   // live-in.
6286   Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
6287   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
6288 }
6289 
6290 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
6291                                                  SelectionDAG &DAG) const {
6292   SDLoc DL(Op);
6293   SDValue Lo = Op.getOperand(0);
6294   SDValue Hi = Op.getOperand(1);
6295   SDValue Shamt = Op.getOperand(2);
6296   EVT VT = Lo.getValueType();
6297 
6298   // if Shamt-XLEN < 0: // Shamt < XLEN
6299   //   Lo = Lo << Shamt
6300   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
6301   // else:
6302   //   Lo = 0
6303   //   Hi = Lo << (Shamt-XLEN)
6304 
6305   SDValue Zero = DAG.getConstant(0, DL, VT);
6306   SDValue One = DAG.getConstant(1, DL, VT);
6307   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
6308   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
6309   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
6310   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
6311 
6312   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
6313   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
6314   SDValue ShiftRightLo =
6315       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
6316   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
6317   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
6318   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
6319 
6320   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
6321 
6322   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
6323   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
6324 
6325   SDValue Parts[2] = {Lo, Hi};
6326   return DAG.getMergeValues(Parts, DL);
6327 }
6328 
6329 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
6330                                                   bool IsSRA) const {
6331   SDLoc DL(Op);
6332   SDValue Lo = Op.getOperand(0);
6333   SDValue Hi = Op.getOperand(1);
6334   SDValue Shamt = Op.getOperand(2);
6335   EVT VT = Lo.getValueType();
6336 
6337   // SRA expansion:
6338   //   if Shamt-XLEN < 0: // Shamt < XLEN
6339   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
6340   //     Hi = Hi >>s Shamt
6341   //   else:
6342   //     Lo = Hi >>s (Shamt-XLEN);
6343   //     Hi = Hi >>s (XLEN-1)
6344   //
6345   // SRL expansion:
6346   //   if Shamt-XLEN < 0: // Shamt < XLEN
6347   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
6348   //     Hi = Hi >>u Shamt
6349   //   else:
6350   //     Lo = Hi >>u (Shamt-XLEN);
6351   //     Hi = 0;
6352 
6353   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
6354 
6355   SDValue Zero = DAG.getConstant(0, DL, VT);
6356   SDValue One = DAG.getConstant(1, DL, VT);
6357   SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
6358   SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
6359   SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
6360   SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
6361 
6362   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
6363   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
6364   SDValue ShiftLeftHi =
6365       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
6366   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
6367   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
6368   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
6369   SDValue HiFalse =
6370       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
6371 
6372   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
6373 
6374   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
6375   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
6376 
6377   SDValue Parts[2] = {Lo, Hi};
6378   return DAG.getMergeValues(Parts, DL);
6379 }
6380 
6381 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
6382 // legal equivalently-sized i8 type, so we can use that as a go-between.
6383 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
6384                                                   SelectionDAG &DAG) const {
6385   SDLoc DL(Op);
6386   MVT VT = Op.getSimpleValueType();
6387   SDValue SplatVal = Op.getOperand(0);
6388   // All-zeros or all-ones splats are handled specially.
6389   if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
6390     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
6391     return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
6392   }
6393   if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
6394     SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
6395     return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
6396   }
6397   MVT XLenVT = Subtarget.getXLenVT();
6398   assert(SplatVal.getValueType() == XLenVT &&
6399          "Unexpected type for i1 splat value");
6400   MVT InterVT = VT.changeVectorElementType(MVT::i8);
6401   SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
6402                          DAG.getConstant(1, DL, XLenVT));
6403   SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
6404   SDValue Zero = DAG.getConstant(0, DL, InterVT);
6405   return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
6406 }
6407 
6408 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
6409 // illegal (currently only vXi64 RV32).
6410 // FIXME: We could also catch non-constant sign-extended i32 values and lower
6411 // them to VMV_V_X_VL.
6412 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
6413                                                      SelectionDAG &DAG) const {
6414   SDLoc DL(Op);
6415   MVT VecVT = Op.getSimpleValueType();
6416   assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
6417          "Unexpected SPLAT_VECTOR_PARTS lowering");
6418 
6419   assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
6420   SDValue Lo = Op.getOperand(0);
6421   SDValue Hi = Op.getOperand(1);
6422 
6423   if (VecVT.isFixedLengthVector()) {
6424     MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
6425     SDLoc DL(Op);
6426     auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
6427 
6428     SDValue Res =
6429         splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
6430     return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
6431   }
6432 
6433   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
6434     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
6435     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
6436     // If Hi constant is all the same sign bit as Lo, lower this as a custom
6437     // node in order to try and match RVV vector/scalar instructions.
6438     if ((LoC >> 31) == HiC)
6439       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
6440                          Lo, DAG.getRegister(RISCV::X0, MVT::i32));
6441   }
6442 
6443   // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
6444   if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
6445       isa<ConstantSDNode>(Hi.getOperand(1)) &&
6446       Hi.getConstantOperandVal(1) == 31)
6447     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
6448                        DAG.getRegister(RISCV::X0, MVT::i32));
6449 
6450   // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
6451   return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT,
6452                      DAG.getUNDEF(VecVT), Lo, Hi,
6453                      DAG.getRegister(RISCV::X0, MVT::i32));
6454 }
6455 
6456 // Custom-lower extensions from mask vectors by using a vselect either with 1
6457 // for zero/any-extension or -1 for sign-extension:
6458 //   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
6459 // Note that any-extension is lowered identically to zero-extension.
6460 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
6461                                                 int64_t ExtTrueVal) const {
6462   SDLoc DL(Op);
6463   MVT VecVT = Op.getSimpleValueType();
6464   SDValue Src = Op.getOperand(0);
6465   // Only custom-lower extensions from mask types
6466   assert(Src.getValueType().isVector() &&
6467          Src.getValueType().getVectorElementType() == MVT::i1);
6468 
6469   if (VecVT.isScalableVector()) {
6470     SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
6471     SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
6472     return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
6473   }
6474 
6475   MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
6476   MVT I1ContainerVT =
6477       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
6478 
6479   SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
6480 
6481   SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
6482 
6483   MVT XLenVT = Subtarget.getXLenVT();
6484   SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6485   SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
6486 
6487   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6488                           DAG.getUNDEF(ContainerVT), SplatZero, VL);
6489   SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6490                              DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
6491   SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
6492                                SplatTrueVal, SplatZero, VL);
6493 
6494   return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
6495 }
6496 
6497 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
6498     SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
6499   MVT ExtVT = Op.getSimpleValueType();
6500   // Only custom-lower extensions from fixed-length vector types.
6501   if (!ExtVT.isFixedLengthVector())
6502     return Op;
6503   MVT VT = Op.getOperand(0).getSimpleValueType();
6504   // Grab the canonical container type for the extended type. Infer the smaller
6505   // type from that to ensure the same number of vector elements, as we know
6506   // the LMUL will be sufficient to hold the smaller type.
6507   MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
6508   // Get the extended container type manually to ensure the same number of
6509   // vector elements between source and dest.
6510   MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
6511                                      ContainerExtVT.getVectorElementCount());
6512 
6513   SDValue Op1 =
6514       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
6515 
6516   SDLoc DL(Op);
6517   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6518 
6519   SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
6520 
6521   return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
6522 }
6523 
6524 // Custom-lower truncations from vectors to mask vectors by using a mask and a
6525 // setcc operation:
6526 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
6527 SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
6528                                                       SelectionDAG &DAG) const {
6529   bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
6530   SDLoc DL(Op);
6531   EVT MaskVT = Op.getValueType();
6532   // Only expect to custom-lower truncations to mask types
6533   assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
6534          "Unexpected type for vector mask lowering");
6535   SDValue Src = Op.getOperand(0);
6536   MVT VecVT = Src.getSimpleValueType();
6537   SDValue Mask, VL;
6538   if (IsVPTrunc) {
6539     Mask = Op.getOperand(1);
6540     VL = Op.getOperand(2);
6541   }
6542   // If this is a fixed vector, we need to convert it to a scalable vector.
6543   MVT ContainerVT = VecVT;
6544 
6545   if (VecVT.isFixedLengthVector()) {
6546     ContainerVT = getContainerForFixedLengthVector(VecVT);
6547     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6548     if (IsVPTrunc) {
6549       MVT MaskContainerVT =
6550           getContainerForFixedLengthVector(Mask.getSimpleValueType());
6551       Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6552     }
6553   }
6554 
6555   if (!IsVPTrunc) {
6556     std::tie(Mask, VL) =
6557         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
6558   }
6559 
6560   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
6561   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
6562 
6563   SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6564                          DAG.getUNDEF(ContainerVT), SplatOne, VL);
6565   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
6566                           DAG.getUNDEF(ContainerVT), SplatZero, VL);
6567 
6568   MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
6569   SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
6570                               DAG.getUNDEF(ContainerVT), Mask, VL);
6571   Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
6572                       {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
6573                        DAG.getUNDEF(MaskContainerVT), Mask, VL});
6574   if (MaskVT.isFixedLengthVector())
6575     Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
6576   return Trunc;
6577 }
6578 
6579 SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
6580                                                   SelectionDAG &DAG) const {
6581   bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
6582   SDLoc DL(Op);
6583 
6584   MVT VT = Op.getSimpleValueType();
6585   // Only custom-lower vector truncates
6586   assert(VT.isVector() && "Unexpected type for vector truncate lowering");
6587 
6588   // Truncates to mask types are handled differently
6589   if (VT.getVectorElementType() == MVT::i1)
6590     return lowerVectorMaskTruncLike(Op, DAG);
6591 
6592   // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
6593   // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
6594   // truncate by one power of two at a time.
6595   MVT DstEltVT = VT.getVectorElementType();
6596 
6597   SDValue Src = Op.getOperand(0);
6598   MVT SrcVT = Src.getSimpleValueType();
6599   MVT SrcEltVT = SrcVT.getVectorElementType();
6600 
6601   assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
6602          isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
6603          "Unexpected vector truncate lowering");
6604 
6605   MVT ContainerVT = SrcVT;
6606   SDValue Mask, VL;
6607   if (IsVPTrunc) {
6608     Mask = Op.getOperand(1);
6609     VL = Op.getOperand(2);
6610   }
6611   if (SrcVT.isFixedLengthVector()) {
6612     ContainerVT = getContainerForFixedLengthVector(SrcVT);
6613     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6614     if (IsVPTrunc) {
6615       MVT MaskVT = getMaskTypeFor(ContainerVT);
6616       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6617     }
6618   }
6619 
6620   SDValue Result = Src;
6621   if (!IsVPTrunc) {
6622     std::tie(Mask, VL) =
6623         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
6624   }
6625 
6626   LLVMContext &Context = *DAG.getContext();
6627   const ElementCount Count = ContainerVT.getVectorElementCount();
6628   do {
6629     SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
6630     EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
6631     Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
6632                          Mask, VL);
6633   } while (SrcEltVT != DstEltVT);
6634 
6635   if (SrcVT.isFixedLengthVector())
6636     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
6637 
6638   return Result;
6639 }
6640 
6641 SDValue
6642 RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
6643                                                     SelectionDAG &DAG) const {
6644   SDLoc DL(Op);
6645   SDValue Chain = Op.getOperand(0);
6646   SDValue Src = Op.getOperand(1);
6647   MVT VT = Op.getSimpleValueType();
6648   MVT SrcVT = Src.getSimpleValueType();
6649   MVT ContainerVT = VT;
6650   if (VT.isFixedLengthVector()) {
6651     MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6652     ContainerVT =
6653         SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
6654     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6655   }
6656 
6657   auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
6658 
6659   // RVV can only widen/truncate fp to types double/half the size as the source.
6660   if ((VT.getVectorElementType() == MVT::f64 &&
6661        SrcVT.getVectorElementType() == MVT::f16) ||
6662       (VT.getVectorElementType() == MVT::f16 &&
6663        SrcVT.getVectorElementType() == MVT::f64)) {
6664     // For double rounding, the intermediate rounding should be round-to-odd.
6665     unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
6666                                 ? RISCVISD::STRICT_FP_EXTEND_VL
6667                                 : RISCVISD::STRICT_VFNCVT_ROD_VL;
6668     MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
6669     Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
6670                       Chain, Src, Mask, VL);
6671     Chain = Src.getValue(1);
6672   }
6673 
6674   unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
6675                          ? RISCVISD::STRICT_FP_EXTEND_VL
6676                          : RISCVISD::STRICT_FP_ROUND_VL;
6677   SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6678                             Chain, Src, Mask, VL);
6679   if (VT.isFixedLengthVector()) {
6680     // StrictFP operations have two result values. Their lowered result should
6681     // have same result count.
6682     SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
6683     Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
6684   }
6685   return Res;
6686 }
6687 
6688 SDValue
6689 RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
6690                                                     SelectionDAG &DAG) const {
6691   bool IsVP =
6692       Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
6693   bool IsExtend =
6694       Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
6695   // RVV can only do truncate fp to types half the size as the source. We
6696   // custom-lower f64->f16 rounds via RVV's round-to-odd float
6697   // conversion instruction.
6698   SDLoc DL(Op);
6699   MVT VT = Op.getSimpleValueType();
6700 
6701   assert(VT.isVector() && "Unexpected type for vector truncate lowering");
6702 
6703   SDValue Src = Op.getOperand(0);
6704   MVT SrcVT = Src.getSimpleValueType();
6705 
6706   bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
6707                                      SrcVT.getVectorElementType() != MVT::f16);
6708   bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
6709                                      SrcVT.getVectorElementType() != MVT::f64);
6710 
6711   bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
6712 
6713   // Prepare any fixed-length vector operands.
6714   MVT ContainerVT = VT;
6715   SDValue Mask, VL;
6716   if (IsVP) {
6717     Mask = Op.getOperand(1);
6718     VL = Op.getOperand(2);
6719   }
6720   if (VT.isFixedLengthVector()) {
6721     MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6722     ContainerVT =
6723         SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
6724     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6725     if (IsVP) {
6726       MVT MaskVT = getMaskTypeFor(ContainerVT);
6727       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
6728     }
6729   }
6730 
6731   if (!IsVP)
6732     std::tie(Mask, VL) =
6733         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
6734 
6735   unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
6736 
6737   if (IsDirectConv) {
6738     Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
6739     if (VT.isFixedLengthVector())
6740       Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
6741     return Src;
6742   }
6743 
6744   unsigned InterConvOpc =
6745       IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
6746 
6747   MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
6748   SDValue IntermediateConv =
6749       DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
6750   SDValue Result =
6751       DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
6752   if (VT.isFixedLengthVector())
6753     return convertFromScalableVector(VT, Result, DAG, Subtarget);
6754   return Result;
6755 }
6756 
6757 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
6758 // first position of a vector, and that vector is slid up to the insert index.
6759 // By limiting the active vector length to index+1 and merging with the
6760 // original vector (with an undisturbed tail policy for elements >= VL), we
6761 // achieve the desired result of leaving all elements untouched except the one
6762 // at VL-1, which is replaced with the desired value.
6763 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6764                                                     SelectionDAG &DAG) const {
6765   SDLoc DL(Op);
6766   MVT VecVT = Op.getSimpleValueType();
6767   SDValue Vec = Op.getOperand(0);
6768   SDValue Val = Op.getOperand(1);
6769   SDValue Idx = Op.getOperand(2);
6770 
6771   if (VecVT.getVectorElementType() == MVT::i1) {
6772     // FIXME: For now we just promote to an i8 vector and insert into that,
6773     // but this is probably not optimal.
6774     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
6775     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
6776     Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
6777     return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
6778   }
6779 
6780   MVT ContainerVT = VecVT;
6781   // If the operand is a fixed-length vector, convert to a scalable one.
6782   if (VecVT.isFixedLengthVector()) {
6783     ContainerVT = getContainerForFixedLengthVector(VecVT);
6784     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
6785   }
6786 
6787   MVT XLenVT = Subtarget.getXLenVT();
6788 
6789   bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
6790   // Even i64-element vectors on RV32 can be lowered without scalar
6791   // legalization if the most-significant 32 bits of the value are not affected
6792   // by the sign-extension of the lower 32 bits.
6793   // TODO: We could also catch sign extensions of a 32-bit value.
6794   if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
6795     const auto *CVal = cast<ConstantSDNode>(Val);
6796     if (isInt<32>(CVal->getSExtValue())) {
6797       IsLegalInsert = true;
6798       Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
6799     }
6800   }
6801 
6802   auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
6803 
6804   SDValue ValInVec;
6805 
6806   if (IsLegalInsert) {
6807     unsigned Opc =
6808         VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
6809     if (isNullConstant(Idx)) {
6810       Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
6811       if (!VecVT.isFixedLengthVector())
6812         return Vec;
6813       return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
6814     }
6815     ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
6816   } else {
6817     // On RV32, i64-element vectors must be specially handled to place the
6818     // value at element 0, by using two vslide1down instructions in sequence on
6819     // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
6820     // this.
6821     SDValue ValLo, ValHi;
6822     std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
6823     MVT I32ContainerVT =
6824         MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
6825     SDValue I32Mask =
6826         getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
6827     // Limit the active VL to two.
6828     SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
6829     // If the Idx is 0 we can insert directly into the vector.
6830     if (isNullConstant(Idx)) {
6831       // First slide in the lo value, then the hi in above it. We use slide1down
6832       // to avoid the register group overlap constraint of vslide1up.
6833       ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
6834                              Vec, Vec, ValLo, I32Mask, InsertI64VL);
6835       // If the source vector is undef don't pass along the tail elements from
6836       // the previous slide1down.
6837       SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
6838       ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
6839                              Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
6840       // Bitcast back to the right container type.
6841       ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
6842 
6843       if (!VecVT.isFixedLengthVector())
6844         return ValInVec;
6845       return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
6846     }
6847 
6848     // First slide in the lo value, then the hi in above it. We use slide1down
6849     // to avoid the register group overlap constraint of vslide1up.
6850     ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
6851                            DAG.getUNDEF(I32ContainerVT),
6852                            DAG.getUNDEF(I32ContainerVT), ValLo,
6853                            I32Mask, InsertI64VL);
6854     ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
6855                            DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
6856                            I32Mask, InsertI64VL);
6857     // Bitcast back to the right container type.
6858     ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
6859   }
6860 
6861   // Now that the value is in a vector, slide it into position.
6862   SDValue InsertVL =
6863       DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
6864 
6865   // Use tail agnostic policy if Idx is the last index of Vec.
6866   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
6867   if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
6868       cast<ConstantSDNode>(Idx)->getZExtValue() + 1 ==
6869           VecVT.getVectorNumElements())
6870     Policy = RISCVII::TAIL_AGNOSTIC;
6871   SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
6872                                 Idx, Mask, InsertVL, Policy);
6873   if (!VecVT.isFixedLengthVector())
6874     return Slideup;
6875   return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
6876 }
6877 
6878 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
6879 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
6880 // types this is done using VMV_X_S to allow us to glean information about the
6881 // sign bits of the result.
6882 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6883                                                      SelectionDAG &DAG) const {
6884   SDLoc DL(Op);
6885   SDValue Idx = Op.getOperand(1);
6886   SDValue Vec = Op.getOperand(0);
6887   EVT EltVT = Op.getValueType();
6888   MVT VecVT = Vec.getSimpleValueType();
6889   MVT XLenVT = Subtarget.getXLenVT();
6890 
6891   if (VecVT.getVectorElementType() == MVT::i1) {
6892     // Use vfirst.m to extract the first bit.
6893     if (isNullConstant(Idx)) {
6894       MVT ContainerVT = VecVT;
6895       if (VecVT.isFixedLengthVector()) {
6896         ContainerVT = getContainerForFixedLengthVector(VecVT);
6897         Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
6898       }
6899       auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
6900       SDValue Vfirst =
6901           DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
6902       return DAG.getSetCC(DL, XLenVT, Vfirst, DAG.getConstant(0, DL, XLenVT),
6903                           ISD::SETEQ);
6904     }
6905     if (VecVT.isFixedLengthVector()) {
6906       unsigned NumElts = VecVT.getVectorNumElements();
6907       if (NumElts >= 8) {
6908         MVT WideEltVT;
6909         unsigned WidenVecLen;
6910         SDValue ExtractElementIdx;
6911         SDValue ExtractBitIdx;
6912         unsigned MaxEEW = Subtarget.getELEN();
6913         MVT LargestEltVT = MVT::getIntegerVT(
6914             std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
6915         if (NumElts <= LargestEltVT.getSizeInBits()) {
6916           assert(isPowerOf2_32(NumElts) &&
6917                  "the number of elements should be power of 2");
6918           WideEltVT = MVT::getIntegerVT(NumElts);
6919           WidenVecLen = 1;
6920           ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
6921           ExtractBitIdx = Idx;
6922         } else {
6923           WideEltVT = LargestEltVT;
6924           WidenVecLen = NumElts / WideEltVT.getSizeInBits();
6925           // extract element index = index / element width
6926           ExtractElementIdx = DAG.getNode(
6927               ISD::SRL, DL, XLenVT, Idx,
6928               DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
6929           // mask bit index = index % element width
6930           ExtractBitIdx = DAG.getNode(
6931               ISD::AND, DL, XLenVT, Idx,
6932               DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
6933         }
6934         MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
6935         Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
6936         SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
6937                                          Vec, ExtractElementIdx);
6938         // Extract the bit from GPR.
6939         SDValue ShiftRight =
6940             DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
6941         return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
6942                            DAG.getConstant(1, DL, XLenVT));
6943       }
6944     }
6945     // Otherwise, promote to an i8 vector and extract from that.
6946     MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
6947     Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
6948     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
6949   }
6950 
6951   // If this is a fixed vector, we need to convert it to a scalable vector.
6952   MVT ContainerVT = VecVT;
6953   if (VecVT.isFixedLengthVector()) {
6954     ContainerVT = getContainerForFixedLengthVector(VecVT);
6955     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
6956   }
6957 
6958   // If the index is 0, the vector is already in the right position.
6959   if (!isNullConstant(Idx)) {
6960     // Use a VL of 1 to avoid processing more elements than we need.
6961     auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
6962     Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
6963                         DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
6964   }
6965 
6966   if (!EltVT.isInteger()) {
6967     // Floating-point extracts are handled in TableGen.
6968     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6969                        DAG.getConstant(0, DL, XLenVT));
6970   }
6971 
6972   SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
6973   return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
6974 }
6975 
6976 // Some RVV intrinsics may claim that they want an integer operand to be
6977 // promoted or expanded.
6978 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
6979                                            const RISCVSubtarget &Subtarget) {
6980   assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
6981           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
6982           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
6983          "Unexpected opcode");
6984 
6985   if (!Subtarget.hasVInstructions())
6986     return SDValue();
6987 
6988   bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
6989                   Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
6990   unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
6991 
6992   SDLoc DL(Op);
6993 
6994   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
6995       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
6996   if (!II || !II->hasScalarOperand())
6997     return SDValue();
6998 
6999   unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
7000   assert(SplatOp < Op.getNumOperands());
7001 
7002   SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
7003   SDValue &ScalarOp = Operands[SplatOp];
7004   MVT OpVT = ScalarOp.getSimpleValueType();
7005   MVT XLenVT = Subtarget.getXLenVT();
7006 
7007   // If this isn't a scalar, or its type is XLenVT we're done.
7008   if (!OpVT.isScalarInteger() || OpVT == XLenVT)
7009     return SDValue();
7010 
7011   // Simplest case is that the operand needs to be promoted to XLenVT.
7012   if (OpVT.bitsLT(XLenVT)) {
7013     // If the operand is a constant, sign extend to increase our chances
7014     // of being able to use a .vi instruction. ANY_EXTEND would become a
7015     // a zero extend and the simm5 check in isel would fail.
7016     // FIXME: Should we ignore the upper bits in isel instead?
7017     unsigned ExtOpc =
7018         isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
7019     ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
7020     return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
7021   }
7022 
7023   // Use the previous operand to get the vXi64 VT. The result might be a mask
7024   // VT for compares. Using the previous operand assumes that the previous
7025   // operand will never have a smaller element size than a scalar operand and
7026   // that a widening operation never uses SEW=64.
7027   // NOTE: If this fails the below assert, we can probably just find the
7028   // element count from any operand or result and use it to construct the VT.
7029   assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
7030   MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
7031 
7032   // The more complex case is when the scalar is larger than XLenVT.
7033   assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
7034          VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
7035 
7036   // If this is a sign-extended 32-bit value, we can truncate it and rely on the
7037   // instruction to sign-extend since SEW>XLEN.
7038   if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
7039     ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
7040     return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
7041   }
7042 
7043   switch (IntNo) {
7044   case Intrinsic::riscv_vslide1up:
7045   case Intrinsic::riscv_vslide1down:
7046   case Intrinsic::riscv_vslide1up_mask:
7047   case Intrinsic::riscv_vslide1down_mask: {
7048     // We need to special case these when the scalar is larger than XLen.
7049     unsigned NumOps = Op.getNumOperands();
7050     bool IsMasked = NumOps == 7;
7051 
7052     // Convert the vector source to the equivalent nxvXi32 vector.
7053     MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
7054     SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
7055     SDValue ScalarLo, ScalarHi;
7056     std::tie(ScalarLo, ScalarHi) =
7057         DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
7058 
7059     // Double the VL since we halved SEW.
7060     SDValue AVL = getVLOperand(Op);
7061     SDValue I32VL;
7062 
7063     // Optimize for constant AVL
7064     if (isa<ConstantSDNode>(AVL)) {
7065       unsigned EltSize = VT.getScalarSizeInBits();
7066       unsigned MinSize = VT.getSizeInBits().getKnownMinValue();
7067 
7068       unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
7069       unsigned MaxVLMAX =
7070           RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
7071 
7072       unsigned VectorBitsMin = Subtarget.getRealMinVLen();
7073       unsigned MinVLMAX =
7074           RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
7075 
7076       uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
7077       if (AVLInt <= MinVLMAX) {
7078         I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
7079       } else if (AVLInt >= 2 * MaxVLMAX) {
7080         // Just set vl to VLMAX in this situation
7081         RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
7082         SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
7083         unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
7084         SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
7085         SDValue SETVLMAX = DAG.getTargetConstant(
7086             Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
7087         I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
7088                             LMUL);
7089       } else {
7090         // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
7091         // is related to the hardware implementation.
7092         // So let the following code handle
7093       }
7094     }
7095     if (!I32VL) {
7096       RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
7097       SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
7098       unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
7099       SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
7100       SDValue SETVL =
7101           DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
7102       // Using vsetvli instruction to get actually used length which related to
7103       // the hardware implementation
7104       SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
7105                                SEW, LMUL);
7106       I32VL =
7107           DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
7108     }
7109 
7110     SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
7111 
7112     // Shift the two scalar parts in using SEW=32 slide1up/slide1down
7113     // instructions.
7114     SDValue Passthru;
7115     if (IsMasked)
7116       Passthru = DAG.getUNDEF(I32VT);
7117     else
7118       Passthru = DAG.getBitcast(I32VT, Operands[1]);
7119 
7120     if (IntNo == Intrinsic::riscv_vslide1up ||
7121         IntNo == Intrinsic::riscv_vslide1up_mask) {
7122       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
7123                         ScalarHi, I32Mask, I32VL);
7124       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
7125                         ScalarLo, I32Mask, I32VL);
7126     } else {
7127       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
7128                         ScalarLo, I32Mask, I32VL);
7129       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
7130                         ScalarHi, I32Mask, I32VL);
7131     }
7132 
7133     // Convert back to nxvXi64.
7134     Vec = DAG.getBitcast(VT, Vec);
7135 
7136     if (!IsMasked)
7137       return Vec;
7138     // Apply mask after the operation.
7139     SDValue Mask = Operands[NumOps - 3];
7140     SDValue MaskedOff = Operands[1];
7141     // Assume Policy operand is the last operand.
7142     uint64_t Policy =
7143         cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue();
7144     // We don't need to select maskedoff if it's undef.
7145     if (MaskedOff.isUndef())
7146       return Vec;
7147     // TAMU
7148     if (Policy == RISCVII::TAIL_AGNOSTIC)
7149       return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
7150                          AVL);
7151     // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
7152     // It's fine because vmerge does not care mask policy.
7153     return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
7154                        AVL);
7155   }
7156   }
7157 
7158   // We need to convert the scalar to a splat vector.
7159   SDValue VL = getVLOperand(Op);
7160   assert(VL.getValueType() == XLenVT);
7161   ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
7162   return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
7163 }
7164 
7165 // Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
7166 // scalable vector llvm.get.vector.length for now.
7167 //
7168 // We need to convert from a scalable VF to a vsetvli with VLMax equal to
7169 // (vscale * VF). The vscale and VF are independent of element width. We use
7170 // SEW=8 for the vsetvli because it is the only element width that supports all
7171 // fractional LMULs. The LMUL is choosen so that with SEW=8 the VLMax is
7172 // (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
7173 // InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
7174 // SEW and LMUL are better for the surrounding vector instructions.
7175 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
7176                                     const RISCVSubtarget &Subtarget) {
7177   MVT XLenVT = Subtarget.getXLenVT();
7178 
7179   // The smallest LMUL is only valid for the smallest element width.
7180   const unsigned ElementWidth = 8;
7181 
7182   // Determine the VF that corresponds to LMUL 1 for ElementWidth.
7183   unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
7184   // We don't support VF==1 with ELEN==32.
7185   unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
7186 
7187   unsigned VF = N->getConstantOperandVal(2);
7188   assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
7189          "Unexpected VF");
7190   (void)MinVF;
7191 
7192   bool Fractional = VF < LMul1VF;
7193   unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
7194   unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
7195   unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
7196 
7197   SDLoc DL(N);
7198 
7199   SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
7200   SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
7201 
7202   SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
7203 
7204   SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
7205   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
7206 }
7207 
7208 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
7209                                                      SelectionDAG &DAG) const {
7210   unsigned IntNo = Op.getConstantOperandVal(0);
7211   SDLoc DL(Op);
7212   MVT XLenVT = Subtarget.getXLenVT();
7213 
7214   switch (IntNo) {
7215   default:
7216     break; // Don't custom lower most intrinsics.
7217   case Intrinsic::thread_pointer: {
7218     EVT PtrVT = getPointerTy(DAG.getDataLayout());
7219     return DAG.getRegister(RISCV::X4, PtrVT);
7220   }
7221   case Intrinsic::riscv_orc_b:
7222   case Intrinsic::riscv_brev8:
7223   case Intrinsic::riscv_sha256sig0:
7224   case Intrinsic::riscv_sha256sig1:
7225   case Intrinsic::riscv_sha256sum0:
7226   case Intrinsic::riscv_sha256sum1:
7227   case Intrinsic::riscv_sm3p0:
7228   case Intrinsic::riscv_sm3p1: {
7229     unsigned Opc;
7230     switch (IntNo) {
7231     case Intrinsic::riscv_orc_b:      Opc = RISCVISD::ORC_B;      break;
7232     case Intrinsic::riscv_brev8:      Opc = RISCVISD::BREV8;      break;
7233     case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
7234     case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
7235     case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
7236     case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
7237     case Intrinsic::riscv_sm3p0:      Opc = RISCVISD::SM3P0;      break;
7238     case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
7239     }
7240 
7241     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
7242   }
7243   case Intrinsic::riscv_sm4ks:
7244   case Intrinsic::riscv_sm4ed: {
7245     unsigned Opc =
7246         IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
7247     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
7248                        Op.getOperand(3));
7249   }
7250   case Intrinsic::riscv_zip:
7251   case Intrinsic::riscv_unzip: {
7252     unsigned Opc =
7253         IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
7254     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
7255   }
7256   case Intrinsic::riscv_clmul:
7257     return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
7258                        Op.getOperand(2));
7259   case Intrinsic::riscv_clmulh:
7260     return DAG.getNode(RISCVISD::CLMULH, DL, XLenVT, Op.getOperand(1),
7261                        Op.getOperand(2));
7262   case Intrinsic::riscv_clmulr:
7263     return DAG.getNode(RISCVISD::CLMULR, DL, XLenVT, Op.getOperand(1),
7264                        Op.getOperand(2));
7265   case Intrinsic::experimental_get_vector_length:
7266     return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
7267   case Intrinsic::riscv_vmv_x_s:
7268     assert(Op.getValueType() == XLenVT && "Unexpected VT!");
7269     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
7270                        Op.getOperand(1));
7271   case Intrinsic::riscv_vfmv_f_s:
7272     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
7273                        Op.getOperand(1), DAG.getConstant(0, DL, XLenVT));
7274   case Intrinsic::riscv_vmv_v_x:
7275     return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
7276                             Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
7277                             Subtarget);
7278   case Intrinsic::riscv_vfmv_v_f:
7279     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
7280                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
7281   case Intrinsic::riscv_vmv_s_x: {
7282     SDValue Scalar = Op.getOperand(2);
7283 
7284     if (Scalar.getValueType().bitsLE(XLenVT)) {
7285       Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
7286       return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
7287                          Op.getOperand(1), Scalar, Op.getOperand(3));
7288     }
7289 
7290     assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
7291 
7292     // This is an i64 value that lives in two scalar registers. We have to
7293     // insert this in a convoluted way. First we build vXi64 splat containing
7294     // the two values that we assemble using some bit math. Next we'll use
7295     // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
7296     // to merge element 0 from our splat into the source vector.
7297     // FIXME: This is probably not the best way to do this, but it is
7298     // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
7299     // point.
7300     //   sw lo, (a0)
7301     //   sw hi, 4(a0)
7302     //   vlse vX, (a0)
7303     //
7304     //   vid.v      vVid
7305     //   vmseq.vx   mMask, vVid, 0
7306     //   vmerge.vvm vDest, vSrc, vVal, mMask
7307     MVT VT = Op.getSimpleValueType();
7308     SDValue Vec = Op.getOperand(1);
7309     SDValue VL = getVLOperand(Op);
7310 
7311     SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
7312     if (Op.getOperand(1).isUndef())
7313       return SplattedVal;
7314     SDValue SplattedIdx =
7315         DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
7316                     DAG.getConstant(0, DL, MVT::i32), VL);
7317 
7318     MVT MaskVT = getMaskTypeFor(VT);
7319     SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
7320     SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
7321     SDValue SelectCond =
7322         DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
7323                     {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
7324                      DAG.getUNDEF(MaskVT), Mask, VL});
7325     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
7326                        Vec, VL);
7327   }
7328   }
7329 
7330   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
7331 }
7332 
7333 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
7334                                                     SelectionDAG &DAG) const {
7335   unsigned IntNo = Op.getConstantOperandVal(1);
7336   switch (IntNo) {
7337   default:
7338     break;
7339   case Intrinsic::riscv_masked_strided_load: {
7340     SDLoc DL(Op);
7341     MVT XLenVT = Subtarget.getXLenVT();
7342 
7343     // If the mask is known to be all ones, optimize to an unmasked intrinsic;
7344     // the selection of the masked intrinsics doesn't do this for us.
7345     SDValue Mask = Op.getOperand(5);
7346     bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
7347 
7348     MVT VT = Op->getSimpleValueType(0);
7349     MVT ContainerVT = VT;
7350     if (VT.isFixedLengthVector())
7351       ContainerVT = getContainerForFixedLengthVector(VT);
7352 
7353     SDValue PassThru = Op.getOperand(2);
7354     if (!IsUnmasked) {
7355       MVT MaskVT = getMaskTypeFor(ContainerVT);
7356       if (VT.isFixedLengthVector()) {
7357         Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7358         PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
7359       }
7360     }
7361 
7362     auto *Load = cast<MemIntrinsicSDNode>(Op);
7363     SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7364     SDValue Ptr = Op.getOperand(3);
7365     SDValue Stride = Op.getOperand(4);
7366     SDValue Result, Chain;
7367 
7368     // TODO: We restrict this to unmasked loads currently in consideration of
7369     // the complexity of hanlding all falses masks.
7370     if (IsUnmasked && isNullConstant(Stride)) {
7371       MVT ScalarVT = ContainerVT.getVectorElementType();
7372       SDValue ScalarLoad =
7373           DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
7374                          ScalarVT, Load->getMemOperand());
7375       Chain = ScalarLoad.getValue(1);
7376       Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
7377                                 Subtarget);
7378     } else {
7379       SDValue IntID = DAG.getTargetConstant(
7380           IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
7381           XLenVT);
7382 
7383       SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
7384       if (IsUnmasked)
7385         Ops.push_back(DAG.getUNDEF(ContainerVT));
7386       else
7387         Ops.push_back(PassThru);
7388       Ops.push_back(Ptr);
7389       Ops.push_back(Stride);
7390       if (!IsUnmasked)
7391         Ops.push_back(Mask);
7392       Ops.push_back(VL);
7393       if (!IsUnmasked) {
7394         SDValue Policy =
7395             DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
7396         Ops.push_back(Policy);
7397       }
7398 
7399       SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
7400       Result =
7401           DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
7402                                   Load->getMemoryVT(), Load->getMemOperand());
7403       Chain = Result.getValue(1);
7404     }
7405     if (VT.isFixedLengthVector())
7406       Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
7407     return DAG.getMergeValues({Result, Chain}, DL);
7408   }
7409   case Intrinsic::riscv_seg2_load:
7410   case Intrinsic::riscv_seg3_load:
7411   case Intrinsic::riscv_seg4_load:
7412   case Intrinsic::riscv_seg5_load:
7413   case Intrinsic::riscv_seg6_load:
7414   case Intrinsic::riscv_seg7_load:
7415   case Intrinsic::riscv_seg8_load: {
7416     SDLoc DL(Op);
7417     static const Intrinsic::ID VlsegInts[7] = {
7418         Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
7419         Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
7420         Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
7421         Intrinsic::riscv_vlseg8};
7422     unsigned NF = Op->getNumValues() - 1;
7423     assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
7424     MVT XLenVT = Subtarget.getXLenVT();
7425     MVT VT = Op->getSimpleValueType(0);
7426     MVT ContainerVT = getContainerForFixedLengthVector(VT);
7427 
7428     SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
7429     SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
7430     auto *Load = cast<MemIntrinsicSDNode>(Op);
7431     SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
7432     ContainerVTs.push_back(MVT::Other);
7433     SDVTList VTs = DAG.getVTList(ContainerVTs);
7434     SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
7435     Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
7436     Ops.push_back(Op.getOperand(2));
7437     Ops.push_back(VL);
7438     SDValue Result =
7439         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
7440                                 Load->getMemoryVT(), Load->getMemOperand());
7441     SmallVector<SDValue, 9> Results;
7442     for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
7443       Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
7444                                                   DAG, Subtarget));
7445     Results.push_back(Result.getValue(NF));
7446     return DAG.getMergeValues(Results, DL);
7447   }
7448   }
7449 
7450   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
7451 }
7452 
7453 SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
7454                                                  SelectionDAG &DAG) const {
7455   unsigned IntNo = Op.getConstantOperandVal(1);
7456   switch (IntNo) {
7457   default:
7458     break;
7459   case Intrinsic::riscv_masked_strided_store: {
7460     SDLoc DL(Op);
7461     MVT XLenVT = Subtarget.getXLenVT();
7462 
7463     // If the mask is known to be all ones, optimize to an unmasked intrinsic;
7464     // the selection of the masked intrinsics doesn't do this for us.
7465     SDValue Mask = Op.getOperand(5);
7466     bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
7467 
7468     SDValue Val = Op.getOperand(2);
7469     MVT VT = Val.getSimpleValueType();
7470     MVT ContainerVT = VT;
7471     if (VT.isFixedLengthVector()) {
7472       ContainerVT = getContainerForFixedLengthVector(VT);
7473       Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
7474     }
7475     if (!IsUnmasked) {
7476       MVT MaskVT = getMaskTypeFor(ContainerVT);
7477       if (VT.isFixedLengthVector())
7478         Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7479     }
7480 
7481     SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7482 
7483     SDValue IntID = DAG.getTargetConstant(
7484         IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
7485         XLenVT);
7486 
7487     auto *Store = cast<MemIntrinsicSDNode>(Op);
7488     SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
7489     Ops.push_back(Val);
7490     Ops.push_back(Op.getOperand(3)); // Ptr
7491     Ops.push_back(Op.getOperand(4)); // Stride
7492     if (!IsUnmasked)
7493       Ops.push_back(Mask);
7494     Ops.push_back(VL);
7495 
7496     return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
7497                                    Ops, Store->getMemoryVT(),
7498                                    Store->getMemOperand());
7499   }
7500   case Intrinsic::riscv_seg2_store:
7501   case Intrinsic::riscv_seg3_store:
7502   case Intrinsic::riscv_seg4_store:
7503   case Intrinsic::riscv_seg5_store:
7504   case Intrinsic::riscv_seg6_store:
7505   case Intrinsic::riscv_seg7_store:
7506   case Intrinsic::riscv_seg8_store: {
7507     SDLoc DL(Op);
7508     static const Intrinsic::ID VssegInts[] = {
7509         Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
7510         Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
7511         Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
7512         Intrinsic::riscv_vsseg8};
7513     // Operands are (chain, int_id, vec*, ptr, vl)
7514     unsigned NF = Op->getNumOperands() - 4;
7515     assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
7516     MVT XLenVT = Subtarget.getXLenVT();
7517     MVT VT = Op->getOperand(2).getSimpleValueType();
7518     MVT ContainerVT = getContainerForFixedLengthVector(VT);
7519 
7520     SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
7521     SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
7522     SDValue Ptr = Op->getOperand(NF + 2);
7523 
7524     auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
7525     SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
7526     for (unsigned i = 0; i < NF; i++)
7527       Ops.push_back(convertToScalableVector(
7528           ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
7529     Ops.append({Ptr, VL});
7530 
7531     return DAG.getMemIntrinsicNode(
7532         ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
7533         FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
7534   }
7535   }
7536 
7537   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
7538 }
7539 
7540 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
7541   switch (ISDOpcode) {
7542   default:
7543     llvm_unreachable("Unhandled reduction");
7544   case ISD::VECREDUCE_ADD:
7545     return RISCVISD::VECREDUCE_ADD_VL;
7546   case ISD::VECREDUCE_UMAX:
7547     return RISCVISD::VECREDUCE_UMAX_VL;
7548   case ISD::VECREDUCE_SMAX:
7549     return RISCVISD::VECREDUCE_SMAX_VL;
7550   case ISD::VECREDUCE_UMIN:
7551     return RISCVISD::VECREDUCE_UMIN_VL;
7552   case ISD::VECREDUCE_SMIN:
7553     return RISCVISD::VECREDUCE_SMIN_VL;
7554   case ISD::VECREDUCE_AND:
7555     return RISCVISD::VECREDUCE_AND_VL;
7556   case ISD::VECREDUCE_OR:
7557     return RISCVISD::VECREDUCE_OR_VL;
7558   case ISD::VECREDUCE_XOR:
7559     return RISCVISD::VECREDUCE_XOR_VL;
7560   }
7561 }
7562 
7563 SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
7564                                                          SelectionDAG &DAG,
7565                                                          bool IsVP) const {
7566   SDLoc DL(Op);
7567   SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
7568   MVT VecVT = Vec.getSimpleValueType();
7569   assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
7570           Op.getOpcode() == ISD::VECREDUCE_OR ||
7571           Op.getOpcode() == ISD::VECREDUCE_XOR ||
7572           Op.getOpcode() == ISD::VP_REDUCE_AND ||
7573           Op.getOpcode() == ISD::VP_REDUCE_OR ||
7574           Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
7575          "Unexpected reduction lowering");
7576 
7577   MVT XLenVT = Subtarget.getXLenVT();
7578   assert(Op.getValueType() == XLenVT &&
7579          "Expected reduction output to be legalized to XLenVT");
7580 
7581   MVT ContainerVT = VecVT;
7582   if (VecVT.isFixedLengthVector()) {
7583     ContainerVT = getContainerForFixedLengthVector(VecVT);
7584     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7585   }
7586 
7587   SDValue Mask, VL;
7588   if (IsVP) {
7589     Mask = Op.getOperand(2);
7590     VL = Op.getOperand(3);
7591   } else {
7592     std::tie(Mask, VL) =
7593         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7594   }
7595 
7596   unsigned BaseOpc;
7597   ISD::CondCode CC;
7598   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7599 
7600   switch (Op.getOpcode()) {
7601   default:
7602     llvm_unreachable("Unhandled reduction");
7603   case ISD::VECREDUCE_AND:
7604   case ISD::VP_REDUCE_AND: {
7605     // vcpop ~x == 0
7606     SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
7607     Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
7608     Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
7609     CC = ISD::SETEQ;
7610     BaseOpc = ISD::AND;
7611     break;
7612   }
7613   case ISD::VECREDUCE_OR:
7614   case ISD::VP_REDUCE_OR:
7615     // vcpop x != 0
7616     Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
7617     CC = ISD::SETNE;
7618     BaseOpc = ISD::OR;
7619     break;
7620   case ISD::VECREDUCE_XOR:
7621   case ISD::VP_REDUCE_XOR: {
7622     // ((vcpop x) & 1) != 0
7623     SDValue One = DAG.getConstant(1, DL, XLenVT);
7624     Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
7625     Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
7626     CC = ISD::SETNE;
7627     BaseOpc = ISD::XOR;
7628     break;
7629   }
7630   }
7631 
7632   SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
7633 
7634   if (!IsVP)
7635     return SetCC;
7636 
7637   // Now include the start value in the operation.
7638   // Note that we must return the start value when no elements are operated
7639   // upon. The vcpop instructions we've emitted in each case above will return
7640   // 0 for an inactive vector, and so we've already received the neutral value:
7641   // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
7642   // can simply include the start value.
7643   return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
7644 }
7645 
7646 static bool isNonZeroAVL(SDValue AVL) {
7647   auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
7648   auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
7649   return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
7650          (ImmAVL && ImmAVL->getZExtValue() >= 1);
7651 }
7652 
7653 /// Helper to lower a reduction sequence of the form:
7654 /// scalar = reduce_op vec, scalar_start
7655 static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
7656                                  SDValue StartValue, SDValue Vec, SDValue Mask,
7657                                  SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
7658                                  const RISCVSubtarget &Subtarget) {
7659   const MVT VecVT = Vec.getSimpleValueType();
7660   const MVT M1VT = getLMUL1VT(VecVT);
7661   const MVT XLenVT = Subtarget.getXLenVT();
7662   const bool NonZeroAVL = isNonZeroAVL(VL);
7663 
7664   // The reduction needs an LMUL1 input; do the splat at either LMUL1
7665   // or the original VT if fractional.
7666   auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
7667   // We reuse the VL of the reduction to reduce vsetvli toggles if we can
7668   // prove it is non-zero.  For the AVL=0 case, we need the scalar to
7669   // be the result of the reduction operation.
7670   auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
7671   SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
7672                                            DAG, Subtarget);
7673   if (M1VT != InnerVT)
7674     InitialValue = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT,
7675                                DAG.getUNDEF(M1VT),
7676                                InitialValue, DAG.getConstant(0, DL, XLenVT));
7677   SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
7678   SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
7679   SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
7680   SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
7681   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
7682                      DAG.getConstant(0, DL, XLenVT));
7683 }
7684 
7685 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
7686                                             SelectionDAG &DAG) const {
7687   SDLoc DL(Op);
7688   SDValue Vec = Op.getOperand(0);
7689   EVT VecEVT = Vec.getValueType();
7690 
7691   unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
7692 
7693   // Due to ordering in legalize types we may have a vector type that needs to
7694   // be split. Do that manually so we can get down to a legal type.
7695   while (getTypeAction(*DAG.getContext(), VecEVT) ==
7696          TargetLowering::TypeSplitVector) {
7697     auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
7698     VecEVT = Lo.getValueType();
7699     Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
7700   }
7701 
7702   // TODO: The type may need to be widened rather than split. Or widened before
7703   // it can be split.
7704   if (!isTypeLegal(VecEVT))
7705     return SDValue();
7706 
7707   MVT VecVT = VecEVT.getSimpleVT();
7708   MVT VecEltVT = VecVT.getVectorElementType();
7709   unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
7710 
7711   MVT ContainerVT = VecVT;
7712   if (VecVT.isFixedLengthVector()) {
7713     ContainerVT = getContainerForFixedLengthVector(VecVT);
7714     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7715   }
7716 
7717   auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7718 
7719   SDValue NeutralElem =
7720       DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
7721   return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), NeutralElem, Vec,
7722                            Mask, VL, DL, DAG, Subtarget);
7723 }
7724 
7725 // Given a reduction op, this function returns the matching reduction opcode,
7726 // the vector SDValue and the scalar SDValue required to lower this to a
7727 // RISCVISD node.
7728 static std::tuple<unsigned, SDValue, SDValue>
7729 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
7730   SDLoc DL(Op);
7731   auto Flags = Op->getFlags();
7732   unsigned Opcode = Op.getOpcode();
7733   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
7734   switch (Opcode) {
7735   default:
7736     llvm_unreachable("Unhandled reduction");
7737   case ISD::VECREDUCE_FADD: {
7738     // Use positive zero if we can. It is cheaper to materialize.
7739     SDValue Zero =
7740         DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
7741     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
7742   }
7743   case ISD::VECREDUCE_SEQ_FADD:
7744     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
7745                            Op.getOperand(0));
7746   case ISD::VECREDUCE_FMIN:
7747     return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
7748                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
7749   case ISD::VECREDUCE_FMAX:
7750     return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
7751                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
7752   }
7753 }
7754 
7755 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
7756                                               SelectionDAG &DAG) const {
7757   SDLoc DL(Op);
7758   MVT VecEltVT = Op.getSimpleValueType();
7759 
7760   unsigned RVVOpcode;
7761   SDValue VectorVal, ScalarVal;
7762   std::tie(RVVOpcode, VectorVal, ScalarVal) =
7763       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
7764   MVT VecVT = VectorVal.getSimpleValueType();
7765 
7766   MVT ContainerVT = VecVT;
7767   if (VecVT.isFixedLengthVector()) {
7768     ContainerVT = getContainerForFixedLengthVector(VecVT);
7769     VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
7770   }
7771 
7772   auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7773   return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), ScalarVal,
7774                            VectorVal, Mask, VL, DL, DAG, Subtarget);
7775 }
7776 
7777 static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
7778   switch (ISDOpcode) {
7779   default:
7780     llvm_unreachable("Unhandled reduction");
7781   case ISD::VP_REDUCE_ADD:
7782     return RISCVISD::VECREDUCE_ADD_VL;
7783   case ISD::VP_REDUCE_UMAX:
7784     return RISCVISD::VECREDUCE_UMAX_VL;
7785   case ISD::VP_REDUCE_SMAX:
7786     return RISCVISD::VECREDUCE_SMAX_VL;
7787   case ISD::VP_REDUCE_UMIN:
7788     return RISCVISD::VECREDUCE_UMIN_VL;
7789   case ISD::VP_REDUCE_SMIN:
7790     return RISCVISD::VECREDUCE_SMIN_VL;
7791   case ISD::VP_REDUCE_AND:
7792     return RISCVISD::VECREDUCE_AND_VL;
7793   case ISD::VP_REDUCE_OR:
7794     return RISCVISD::VECREDUCE_OR_VL;
7795   case ISD::VP_REDUCE_XOR:
7796     return RISCVISD::VECREDUCE_XOR_VL;
7797   case ISD::VP_REDUCE_FADD:
7798     return RISCVISD::VECREDUCE_FADD_VL;
7799   case ISD::VP_REDUCE_SEQ_FADD:
7800     return RISCVISD::VECREDUCE_SEQ_FADD_VL;
7801   case ISD::VP_REDUCE_FMAX:
7802     return RISCVISD::VECREDUCE_FMAX_VL;
7803   case ISD::VP_REDUCE_FMIN:
7804     return RISCVISD::VECREDUCE_FMIN_VL;
7805   }
7806 }
7807 
7808 SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
7809                                            SelectionDAG &DAG) const {
7810   SDLoc DL(Op);
7811   SDValue Vec = Op.getOperand(1);
7812   EVT VecEVT = Vec.getValueType();
7813 
7814   // TODO: The type may need to be widened rather than split. Or widened before
7815   // it can be split.
7816   if (!isTypeLegal(VecEVT))
7817     return SDValue();
7818 
7819   MVT VecVT = VecEVT.getSimpleVT();
7820   unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());
7821 
7822   if (VecVT.isFixedLengthVector()) {
7823     auto ContainerVT = getContainerForFixedLengthVector(VecVT);
7824     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7825   }
7826 
7827   SDValue VL = Op.getOperand(3);
7828   SDValue Mask = Op.getOperand(2);
7829   return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
7830                            Vec, Mask, VL, DL, DAG, Subtarget);
7831 }
7832 
7833 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
7834                                                    SelectionDAG &DAG) const {
7835   SDValue Vec = Op.getOperand(0);
7836   SDValue SubVec = Op.getOperand(1);
7837   MVT VecVT = Vec.getSimpleValueType();
7838   MVT SubVecVT = SubVec.getSimpleValueType();
7839 
7840   SDLoc DL(Op);
7841   MVT XLenVT = Subtarget.getXLenVT();
7842   unsigned OrigIdx = Op.getConstantOperandVal(2);
7843   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
7844 
7845   // We don't have the ability to slide mask vectors up indexed by their i1
7846   // elements; the smallest we can do is i8. Often we are able to bitcast to
7847   // equivalent i8 vectors. Note that when inserting a fixed-length vector
7848   // into a scalable one, we might not necessarily have enough scalable
7849   // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
7850   if (SubVecVT.getVectorElementType() == MVT::i1 &&
7851       (OrigIdx != 0 || !Vec.isUndef())) {
7852     if (VecVT.getVectorMinNumElements() >= 8 &&
7853         SubVecVT.getVectorMinNumElements() >= 8) {
7854       assert(OrigIdx % 8 == 0 && "Invalid index");
7855       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
7856              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
7857              "Unexpected mask vector lowering");
7858       OrigIdx /= 8;
7859       SubVecVT =
7860           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
7861                            SubVecVT.isScalableVector());
7862       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
7863                                VecVT.isScalableVector());
7864       Vec = DAG.getBitcast(VecVT, Vec);
7865       SubVec = DAG.getBitcast(SubVecVT, SubVec);
7866     } else {
7867       // We can't slide this mask vector up indexed by its i1 elements.
7868       // This poses a problem when we wish to insert a scalable vector which
7869       // can't be re-expressed as a larger type. Just choose the slow path and
7870       // extend to a larger type, then truncate back down.
7871       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
7872       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
7873       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
7874       SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
7875       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
7876                         Op.getOperand(2));
7877       SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
7878       return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
7879     }
7880   }
7881 
7882   // If the subvector vector is a fixed-length type, we cannot use subregister
7883   // manipulation to simplify the codegen; we don't know which register of a
7884   // LMUL group contains the specific subvector as we only know the minimum
7885   // register size. Therefore we must slide the vector group up the full
7886   // amount.
7887   if (SubVecVT.isFixedLengthVector()) {
7888     if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
7889       return Op;
7890     MVT ContainerVT = VecVT;
7891     if (VecVT.isFixedLengthVector()) {
7892       ContainerVT = getContainerForFixedLengthVector(VecVT);
7893       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
7894     }
7895     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
7896                          DAG.getUNDEF(ContainerVT), SubVec,
7897                          DAG.getConstant(0, DL, XLenVT));
7898     if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
7899       SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
7900       return DAG.getBitcast(Op.getValueType(), SubVec);
7901     }
7902     SDValue Mask =
7903         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
7904     // Set the vector length to only the number of elements we care about. Note
7905     // that for slideup this includes the offset.
7906     unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
7907     SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget);
7908 
7909     // Use tail agnostic policy if we're inserting over Vec's tail.
7910     unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
7911     if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
7912       Policy = RISCVII::TAIL_AGNOSTIC;
7913 
7914     // If we're inserting into the lowest elements, use a tail undisturbed
7915     // vmv.v.v.
7916     if (OrigIdx == 0) {
7917       SubVec =
7918           DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
7919     } else {
7920       SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
7921       SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
7922                            SlideupAmt, Mask, VL, Policy);
7923     }
7924 
7925     if (VecVT.isFixedLengthVector())
7926       SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
7927     return DAG.getBitcast(Op.getValueType(), SubVec);
7928   }
7929 
7930   unsigned SubRegIdx, RemIdx;
7931   std::tie(SubRegIdx, RemIdx) =
7932       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
7933           VecVT, SubVecVT, OrigIdx, TRI);
7934 
7935   RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
7936   bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
7937                          SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
7938                          SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
7939 
7940   // 1. If the Idx has been completely eliminated and this subvector's size is
7941   // a vector register or a multiple thereof, or the surrounding elements are
7942   // undef, then this is a subvector insert which naturally aligns to a vector
7943   // register. These can easily be handled using subregister manipulation.
7944   // 2. If the subvector is smaller than a vector register, then the insertion
7945   // must preserve the undisturbed elements of the register. We do this by
7946   // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
7947   // (which resolves to a subregister copy), performing a VSLIDEUP to place the
7948   // subvector within the vector register, and an INSERT_SUBVECTOR of that
7949   // LMUL=1 type back into the larger vector (resolving to another subregister
7950   // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
7951   // to avoid allocating a large register group to hold our subvector.
7952   if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
7953     return Op;
7954 
7955   // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
7956   // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
7957   // (in our case undisturbed). This means we can set up a subvector insertion
7958   // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
7959   // size of the subvector.
7960   MVT InterSubVT = VecVT;
7961   SDValue AlignedExtract = Vec;
7962   unsigned AlignedIdx = OrigIdx - RemIdx;
7963   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
7964     InterSubVT = getLMUL1VT(VecVT);
7965     // Extract a subvector equal to the nearest full vector register type. This
7966     // should resolve to a EXTRACT_SUBREG instruction.
7967     AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
7968                                  DAG.getConstant(AlignedIdx, DL, XLenVT));
7969   }
7970 
7971   SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
7972                        DAG.getUNDEF(InterSubVT), SubVec,
7973                        DAG.getConstant(0, DL, XLenVT));
7974 
7975   auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
7976 
7977   VL = computeVLMax(SubVecVT, DL, DAG);
7978 
7979   // If we're inserting into the lowest elements, use a tail undisturbed
7980   // vmv.v.v.
7981   if (RemIdx == 0) {
7982     SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
7983                          SubVec, VL);
7984   } else {
7985     SDValue SlideupAmt =
7986         DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
7987 
7988     // Construct the vector length corresponding to RemIdx + length(SubVecVT).
7989     VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
7990 
7991     SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
7992                          SlideupAmt, Mask, VL);
7993   }
7994 
7995   // If required, insert this subvector back into the correct vector register.
7996   // This should resolve to an INSERT_SUBREG instruction.
7997   if (VecVT.bitsGT(InterSubVT))
7998     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
7999                          DAG.getConstant(AlignedIdx, DL, XLenVT));
8000 
8001   // We might have bitcast from a mask type: cast back to the original type if
8002   // required.
8003   return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
8004 }
8005 
8006 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
8007                                                     SelectionDAG &DAG) const {
8008   SDValue Vec = Op.getOperand(0);
8009   MVT SubVecVT = Op.getSimpleValueType();
8010   MVT VecVT = Vec.getSimpleValueType();
8011 
8012   SDLoc DL(Op);
8013   MVT XLenVT = Subtarget.getXLenVT();
8014   unsigned OrigIdx = Op.getConstantOperandVal(1);
8015   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
8016 
8017   // We don't have the ability to slide mask vectors down indexed by their i1
8018   // elements; the smallest we can do is i8. Often we are able to bitcast to
8019   // equivalent i8 vectors. Note that when extracting a fixed-length vector
8020   // from a scalable one, we might not necessarily have enough scalable
8021   // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
8022   if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
8023     if (VecVT.getVectorMinNumElements() >= 8 &&
8024         SubVecVT.getVectorMinNumElements() >= 8) {
8025       assert(OrigIdx % 8 == 0 && "Invalid index");
8026       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
8027              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
8028              "Unexpected mask vector lowering");
8029       OrigIdx /= 8;
8030       SubVecVT =
8031           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
8032                            SubVecVT.isScalableVector());
8033       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
8034                                VecVT.isScalableVector());
8035       Vec = DAG.getBitcast(VecVT, Vec);
8036     } else {
8037       // We can't slide this mask vector down, indexed by its i1 elements.
8038       // This poses a problem when we wish to extract a scalable vector which
8039       // can't be re-expressed as a larger type. Just choose the slow path and
8040       // extend to a larger type, then truncate back down.
8041       // TODO: We could probably improve this when extracting certain fixed
8042       // from fixed, where we can extract as i8 and shift the correct element
8043       // right to reach the desired subvector?
8044       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
8045       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
8046       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
8047       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
8048                         Op.getOperand(1));
8049       SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
8050       return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
8051     }
8052   }
8053 
8054   // If the subvector vector is a fixed-length type, we cannot use subregister
8055   // manipulation to simplify the codegen; we don't know which register of a
8056   // LMUL group contains the specific subvector as we only know the minimum
8057   // register size. Therefore we must slide the vector group down the full
8058   // amount.
8059   if (SubVecVT.isFixedLengthVector()) {
8060     // With an index of 0 this is a cast-like subvector, which can be performed
8061     // with subregister operations.
8062     if (OrigIdx == 0)
8063       return Op;
8064     MVT ContainerVT = VecVT;
8065     if (VecVT.isFixedLengthVector()) {
8066       ContainerVT = getContainerForFixedLengthVector(VecVT);
8067       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8068     }
8069     SDValue Mask =
8070         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
8071     // Set the vector length to only the number of elements we care about. This
8072     // avoids sliding down elements we're going to discard straight away.
8073     SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
8074     SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
8075     SDValue Slidedown =
8076         getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8077                       DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
8078     // Now we can use a cast-like subvector extract to get the result.
8079     Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
8080                             DAG.getConstant(0, DL, XLenVT));
8081     return DAG.getBitcast(Op.getValueType(), Slidedown);
8082   }
8083 
8084   unsigned SubRegIdx, RemIdx;
8085   std::tie(SubRegIdx, RemIdx) =
8086       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
8087           VecVT, SubVecVT, OrigIdx, TRI);
8088 
8089   // If the Idx has been completely eliminated then this is a subvector extract
8090   // which naturally aligns to a vector register. These can easily be handled
8091   // using subregister manipulation.
8092   if (RemIdx == 0)
8093     return Op;
8094 
8095   // Else we must shift our vector register directly to extract the subvector.
8096   // Do this using VSLIDEDOWN.
8097 
8098   // If the vector type is an LMUL-group type, extract a subvector equal to the
8099   // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
8100   // instruction.
8101   MVT InterSubVT = VecVT;
8102   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
8103     InterSubVT = getLMUL1VT(VecVT);
8104     Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
8105                       DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
8106   }
8107 
8108   // Slide this vector register down by the desired number of elements in order
8109   // to place the desired subvector starting at element 0.
8110   SDValue SlidedownAmt =
8111       DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
8112 
8113   auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
8114   SDValue Slidedown =
8115       getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
8116                     Vec, SlidedownAmt, Mask, VL);
8117 
8118   // Now the vector is in the right position, extract our final subvector. This
8119   // should resolve to a COPY.
8120   Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
8121                           DAG.getConstant(0, DL, XLenVT));
8122 
8123   // We might have bitcast from a mask type: cast back to the original type if
8124   // required.
8125   return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
8126 }
8127 
8128 // Widen a vector's operands to i8, then truncate its results back to the
8129 // original type, typically i1.  All operand and result types must be the same.
8130 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
8131                                   SelectionDAG &DAG) {
8132   MVT VT = N.getSimpleValueType();
8133   MVT WideVT = VT.changeVectorElementType(MVT::i8);
8134   SmallVector<SDValue, 4> WideOps;
8135   for (SDValue Op : N->ops()) {
8136     assert(Op.getSimpleValueType() == VT &&
8137            "Operands and result must be same type");
8138     WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
8139   }
8140 
8141   unsigned NumVals = N->getNumValues();
8142 
8143   SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
8144       NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
8145   SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
8146   SmallVector<SDValue, 4> TruncVals;
8147   for (unsigned I = 0; I < NumVals; I++) {
8148     TruncVals.push_back(
8149         DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
8150                      DAG.getConstant(0, DL, WideVT), ISD::SETNE));
8151   }
8152 
8153   if (TruncVals.size() > 1)
8154     return DAG.getMergeValues(TruncVals, DL);
8155   return TruncVals.front();
8156 }
8157 
8158 SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
8159                                                       SelectionDAG &DAG) const {
8160   SDLoc DL(Op);
8161   MVT VecVT = Op.getSimpleValueType();
8162   MVT XLenVT = Subtarget.getXLenVT();
8163 
8164   assert(VecVT.isScalableVector() &&
8165          "vector_interleave on non-scalable vector!");
8166 
8167   // 1 bit element vectors need to be widened to e8
8168   if (VecVT.getVectorElementType() == MVT::i1)
8169     return widenVectorOpsToi8(Op, DL, DAG);
8170 
8171   // If the VT is LMUL=8, we need to split and reassemble.
8172   if (VecVT.getSizeInBits().getKnownMinValue() ==
8173       (8 * RISCV::RVVBitsPerBlock)) {
8174     auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
8175     auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
8176     EVT SplitVT = Op0Lo.getValueType();
8177 
8178     SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
8179                                 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
8180     SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
8181                                 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
8182 
8183     SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
8184                                ResLo.getValue(0), ResHi.getValue(0));
8185     SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
8186                               ResHi.getValue(1));
8187     return DAG.getMergeValues({Even, Odd}, DL);
8188   }
8189 
8190   // Concatenate the two vectors as one vector to deinterleave
8191   MVT ConcatVT =
8192       MVT::getVectorVT(VecVT.getVectorElementType(),
8193                        VecVT.getVectorElementCount().multiplyCoefficientBy(2));
8194   SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
8195                                Op.getOperand(0), Op.getOperand(1));
8196 
8197   // We want to operate on all lanes, so get the mask and VL and mask for it
8198   auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
8199   SDValue Passthru = DAG.getUNDEF(ConcatVT);
8200 
8201   // We can deinterleave through vnsrl.wi if the element type is smaller than
8202   // ELEN
8203   if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) {
8204     SDValue Even =
8205         getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
8206     SDValue Odd =
8207         getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
8208     return DAG.getMergeValues({Even, Odd}, DL);
8209   }
8210 
8211   // For the indices, use the same SEW to avoid an extra vsetvli
8212   MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
8213   // Create a vector of even indices {0, 2, 4, ...}
8214   SDValue EvenIdx =
8215       DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
8216   // Create a vector of odd indices {1, 3, 5, ... }
8217   SDValue OddIdx =
8218       DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
8219 
8220   // Gather the even and odd elements into two separate vectors
8221   SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
8222                                  Concat, EvenIdx, Passthru, Mask, VL);
8223   SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
8224                                 Concat, OddIdx, Passthru, Mask, VL);
8225 
8226   // Extract the result half of the gather for even and odd
8227   SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
8228                              DAG.getConstant(0, DL, XLenVT));
8229   SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
8230                             DAG.getConstant(0, DL, XLenVT));
8231 
8232   return DAG.getMergeValues({Even, Odd}, DL);
8233 }
8234 
8235 SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
8236                                                     SelectionDAG &DAG) const {
8237   SDLoc DL(Op);
8238   MVT VecVT = Op.getSimpleValueType();
8239 
8240   assert(VecVT.isScalableVector() &&
8241          "vector_interleave on non-scalable vector!");
8242 
8243   // i1 vectors need to be widened to i8
8244   if (VecVT.getVectorElementType() == MVT::i1)
8245     return widenVectorOpsToi8(Op, DL, DAG);
8246 
8247   MVT XLenVT = Subtarget.getXLenVT();
8248   SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
8249 
8250   // If the VT is LMUL=8, we need to split and reassemble.
8251   if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
8252     auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
8253     auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
8254     EVT SplitVT = Op0Lo.getValueType();
8255 
8256     SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
8257                                 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
8258     SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
8259                                 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
8260 
8261     SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
8262                              ResLo.getValue(0), ResLo.getValue(1));
8263     SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
8264                              ResHi.getValue(0), ResHi.getValue(1));
8265     return DAG.getMergeValues({Lo, Hi}, DL);
8266   }
8267 
8268   SDValue Interleaved;
8269 
8270   // If the element type is smaller than ELEN, then we can interleave with
8271   // vwaddu.vv and vwmaccu.vx
8272   if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) {
8273     Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
8274                                         DAG, Subtarget);
8275   } else {
8276     // Otherwise, fallback to using vrgathere16.vv
8277     MVT ConcatVT =
8278       MVT::getVectorVT(VecVT.getVectorElementType(),
8279                        VecVT.getVectorElementCount().multiplyCoefficientBy(2));
8280     SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
8281                                  Op.getOperand(0), Op.getOperand(1));
8282 
8283     MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
8284 
8285     // 0 1 2 3 4 5 6 7 ...
8286     SDValue StepVec = DAG.getStepVector(DL, IdxVT);
8287 
8288     // 1 1 1 1 1 1 1 1 ...
8289     SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
8290 
8291     // 1 0 1 0 1 0 1 0 ...
8292     SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
8293     OddMask = DAG.getSetCC(
8294         DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
8295         DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
8296         ISD::CondCode::SETNE);
8297 
8298     SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
8299 
8300     // Build up the index vector for interleaving the concatenated vector
8301     //      0      0      1      1      2      2      3      3 ...
8302     SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
8303     //      0      n      1    n+1      2    n+2      3    n+3 ...
8304     Idx =
8305         DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
8306 
8307     // Then perform the interleave
8308     //   v[0]   v[n]   v[1] v[n+1]   v[2] v[n+2]   v[3] v[n+3] ...
8309     SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
8310     Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
8311                               Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
8312   }
8313 
8314   // Extract the two halves from the interleaved result
8315   SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
8316                            DAG.getVectorIdxConstant(0, DL));
8317   SDValue Hi = DAG.getNode(
8318       ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
8319       DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
8320 
8321   return DAG.getMergeValues({Lo, Hi}, DL);
8322 }
8323 
8324 // Lower step_vector to the vid instruction. Any non-identity step value must
8325 // be accounted for my manual expansion.
8326 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
8327                                               SelectionDAG &DAG) const {
8328   SDLoc DL(Op);
8329   MVT VT = Op.getSimpleValueType();
8330   assert(VT.isScalableVector() && "Expected scalable vector");
8331   MVT XLenVT = Subtarget.getXLenVT();
8332   auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
8333   SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
8334   uint64_t StepValImm = Op.getConstantOperandVal(0);
8335   if (StepValImm != 1) {
8336     if (isPowerOf2_64(StepValImm)) {
8337       SDValue StepVal =
8338           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8339                       DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
8340       StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
8341     } else {
8342       SDValue StepVal = lowerScalarSplat(
8343           SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
8344           VL, VT, DL, DAG, Subtarget);
8345       StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
8346     }
8347   }
8348   return StepVec;
8349 }
8350 
8351 // Implement vector_reverse using vrgather.vv with indices determined by
8352 // subtracting the id of each element from (VLMAX-1). This will convert
8353 // the indices like so:
8354 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
8355 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
8356 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
8357                                                  SelectionDAG &DAG) const {
8358   SDLoc DL(Op);
8359   MVT VecVT = Op.getSimpleValueType();
8360   if (VecVT.getVectorElementType() == MVT::i1) {
8361     MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8362     SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
8363     SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
8364     return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
8365   }
8366   unsigned EltSize = VecVT.getScalarSizeInBits();
8367   unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
8368   unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
8369   unsigned MaxVLMAX =
8370     RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
8371 
8372   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
8373   MVT IntVT = VecVT.changeVectorElementTypeToInteger();
8374 
8375   // If this is SEW=8 and VLMAX is potentially more than 256, we need
8376   // to use vrgatherei16.vv.
8377   // TODO: It's also possible to use vrgatherei16.vv for other types to
8378   // decrease register width for the index calculation.
8379   if (MaxVLMAX > 256 && EltSize == 8) {
8380     // If this is LMUL=8, we have to split before can use vrgatherei16.vv.
8381     // Reverse each half, then reassemble them in reverse order.
8382     // NOTE: It's also possible that after splitting that VLMAX no longer
8383     // requires vrgatherei16.vv.
8384     if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
8385       auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
8386       auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
8387       Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
8388       Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
8389       // Reassemble the low and high pieces reversed.
8390       // FIXME: This is a CONCAT_VECTORS.
8391       SDValue Res =
8392           DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
8393                       DAG.getIntPtrConstant(0, DL));
8394       return DAG.getNode(
8395           ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
8396           DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
8397     }
8398 
8399     // Just promote the int type to i16 which will double the LMUL.
8400     IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
8401     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
8402   }
8403 
8404   MVT XLenVT = Subtarget.getXLenVT();
8405   auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
8406 
8407   // Calculate VLMAX-1 for the desired SEW.
8408   SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
8409                                  computeVLMax(VecVT, DL, DAG),
8410                                  DAG.getConstant(1, DL, XLenVT));
8411 
8412   // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
8413   bool IsRV32E64 =
8414       !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
8415   SDValue SplatVL;
8416   if (!IsRV32E64)
8417     SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
8418   else
8419     SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
8420                           VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
8421 
8422   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
8423   SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
8424                                 DAG.getUNDEF(IntVT), Mask, VL);
8425 
8426   return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
8427                      DAG.getUNDEF(VecVT), Mask, VL);
8428 }
8429 
8430 SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
8431                                                 SelectionDAG &DAG) const {
8432   SDLoc DL(Op);
8433   SDValue V1 = Op.getOperand(0);
8434   SDValue V2 = Op.getOperand(1);
8435   MVT XLenVT = Subtarget.getXLenVT();
8436   MVT VecVT = Op.getSimpleValueType();
8437 
8438   SDValue VLMax = computeVLMax(VecVT, DL, DAG);
8439 
8440   int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
8441   SDValue DownOffset, UpOffset;
8442   if (ImmValue >= 0) {
8443     // The operand is a TargetConstant, we need to rebuild it as a regular
8444     // constant.
8445     DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
8446     UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
8447   } else {
8448     // The operand is a TargetConstant, we need to rebuild it as a regular
8449     // constant rather than negating the original operand.
8450     UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
8451     DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
8452   }
8453 
8454   SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
8455 
8456   SDValue SlideDown =
8457       getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
8458                     DownOffset, TrueMask, UpOffset);
8459   return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
8460                      TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
8461                      RISCVII::TAIL_AGNOSTIC);
8462 }
8463 
8464 SDValue
8465 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
8466                                                      SelectionDAG &DAG) const {
8467   SDLoc DL(Op);
8468   auto *Load = cast<LoadSDNode>(Op);
8469 
8470   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
8471                                         Load->getMemoryVT(),
8472                                         *Load->getMemOperand()) &&
8473          "Expecting a correctly-aligned load");
8474 
8475   MVT VT = Op.getSimpleValueType();
8476   MVT XLenVT = Subtarget.getXLenVT();
8477   MVT ContainerVT = getContainerForFixedLengthVector(VT);
8478 
8479   SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
8480 
8481   bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
8482   SDValue IntID = DAG.getTargetConstant(
8483       IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
8484   SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
8485   if (!IsMaskOp)
8486     Ops.push_back(DAG.getUNDEF(ContainerVT));
8487   Ops.push_back(Load->getBasePtr());
8488   Ops.push_back(VL);
8489   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
8490   SDValue NewLoad =
8491       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
8492                               Load->getMemoryVT(), Load->getMemOperand());
8493 
8494   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
8495   return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
8496 }
8497 
8498 SDValue
8499 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
8500                                                       SelectionDAG &DAG) const {
8501   SDLoc DL(Op);
8502   auto *Store = cast<StoreSDNode>(Op);
8503 
8504   assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
8505                                         Store->getMemoryVT(),
8506                                         *Store->getMemOperand()) &&
8507          "Expecting a correctly-aligned store");
8508 
8509   SDValue StoreVal = Store->getValue();
8510   MVT VT = StoreVal.getSimpleValueType();
8511   MVT XLenVT = Subtarget.getXLenVT();
8512 
8513   // If the size less than a byte, we need to pad with zeros to make a byte.
8514   if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
8515     VT = MVT::v8i1;
8516     StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
8517                            DAG.getConstant(0, DL, VT), StoreVal,
8518                            DAG.getIntPtrConstant(0, DL));
8519   }
8520 
8521   MVT ContainerVT = getContainerForFixedLengthVector(VT);
8522 
8523   SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
8524 
8525   SDValue NewValue =
8526       convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
8527 
8528   bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
8529   SDValue IntID = DAG.getTargetConstant(
8530       IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
8531   return DAG.getMemIntrinsicNode(
8532       ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
8533       {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
8534       Store->getMemoryVT(), Store->getMemOperand());
8535 }
8536 
8537 SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
8538                                              SelectionDAG &DAG) const {
8539   SDLoc DL(Op);
8540   MVT VT = Op.getSimpleValueType();
8541 
8542   const auto *MemSD = cast<MemSDNode>(Op);
8543   EVT MemVT = MemSD->getMemoryVT();
8544   MachineMemOperand *MMO = MemSD->getMemOperand();
8545   SDValue Chain = MemSD->getChain();
8546   SDValue BasePtr = MemSD->getBasePtr();
8547 
8548   SDValue Mask, PassThru, VL;
8549   if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
8550     Mask = VPLoad->getMask();
8551     PassThru = DAG.getUNDEF(VT);
8552     VL = VPLoad->getVectorLength();
8553   } else {
8554     const auto *MLoad = cast<MaskedLoadSDNode>(Op);
8555     Mask = MLoad->getMask();
8556     PassThru = MLoad->getPassThru();
8557   }
8558 
8559   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8560 
8561   MVT XLenVT = Subtarget.getXLenVT();
8562 
8563   MVT ContainerVT = VT;
8564   if (VT.isFixedLengthVector()) {
8565     ContainerVT = getContainerForFixedLengthVector(VT);
8566     PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
8567     if (!IsUnmasked) {
8568       MVT MaskVT = getMaskTypeFor(ContainerVT);
8569       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8570     }
8571   }
8572 
8573   if (!VL)
8574     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8575 
8576   unsigned IntID =
8577       IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
8578   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
8579   if (IsUnmasked)
8580     Ops.push_back(DAG.getUNDEF(ContainerVT));
8581   else
8582     Ops.push_back(PassThru);
8583   Ops.push_back(BasePtr);
8584   if (!IsUnmasked)
8585     Ops.push_back(Mask);
8586   Ops.push_back(VL);
8587   if (!IsUnmasked)
8588     Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
8589 
8590   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
8591 
8592   SDValue Result =
8593       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
8594   Chain = Result.getValue(1);
8595 
8596   if (VT.isFixedLengthVector())
8597     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8598 
8599   return DAG.getMergeValues({Result, Chain}, DL);
8600 }
8601 
8602 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
8603                                               SelectionDAG &DAG) const {
8604   SDLoc DL(Op);
8605 
8606   const auto *MemSD = cast<MemSDNode>(Op);
8607   EVT MemVT = MemSD->getMemoryVT();
8608   MachineMemOperand *MMO = MemSD->getMemOperand();
8609   SDValue Chain = MemSD->getChain();
8610   SDValue BasePtr = MemSD->getBasePtr();
8611   SDValue Val, Mask, VL;
8612 
8613   if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
8614     Val = VPStore->getValue();
8615     Mask = VPStore->getMask();
8616     VL = VPStore->getVectorLength();
8617   } else {
8618     const auto *MStore = cast<MaskedStoreSDNode>(Op);
8619     Val = MStore->getValue();
8620     Mask = MStore->getMask();
8621   }
8622 
8623   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
8624 
8625   MVT VT = Val.getSimpleValueType();
8626   MVT XLenVT = Subtarget.getXLenVT();
8627 
8628   MVT ContainerVT = VT;
8629   if (VT.isFixedLengthVector()) {
8630     ContainerVT = getContainerForFixedLengthVector(VT);
8631 
8632     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
8633     if (!IsUnmasked) {
8634       MVT MaskVT = getMaskTypeFor(ContainerVT);
8635       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8636     }
8637   }
8638 
8639   if (!VL)
8640     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8641 
8642   unsigned IntID =
8643       IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
8644   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
8645   Ops.push_back(Val);
8646   Ops.push_back(BasePtr);
8647   if (!IsUnmasked)
8648     Ops.push_back(Mask);
8649   Ops.push_back(VL);
8650 
8651   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
8652                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
8653 }
8654 
8655 SDValue
8656 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
8657                                                       SelectionDAG &DAG) const {
8658   MVT InVT = Op.getOperand(0).getSimpleValueType();
8659   MVT ContainerVT = getContainerForFixedLengthVector(InVT);
8660 
8661   MVT VT = Op.getSimpleValueType();
8662 
8663   SDValue Op1 =
8664       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8665   SDValue Op2 =
8666       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
8667 
8668   SDLoc DL(Op);
8669   auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
8670                                     DAG, Subtarget);
8671   MVT MaskVT = getMaskTypeFor(ContainerVT);
8672 
8673   SDValue Cmp =
8674       DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
8675                   {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
8676 
8677   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
8678 }
8679 
8680 SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
8681                                                      SelectionDAG &DAG) const {
8682   unsigned Opc = Op.getOpcode();
8683   SDLoc DL(Op);
8684   SDValue Chain = Op.getOperand(0);
8685   SDValue Op1 = Op.getOperand(1);
8686   SDValue Op2 = Op.getOperand(2);
8687   SDValue CC = Op.getOperand(3);
8688   ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
8689   MVT VT = Op.getSimpleValueType();
8690   MVT InVT = Op1.getSimpleValueType();
8691 
8692   // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE
8693   // condition code.
8694   if (Opc == ISD::STRICT_FSETCCS) {
8695     // Expand strict_fsetccs(x, oeq) to
8696     // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
8697     SDVTList VTList = Op->getVTList();
8698     if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
8699       SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
8700       SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
8701                                  Op2, OLECCVal);
8702       SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
8703                                  Op1, OLECCVal);
8704       SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
8705                                      Tmp1.getValue(1), Tmp2.getValue(1));
8706       // Tmp1 and Tmp2 might be the same node.
8707       if (Tmp1 != Tmp2)
8708         Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
8709       return DAG.getMergeValues({Tmp1, OutChain}, DL);
8710     }
8711 
8712     // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
8713     if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
8714       SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
8715       SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
8716                                 Op2, OEQCCVal);
8717       SDValue Res = DAG.getNOT(DL, OEQ, VT);
8718       return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
8719     }
8720   }
8721 
8722   MVT ContainerInVT = InVT;
8723   if (InVT.isFixedLengthVector()) {
8724     ContainerInVT = getContainerForFixedLengthVector(InVT);
8725     Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
8726     Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
8727   }
8728   MVT MaskVT = getMaskTypeFor(ContainerInVT);
8729 
8730   auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
8731 
8732   SDValue Res;
8733   if (Opc == ISD::STRICT_FSETCC &&
8734       (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
8735        CCVal == ISD::SETOLE)) {
8736     // VMFLT/VMFLE/VMFGT/VMFGE raise exception for qNan. Generate a mask to only
8737     // active when both input elements are ordered.
8738     SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
8739     SDValue OrderMask1 = DAG.getNode(
8740         RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
8741         {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
8742          True, VL});
8743     SDValue OrderMask2 = DAG.getNode(
8744         RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
8745         {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
8746          True, VL});
8747     Mask =
8748         DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
8749     // Use Mask as the merge operand to let the result be 0 if either of the
8750     // inputs is unordered.
8751     Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
8752                       DAG.getVTList(MaskVT, MVT::Other),
8753                       {Chain, Op1, Op2, CC, Mask, Mask, VL});
8754   } else {
8755     unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
8756                                                 : RISCVISD::STRICT_FSETCCS_VL;
8757     Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
8758                       {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
8759   }
8760 
8761   if (VT.isFixedLengthVector()) {
8762     SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8763     return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8764   }
8765   return Res;
8766 }
8767 
8768 // Lower vector ABS to smax(X, sub(0, X)).
8769 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
8770   SDLoc DL(Op);
8771   MVT VT = Op.getSimpleValueType();
8772   SDValue X = Op.getOperand(0);
8773 
8774   assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
8775          "Unexpected type for ISD::ABS");
8776 
8777   MVT ContainerVT = VT;
8778   if (VT.isFixedLengthVector()) {
8779     ContainerVT = getContainerForFixedLengthVector(VT);
8780     X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
8781   }
8782 
8783   SDValue Mask, VL;
8784   if (Op->getOpcode() == ISD::VP_ABS) {
8785     Mask = Op->getOperand(1);
8786     if (VT.isFixedLengthVector())
8787       Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
8788                                      Subtarget);
8789     VL = Op->getOperand(2);
8790   } else
8791     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8792 
8793   SDValue SplatZero = DAG.getNode(
8794       RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
8795       DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
8796   SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
8797                              DAG.getUNDEF(ContainerVT), Mask, VL);
8798   SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
8799                             DAG.getUNDEF(ContainerVT), Mask, VL);
8800 
8801   if (VT.isFixedLengthVector())
8802     Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
8803   return Max;
8804 }
8805 
8806 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
8807     SDValue Op, SelectionDAG &DAG) const {
8808   SDLoc DL(Op);
8809   MVT VT = Op.getSimpleValueType();
8810   SDValue Mag = Op.getOperand(0);
8811   SDValue Sign = Op.getOperand(1);
8812   assert(Mag.getValueType() == Sign.getValueType() &&
8813          "Can only handle COPYSIGN with matching types.");
8814 
8815   MVT ContainerVT = getContainerForFixedLengthVector(VT);
8816   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
8817   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
8818 
8819   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8820 
8821   SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
8822                                  Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
8823 
8824   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
8825 }
8826 
8827 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
8828     SDValue Op, SelectionDAG &DAG) const {
8829   MVT VT = Op.getSimpleValueType();
8830   MVT ContainerVT = getContainerForFixedLengthVector(VT);
8831 
8832   MVT I1ContainerVT =
8833       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8834 
8835   SDValue CC =
8836       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
8837   SDValue Op1 =
8838       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
8839   SDValue Op2 =
8840       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
8841 
8842   SDLoc DL(Op);
8843   SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
8844 
8845   SDValue Select =
8846       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
8847 
8848   return convertFromScalableVector(VT, Select, DAG, Subtarget);
8849 }
8850 
8851 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
8852                                                SelectionDAG &DAG) const {
8853   unsigned NewOpc = getRISCVVLOp(Op);
8854   bool HasMergeOp = hasMergeOp(NewOpc);
8855   bool HasMask = hasMaskOp(NewOpc);
8856 
8857   MVT VT = Op.getSimpleValueType();
8858   MVT ContainerVT = getContainerForFixedLengthVector(VT);
8859 
8860   // Create list of operands by converting existing ones to scalable types.
8861   SmallVector<SDValue, 6> Ops;
8862   for (const SDValue &V : Op->op_values()) {
8863     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
8864 
8865     // Pass through non-vector operands.
8866     if (!V.getValueType().isVector()) {
8867       Ops.push_back(V);
8868       continue;
8869     }
8870 
8871     // "cast" fixed length vector to a scalable vector.
8872     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
8873            "Only fixed length vectors are supported!");
8874     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
8875   }
8876 
8877   SDLoc DL(Op);
8878   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8879   if (HasMergeOp)
8880     Ops.push_back(DAG.getUNDEF(ContainerVT));
8881   if (HasMask)
8882     Ops.push_back(Mask);
8883   Ops.push_back(VL);
8884 
8885   // StrictFP operations have two result values. Their lowered result should
8886   // have same result count.
8887   if (Op->isStrictFPOpcode()) {
8888     SDValue ScalableRes =
8889         DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
8890                     Op->getFlags());
8891     SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
8892     return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
8893   }
8894 
8895   SDValue ScalableRes =
8896       DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
8897   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
8898 }
8899 
8900 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
8901 // * Operands of each node are assumed to be in the same order.
8902 // * The EVL operand is promoted from i32 to i64 on RV64.
8903 // * Fixed-length vectors are converted to their scalable-vector container
8904 //   types.
8905 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
8906                                        unsigned RISCVISDOpc,
8907                                        bool HasMergeOp) const {
8908   SDLoc DL(Op);
8909   MVT VT = Op.getSimpleValueType();
8910   SmallVector<SDValue, 4> Ops;
8911 
8912   MVT ContainerVT = VT;
8913   if (VT.isFixedLengthVector())
8914     ContainerVT = getContainerForFixedLengthVector(VT);
8915 
8916   for (const auto &OpIdx : enumerate(Op->ops())) {
8917     SDValue V = OpIdx.value();
8918     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
8919     // Add dummy merge value before the mask.
8920     if (HasMergeOp && *ISD::getVPMaskIdx(Op.getOpcode()) == OpIdx.index())
8921       Ops.push_back(DAG.getUNDEF(ContainerVT));
8922     // Pass through operands which aren't fixed-length vectors.
8923     if (!V.getValueType().isFixedLengthVector()) {
8924       Ops.push_back(V);
8925       continue;
8926     }
8927     // "cast" fixed length vector to a scalable vector.
8928     MVT OpVT = V.getSimpleValueType();
8929     MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
8930     assert(useRVVForFixedLengthVectorVT(OpVT) &&
8931            "Only fixed length vectors are supported!");
8932     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
8933   }
8934 
8935   if (!VT.isFixedLengthVector())
8936     return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
8937 
8938   SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
8939 
8940   return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
8941 }
8942 
8943 SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
8944                                               SelectionDAG &DAG) const {
8945   SDLoc DL(Op);
8946   MVT VT = Op.getSimpleValueType();
8947 
8948   SDValue Src = Op.getOperand(0);
8949   // NOTE: Mask is dropped.
8950   SDValue VL = Op.getOperand(2);
8951 
8952   MVT ContainerVT = VT;
8953   if (VT.isFixedLengthVector()) {
8954     ContainerVT = getContainerForFixedLengthVector(VT);
8955     MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8956     Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
8957   }
8958 
8959   MVT XLenVT = Subtarget.getXLenVT();
8960   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8961   SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8962                                   DAG.getUNDEF(ContainerVT), Zero, VL);
8963 
8964   SDValue SplatValue = DAG.getConstant(
8965       Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
8966   SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8967                               DAG.getUNDEF(ContainerVT), SplatValue, VL);
8968 
8969   SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src,
8970                                Splat, ZeroSplat, VL);
8971   if (!VT.isFixedLengthVector())
8972     return Result;
8973   return convertFromScalableVector(VT, Result, DAG, Subtarget);
8974 }
8975 
8976 SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
8977                                                 SelectionDAG &DAG) const {
8978   SDLoc DL(Op);
8979   MVT VT = Op.getSimpleValueType();
8980 
8981   SDValue Op1 = Op.getOperand(0);
8982   SDValue Op2 = Op.getOperand(1);
8983   ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8984   // NOTE: Mask is dropped.
8985   SDValue VL = Op.getOperand(4);
8986 
8987   MVT ContainerVT = VT;
8988   if (VT.isFixedLengthVector()) {
8989     ContainerVT = getContainerForFixedLengthVector(VT);
8990     Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
8991     Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
8992   }
8993 
8994   SDValue Result;
8995   SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
8996 
8997   switch (Condition) {
8998   default:
8999     break;
9000   // X != Y  --> (X^Y)
9001   case ISD::SETNE:
9002     Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
9003     break;
9004   // X == Y  --> ~(X^Y)
9005   case ISD::SETEQ: {
9006     SDValue Temp =
9007         DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
9008     Result =
9009         DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
9010     break;
9011   }
9012   // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
9013   // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
9014   case ISD::SETGT:
9015   case ISD::SETULT: {
9016     SDValue Temp =
9017         DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
9018     Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
9019     break;
9020   }
9021   // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
9022   // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
9023   case ISD::SETLT:
9024   case ISD::SETUGT: {
9025     SDValue Temp =
9026         DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
9027     Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
9028     break;
9029   }
9030   // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
9031   // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
9032   case ISD::SETGE:
9033   case ISD::SETULE: {
9034     SDValue Temp =
9035         DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
9036     Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
9037     break;
9038   }
9039   // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
9040   // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
9041   case ISD::SETLE:
9042   case ISD::SETUGE: {
9043     SDValue Temp =
9044         DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
9045     Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
9046     break;
9047   }
9048   }
9049 
9050   if (!VT.isFixedLengthVector())
9051     return Result;
9052   return convertFromScalableVector(VT, Result, DAG, Subtarget);
9053 }
9054 
9055 // Lower Floating-Point/Integer Type-Convert VP SDNodes
9056 SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
9057                                                 unsigned RISCVISDOpc) const {
9058   SDLoc DL(Op);
9059 
9060   SDValue Src = Op.getOperand(0);
9061   SDValue Mask = Op.getOperand(1);
9062   SDValue VL = Op.getOperand(2);
9063 
9064   MVT DstVT = Op.getSimpleValueType();
9065   MVT SrcVT = Src.getSimpleValueType();
9066   if (DstVT.isFixedLengthVector()) {
9067     DstVT = getContainerForFixedLengthVector(DstVT);
9068     SrcVT = getContainerForFixedLengthVector(SrcVT);
9069     Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
9070     MVT MaskVT = getMaskTypeFor(DstVT);
9071     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9072   }
9073 
9074   unsigned DstEltSize = DstVT.getScalarSizeInBits();
9075   unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
9076 
9077   SDValue Result;
9078   if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
9079     if (SrcVT.isInteger()) {
9080       assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
9081 
9082       unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
9083                                     ? RISCVISD::VSEXT_VL
9084                                     : RISCVISD::VZEXT_VL;
9085 
9086       // Do we need to do any pre-widening before converting?
9087       if (SrcEltSize == 1) {
9088         MVT IntVT = DstVT.changeVectorElementTypeToInteger();
9089         MVT XLenVT = Subtarget.getXLenVT();
9090         SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9091         SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
9092                                         DAG.getUNDEF(IntVT), Zero, VL);
9093         SDValue One = DAG.getConstant(
9094             RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
9095         SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
9096                                        DAG.getUNDEF(IntVT), One, VL);
9097         Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat,
9098                           ZeroSplat, VL);
9099       } else if (DstEltSize > (2 * SrcEltSize)) {
9100         // Widen before converting.
9101         MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
9102                                      DstVT.getVectorElementCount());
9103         Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
9104       }
9105 
9106       Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
9107     } else {
9108       assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
9109              "Wrong input/output vector types");
9110 
9111       // Convert f16 to f32 then convert f32 to i64.
9112       if (DstEltSize > (2 * SrcEltSize)) {
9113         assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
9114         MVT InterimFVT =
9115             MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
9116         Src =
9117             DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
9118       }
9119 
9120       Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
9121     }
9122   } else { // Narrowing + Conversion
9123     if (SrcVT.isInteger()) {
9124       assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
9125       // First do a narrowing convert to an FP type half the size, then round
9126       // the FP type to a small FP type if needed.
9127 
9128       MVT InterimFVT = DstVT;
9129       if (SrcEltSize > (2 * DstEltSize)) {
9130         assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
9131         assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
9132         InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
9133       }
9134 
9135       Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
9136 
9137       if (InterimFVT != DstVT) {
9138         Src = Result;
9139         Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
9140       }
9141     } else {
9142       assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
9143              "Wrong input/output vector types");
9144       // First do a narrowing conversion to an integer half the size, then
9145       // truncate if needed.
9146 
9147       if (DstEltSize == 1) {
9148         // First convert to the same size integer, then convert to mask using
9149         // setcc.
9150         assert(SrcEltSize >= 16 && "Unexpected FP type!");
9151         MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
9152                                           DstVT.getVectorElementCount());
9153         Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
9154 
9155         // Compare the integer result to 0. The integer should be 0 or 1/-1,
9156         // otherwise the conversion was undefined.
9157         MVT XLenVT = Subtarget.getXLenVT();
9158         SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
9159         SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
9160                                 DAG.getUNDEF(InterimIVT), SplatZero, VL);
9161         Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
9162                              {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
9163                               DAG.getUNDEF(DstVT), Mask, VL});
9164       } else {
9165         MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
9166                                           DstVT.getVectorElementCount());
9167 
9168         Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
9169 
9170         while (InterimIVT != DstVT) {
9171           SrcEltSize /= 2;
9172           Src = Result;
9173           InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
9174                                         DstVT.getVectorElementCount());
9175           Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
9176                                Src, Mask, VL);
9177         }
9178       }
9179     }
9180   }
9181 
9182   MVT VT = Op.getSimpleValueType();
9183   if (!VT.isFixedLengthVector())
9184     return Result;
9185   return convertFromScalableVector(VT, Result, DAG, Subtarget);
9186 }
9187 
9188 SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
9189                                             unsigned MaskOpc,
9190                                             unsigned VecOpc) const {
9191   MVT VT = Op.getSimpleValueType();
9192   if (VT.getVectorElementType() != MVT::i1)
9193     return lowerVPOp(Op, DAG, VecOpc, true);
9194 
9195   // It is safe to drop mask parameter as masked-off elements are undef.
9196   SDValue Op1 = Op->getOperand(0);
9197   SDValue Op2 = Op->getOperand(1);
9198   SDValue VL = Op->getOperand(3);
9199 
9200   MVT ContainerVT = VT;
9201   const bool IsFixed = VT.isFixedLengthVector();
9202   if (IsFixed) {
9203     ContainerVT = getContainerForFixedLengthVector(VT);
9204     Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
9205     Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
9206   }
9207 
9208   SDLoc DL(Op);
9209   SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
9210   if (!IsFixed)
9211     return Val;
9212   return convertFromScalableVector(VT, Val, DAG, Subtarget);
9213 }
9214 
9215 SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
9216                                                 SelectionDAG &DAG) const {
9217   SDLoc DL(Op);
9218   MVT XLenVT = Subtarget.getXLenVT();
9219   MVT VT = Op.getSimpleValueType();
9220   MVT ContainerVT = VT;
9221   if (VT.isFixedLengthVector())
9222     ContainerVT = getContainerForFixedLengthVector(VT);
9223 
9224   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9225 
9226   auto *VPNode = cast<VPStridedLoadSDNode>(Op);
9227   // Check if the mask is known to be all ones
9228   SDValue Mask = VPNode->getMask();
9229   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9230 
9231   SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
9232                                                    : Intrinsic::riscv_vlse_mask,
9233                                         DL, XLenVT);
9234   SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
9235                               DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
9236                               VPNode->getStride()};
9237   if (!IsUnmasked) {
9238     if (VT.isFixedLengthVector()) {
9239       MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
9240       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9241     }
9242     Ops.push_back(Mask);
9243   }
9244   Ops.push_back(VPNode->getVectorLength());
9245   if (!IsUnmasked) {
9246     SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9247     Ops.push_back(Policy);
9248   }
9249 
9250   SDValue Result =
9251       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9252                               VPNode->getMemoryVT(), VPNode->getMemOperand());
9253   SDValue Chain = Result.getValue(1);
9254 
9255   if (VT.isFixedLengthVector())
9256     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9257 
9258   return DAG.getMergeValues({Result, Chain}, DL);
9259 }
9260 
9261 SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
9262                                                  SelectionDAG &DAG) const {
9263   SDLoc DL(Op);
9264   MVT XLenVT = Subtarget.getXLenVT();
9265 
9266   auto *VPNode = cast<VPStridedStoreSDNode>(Op);
9267   SDValue StoreVal = VPNode->getValue();
9268   MVT VT = StoreVal.getSimpleValueType();
9269   MVT ContainerVT = VT;
9270   if (VT.isFixedLengthVector()) {
9271     ContainerVT = getContainerForFixedLengthVector(VT);
9272     StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
9273   }
9274 
9275   // Check if the mask is known to be all ones
9276   SDValue Mask = VPNode->getMask();
9277   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9278 
9279   SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
9280                                                    : Intrinsic::riscv_vsse_mask,
9281                                         DL, XLenVT);
9282   SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
9283                               VPNode->getBasePtr(), VPNode->getStride()};
9284   if (!IsUnmasked) {
9285     if (VT.isFixedLengthVector()) {
9286       MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
9287       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9288     }
9289     Ops.push_back(Mask);
9290   }
9291   Ops.push_back(VPNode->getVectorLength());
9292 
9293   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
9294                                  Ops, VPNode->getMemoryVT(),
9295                                  VPNode->getMemOperand());
9296 }
9297 
9298 // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
9299 // matched to a RVV indexed load. The RVV indexed load instructions only
9300 // support the "unsigned unscaled" addressing mode; indices are implicitly
9301 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
9302 // signed or scaled indexing is extended to the XLEN value type and scaled
9303 // accordingly.
9304 SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
9305                                                SelectionDAG &DAG) const {
9306   SDLoc DL(Op);
9307   MVT VT = Op.getSimpleValueType();
9308 
9309   const auto *MemSD = cast<MemSDNode>(Op.getNode());
9310   EVT MemVT = MemSD->getMemoryVT();
9311   MachineMemOperand *MMO = MemSD->getMemOperand();
9312   SDValue Chain = MemSD->getChain();
9313   SDValue BasePtr = MemSD->getBasePtr();
9314 
9315   ISD::LoadExtType LoadExtType;
9316   SDValue Index, Mask, PassThru, VL;
9317 
9318   if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
9319     Index = VPGN->getIndex();
9320     Mask = VPGN->getMask();
9321     PassThru = DAG.getUNDEF(VT);
9322     VL = VPGN->getVectorLength();
9323     // VP doesn't support extending loads.
9324     LoadExtType = ISD::NON_EXTLOAD;
9325   } else {
9326     // Else it must be a MGATHER.
9327     auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
9328     Index = MGN->getIndex();
9329     Mask = MGN->getMask();
9330     PassThru = MGN->getPassThru();
9331     LoadExtType = MGN->getExtensionType();
9332   }
9333 
9334   MVT IndexVT = Index.getSimpleValueType();
9335   MVT XLenVT = Subtarget.getXLenVT();
9336 
9337   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
9338          "Unexpected VTs!");
9339   assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
9340   // Targets have to explicitly opt-in for extending vector loads.
9341   assert(LoadExtType == ISD::NON_EXTLOAD &&
9342          "Unexpected extending MGATHER/VP_GATHER");
9343   (void)LoadExtType;
9344 
9345   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9346   // the selection of the masked intrinsics doesn't do this for us.
9347   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9348 
9349   MVT ContainerVT = VT;
9350   if (VT.isFixedLengthVector()) {
9351     ContainerVT = getContainerForFixedLengthVector(VT);
9352     IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
9353                                ContainerVT.getVectorElementCount());
9354 
9355     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
9356 
9357     if (!IsUnmasked) {
9358       MVT MaskVT = getMaskTypeFor(ContainerVT);
9359       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9360       PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9361     }
9362   }
9363 
9364   if (!VL)
9365     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9366 
9367   if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
9368     IndexVT = IndexVT.changeVectorElementType(XLenVT);
9369     SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
9370                                    VL);
9371     Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
9372                         TrueMask, VL);
9373   }
9374 
9375   unsigned IntID =
9376       IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
9377   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
9378   if (IsUnmasked)
9379     Ops.push_back(DAG.getUNDEF(ContainerVT));
9380   else
9381     Ops.push_back(PassThru);
9382   Ops.push_back(BasePtr);
9383   Ops.push_back(Index);
9384   if (!IsUnmasked)
9385     Ops.push_back(Mask);
9386   Ops.push_back(VL);
9387   if (!IsUnmasked)
9388     Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
9389 
9390   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9391   SDValue Result =
9392       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
9393   Chain = Result.getValue(1);
9394 
9395   if (VT.isFixedLengthVector())
9396     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9397 
9398   return DAG.getMergeValues({Result, Chain}, DL);
9399 }
9400 
9401 // Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
9402 // matched to a RVV indexed store. The RVV indexed store instructions only
9403 // support the "unsigned unscaled" addressing mode; indices are implicitly
9404 // zero-extended or truncated to XLEN and are treated as byte offsets. Any
9405 // signed or scaled indexing is extended to the XLEN value type and scaled
9406 // accordingly.
9407 SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
9408                                                 SelectionDAG &DAG) const {
9409   SDLoc DL(Op);
9410   const auto *MemSD = cast<MemSDNode>(Op.getNode());
9411   EVT MemVT = MemSD->getMemoryVT();
9412   MachineMemOperand *MMO = MemSD->getMemOperand();
9413   SDValue Chain = MemSD->getChain();
9414   SDValue BasePtr = MemSD->getBasePtr();
9415 
9416   bool IsTruncatingStore = false;
9417   SDValue Index, Mask, Val, VL;
9418 
9419   if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
9420     Index = VPSN->getIndex();
9421     Mask = VPSN->getMask();
9422     Val = VPSN->getValue();
9423     VL = VPSN->getVectorLength();
9424     // VP doesn't support truncating stores.
9425     IsTruncatingStore = false;
9426   } else {
9427     // Else it must be a MSCATTER.
9428     auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
9429     Index = MSN->getIndex();
9430     Mask = MSN->getMask();
9431     Val = MSN->getValue();
9432     IsTruncatingStore = MSN->isTruncatingStore();
9433   }
9434 
9435   MVT VT = Val.getSimpleValueType();
9436   MVT IndexVT = Index.getSimpleValueType();
9437   MVT XLenVT = Subtarget.getXLenVT();
9438 
9439   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
9440          "Unexpected VTs!");
9441   assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
9442   // Targets have to explicitly opt-in for extending vector loads and
9443   // truncating vector stores.
9444   assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
9445   (void)IsTruncatingStore;
9446 
9447   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9448   // the selection of the masked intrinsics doesn't do this for us.
9449   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9450 
9451   MVT ContainerVT = VT;
9452   if (VT.isFixedLengthVector()) {
9453     ContainerVT = getContainerForFixedLengthVector(VT);
9454     IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
9455                                ContainerVT.getVectorElementCount());
9456 
9457     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
9458     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9459 
9460     if (!IsUnmasked) {
9461       MVT MaskVT = getMaskTypeFor(ContainerVT);
9462       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9463     }
9464   }
9465 
9466   if (!VL)
9467     VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9468 
9469   if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
9470     IndexVT = IndexVT.changeVectorElementType(XLenVT);
9471     SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
9472                                    VL);
9473     Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
9474                         TrueMask, VL);
9475   }
9476 
9477   unsigned IntID =
9478       IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
9479   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
9480   Ops.push_back(Val);
9481   Ops.push_back(BasePtr);
9482   Ops.push_back(Index);
9483   if (!IsUnmasked)
9484     Ops.push_back(Mask);
9485   Ops.push_back(VL);
9486 
9487   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
9488                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
9489 }
9490 
9491 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
9492                                                SelectionDAG &DAG) const {
9493   const MVT XLenVT = Subtarget.getXLenVT();
9494   SDLoc DL(Op);
9495   SDValue Chain = Op->getOperand(0);
9496   SDValue SysRegNo = DAG.getTargetConstant(
9497       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
9498   SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
9499   SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
9500 
9501   // Encoding used for rounding mode in RISC-V differs from that used in
9502   // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
9503   // table, which consists of a sequence of 4-bit fields, each representing
9504   // corresponding FLT_ROUNDS mode.
9505   static const int Table =
9506       (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
9507       (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
9508       (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
9509       (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
9510       (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
9511 
9512   SDValue Shift =
9513       DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
9514   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
9515                                 DAG.getConstant(Table, DL, XLenVT), Shift);
9516   SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
9517                                DAG.getConstant(7, DL, XLenVT));
9518 
9519   return DAG.getMergeValues({Masked, Chain}, DL);
9520 }
9521 
9522 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
9523                                                SelectionDAG &DAG) const {
9524   const MVT XLenVT = Subtarget.getXLenVT();
9525   SDLoc DL(Op);
9526   SDValue Chain = Op->getOperand(0);
9527   SDValue RMValue = Op->getOperand(1);
9528   SDValue SysRegNo = DAG.getTargetConstant(
9529       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
9530 
9531   // Encoding used for rounding mode in RISC-V differs from that used in
9532   // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
9533   // a table, which consists of a sequence of 4-bit fields, each representing
9534   // corresponding RISC-V mode.
9535   static const unsigned Table =
9536       (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
9537       (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
9538       (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
9539       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
9540       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
9541 
9542   SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
9543                               DAG.getConstant(2, DL, XLenVT));
9544   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
9545                                 DAG.getConstant(Table, DL, XLenVT), Shift);
9546   RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
9547                         DAG.getConstant(0x7, DL, XLenVT));
9548   return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
9549                      RMValue);
9550 }
9551 
9552 SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
9553                                                SelectionDAG &DAG) const {
9554   MachineFunction &MF = DAG.getMachineFunction();
9555 
9556   bool isRISCV64 = Subtarget.is64Bit();
9557   EVT PtrVT = getPointerTy(DAG.getDataLayout());
9558 
9559   int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
9560   return DAG.getFrameIndex(FI, PtrVT);
9561 }
9562 
9563 // Returns the opcode of the target-specific SDNode that implements the 32-bit
9564 // form of the given Opcode.
9565 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
9566   switch (Opcode) {
9567   default:
9568     llvm_unreachable("Unexpected opcode");
9569   case ISD::SHL:
9570     return RISCVISD::SLLW;
9571   case ISD::SRA:
9572     return RISCVISD::SRAW;
9573   case ISD::SRL:
9574     return RISCVISD::SRLW;
9575   case ISD::SDIV:
9576     return RISCVISD::DIVW;
9577   case ISD::UDIV:
9578     return RISCVISD::DIVUW;
9579   case ISD::UREM:
9580     return RISCVISD::REMUW;
9581   case ISD::ROTL:
9582     return RISCVISD::ROLW;
9583   case ISD::ROTR:
9584     return RISCVISD::RORW;
9585   }
9586 }
9587 
9588 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
9589 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
9590 // otherwise be promoted to i64, making it difficult to select the
9591 // SLLW/DIVUW/.../*W later one because the fact the operation was originally of
9592 // type i8/i16/i32 is lost.
9593 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
9594                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
9595   SDLoc DL(N);
9596   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
9597   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
9598   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
9599   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
9600   // ReplaceNodeResults requires we maintain the same type for the return value.
9601   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
9602 }
9603 
9604 // Converts the given 32-bit operation to a i64 operation with signed extension
9605 // semantic to reduce the signed extension instructions.
9606 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
9607   SDLoc DL(N);
9608   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
9609   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
9610   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
9611   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
9612                                DAG.getValueType(MVT::i32));
9613   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
9614 }
9615 
9616 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
9617                                              SmallVectorImpl<SDValue> &Results,
9618                                              SelectionDAG &DAG) const {
9619   SDLoc DL(N);
9620   switch (N->getOpcode()) {
9621   default:
9622     llvm_unreachable("Don't know how to custom type legalize this operation!");
9623   case ISD::STRICT_FP_TO_SINT:
9624   case ISD::STRICT_FP_TO_UINT:
9625   case ISD::FP_TO_SINT:
9626   case ISD::FP_TO_UINT: {
9627     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9628            "Unexpected custom legalisation");
9629     bool IsStrict = N->isStrictFPOpcode();
9630     bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
9631                     N->getOpcode() == ISD::STRICT_FP_TO_SINT;
9632     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
9633     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
9634         TargetLowering::TypeSoftenFloat) {
9635       if (!isTypeLegal(Op0.getValueType()))
9636         return;
9637       if (IsStrict) {
9638         SDValue Chain = N->getOperand(0);
9639         // In absense of Zfh, promote f16 to f32, then convert.
9640         if (Op0.getValueType() == MVT::f16 &&
9641             !Subtarget.hasStdExtZfhOrZhinx()) {
9642           Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
9643                             {Chain, Op0});
9644           Chain = Op0.getValue(1);
9645         }
9646         unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
9647                                 : RISCVISD::STRICT_FCVT_WU_RV64;
9648         SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
9649         SDValue Res = DAG.getNode(
9650             Opc, DL, VTs, Chain, Op0,
9651             DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
9652         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9653         Results.push_back(Res.getValue(1));
9654         return;
9655       }
9656       // In absense of Zfh, promote f16 to f32, then convert.
9657       if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
9658         Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
9659 
9660       unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
9661       SDValue Res =
9662           DAG.getNode(Opc, DL, MVT::i64, Op0,
9663                       DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
9664       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9665       return;
9666     }
9667     // If the FP type needs to be softened, emit a library call using the 'si'
9668     // version. If we left it to default legalization we'd end up with 'di'. If
9669     // the FP type doesn't need to be softened just let generic type
9670     // legalization promote the result type.
9671     RTLIB::Libcall LC;
9672     if (IsSigned)
9673       LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
9674     else
9675       LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
9676     MakeLibCallOptions CallOptions;
9677     EVT OpVT = Op0.getValueType();
9678     CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
9679     SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
9680     SDValue Result;
9681     std::tie(Result, Chain) =
9682         makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
9683     Results.push_back(Result);
9684     if (IsStrict)
9685       Results.push_back(Chain);
9686     break;
9687   }
9688   case ISD::LROUND: {
9689     SDValue Op0 = N->getOperand(0);
9690     EVT Op0VT = Op0.getValueType();
9691     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
9692         TargetLowering::TypeSoftenFloat) {
9693       if (!isTypeLegal(Op0VT))
9694         return;
9695 
9696       // In absense of Zfh, promote f16 to f32, then convert.
9697       if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
9698         Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
9699 
9700       SDValue Res =
9701           DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
9702                       DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
9703       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9704       return;
9705     }
9706     // If the FP type needs to be softened, emit a library call to lround. We'll
9707     // need to truncate the result. We assume any value that doesn't fit in i32
9708     // is allowed to return an unspecified value.
9709     RTLIB::Libcall LC =
9710         Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
9711     MakeLibCallOptions CallOptions;
9712     EVT OpVT = Op0.getValueType();
9713     CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
9714     SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
9715     Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
9716     Results.push_back(Result);
9717     break;
9718   }
9719   case ISD::READCYCLECOUNTER: {
9720     assert(!Subtarget.is64Bit() &&
9721            "READCYCLECOUNTER only has custom type legalization on riscv32");
9722 
9723     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
9724     SDValue RCW =
9725         DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
9726 
9727     Results.push_back(
9728         DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
9729     Results.push_back(RCW.getValue(2));
9730     break;
9731   }
9732   case ISD::LOAD: {
9733     if (!ISD::isNON_EXTLoad(N))
9734       return;
9735 
9736     // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
9737     // sext_inreg we emit for ADD/SUB/MUL/SLLI.
9738     LoadSDNode *Ld = cast<LoadSDNode>(N);
9739 
9740     SDLoc dl(N);
9741     SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
9742                                  Ld->getBasePtr(), Ld->getMemoryVT(),
9743                                  Ld->getMemOperand());
9744     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
9745     Results.push_back(Res.getValue(1));
9746     return;
9747   }
9748   case ISD::MUL: {
9749     unsigned Size = N->getSimpleValueType(0).getSizeInBits();
9750     unsigned XLen = Subtarget.getXLen();
9751     // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
9752     if (Size > XLen) {
9753       assert(Size == (XLen * 2) && "Unexpected custom legalisation");
9754       SDValue LHS = N->getOperand(0);
9755       SDValue RHS = N->getOperand(1);
9756       APInt HighMask = APInt::getHighBitsSet(Size, XLen);
9757 
9758       bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
9759       bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
9760       // We need exactly one side to be unsigned.
9761       if (LHSIsU == RHSIsU)
9762         return;
9763 
9764       auto MakeMULPair = [&](SDValue S, SDValue U) {
9765         MVT XLenVT = Subtarget.getXLenVT();
9766         S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
9767         U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
9768         SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
9769         SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
9770         return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
9771       };
9772 
9773       bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
9774       bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
9775 
9776       // The other operand should be signed, but still prefer MULH when
9777       // possible.
9778       if (RHSIsU && LHSIsS && !RHSIsS)
9779         Results.push_back(MakeMULPair(LHS, RHS));
9780       else if (LHSIsU && RHSIsS && !LHSIsS)
9781         Results.push_back(MakeMULPair(RHS, LHS));
9782 
9783       return;
9784     }
9785     [[fallthrough]];
9786   }
9787   case ISD::ADD:
9788   case ISD::SUB:
9789     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9790            "Unexpected custom legalisation");
9791     Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
9792     break;
9793   case ISD::SHL:
9794   case ISD::SRA:
9795   case ISD::SRL:
9796     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9797            "Unexpected custom legalisation");
9798     if (N->getOperand(1).getOpcode() != ISD::Constant) {
9799       // If we can use a BSET instruction, allow default promotion to apply.
9800       if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
9801           isOneConstant(N->getOperand(0)))
9802         break;
9803       Results.push_back(customLegalizeToWOp(N, DAG));
9804       break;
9805     }
9806 
9807     // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
9808     // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
9809     // shift amount.
9810     if (N->getOpcode() == ISD::SHL) {
9811       SDLoc DL(N);
9812       SDValue NewOp0 =
9813           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
9814       SDValue NewOp1 =
9815           DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
9816       SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
9817       SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
9818                                    DAG.getValueType(MVT::i32));
9819       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
9820     }
9821 
9822     break;
9823   case ISD::ROTL:
9824   case ISD::ROTR:
9825     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9826            "Unexpected custom legalisation");
9827     assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
9828             Subtarget.hasVendorXTHeadBb()) &&
9829            "Unexpected custom legalization");
9830     if (!isa<ConstantSDNode>(N->getOperand(1)) &&
9831         !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
9832       return;
9833     Results.push_back(customLegalizeToWOp(N, DAG));
9834     break;
9835   case ISD::CTTZ:
9836   case ISD::CTTZ_ZERO_UNDEF:
9837   case ISD::CTLZ:
9838   case ISD::CTLZ_ZERO_UNDEF: {
9839     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9840            "Unexpected custom legalisation");
9841 
9842     SDValue NewOp0 =
9843         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
9844     bool IsCTZ =
9845         N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
9846     unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
9847     SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
9848     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9849     return;
9850   }
9851   case ISD::SDIV:
9852   case ISD::UDIV:
9853   case ISD::UREM: {
9854     MVT VT = N->getSimpleValueType(0);
9855     assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
9856            Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
9857            "Unexpected custom legalisation");
9858     // Don't promote division/remainder by constant since we should expand those
9859     // to multiply by magic constant.
9860     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
9861     if (N->getOperand(1).getOpcode() == ISD::Constant &&
9862         !isIntDivCheap(N->getValueType(0), Attr))
9863       return;
9864 
9865     // If the input is i32, use ANY_EXTEND since the W instructions don't read
9866     // the upper 32 bits. For other types we need to sign or zero extend
9867     // based on the opcode.
9868     unsigned ExtOpc = ISD::ANY_EXTEND;
9869     if (VT != MVT::i32)
9870       ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
9871                                            : ISD::ZERO_EXTEND;
9872 
9873     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
9874     break;
9875   }
9876   case ISD::SADDO: {
9877     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9878            "Unexpected custom legalisation");
9879 
9880     // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
9881     // use the default legalization.
9882     if (!isa<ConstantSDNode>(N->getOperand(1)))
9883       return;
9884 
9885     SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
9886     SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
9887     SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
9888     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
9889                       DAG.getValueType(MVT::i32));
9890 
9891     SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
9892 
9893     // For an addition, the result should be less than one of the operands (LHS)
9894     // if and only if the other operand (RHS) is negative, otherwise there will
9895     // be overflow.
9896     // For a subtraction, the result should be less than one of the operands
9897     // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9898     // otherwise there will be overflow.
9899     EVT OType = N->getValueType(1);
9900     SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
9901     SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
9902 
9903     SDValue Overflow =
9904         DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
9905     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9906     Results.push_back(Overflow);
9907     return;
9908   }
9909   case ISD::UADDO:
9910   case ISD::USUBO: {
9911     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9912            "Unexpected custom legalisation");
9913     bool IsAdd = N->getOpcode() == ISD::UADDO;
9914     // Create an ADDW or SUBW.
9915     SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
9916     SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
9917     SDValue Res =
9918         DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
9919     Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
9920                       DAG.getValueType(MVT::i32));
9921 
9922     SDValue Overflow;
9923     if (IsAdd && isOneConstant(RHS)) {
9924       // Special case uaddo X, 1 overflowed if the addition result is 0.
9925       // The general case (X + C) < C is not necessarily beneficial. Although we
9926       // reduce the live range of X, we may introduce the materialization of
9927       // constant C, especially when the setcc result is used by branch. We have
9928       // no compare with constant and branch instructions.
9929       Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
9930                               DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
9931     } else if (IsAdd && isAllOnesConstant(RHS)) {
9932       // Special case uaddo X, -1 overflowed if X != 0.
9933       Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
9934                               DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
9935     } else {
9936       // Sign extend the LHS and perform an unsigned compare with the ADDW
9937       // result. Since the inputs are sign extended from i32, this is equivalent
9938       // to comparing the lower 32 bits.
9939       LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
9940       Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
9941                               IsAdd ? ISD::SETULT : ISD::SETUGT);
9942     }
9943 
9944     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9945     Results.push_back(Overflow);
9946     return;
9947   }
9948   case ISD::UADDSAT:
9949   case ISD::USUBSAT: {
9950     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9951            "Unexpected custom legalisation");
9952     if (Subtarget.hasStdExtZbb()) {
9953       // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
9954       // sign extend allows overflow of the lower 32 bits to be detected on
9955       // the promoted size.
9956       SDValue LHS =
9957           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
9958       SDValue RHS =
9959           DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
9960       SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
9961       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
9962       return;
9963     }
9964 
9965     // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
9966     // promotion for UADDO/USUBO.
9967     Results.push_back(expandAddSubSat(N, DAG));
9968     return;
9969   }
9970   case ISD::ABS: {
9971     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
9972            "Unexpected custom legalisation");
9973 
9974     if (Subtarget.hasStdExtZbb()) {
9975       // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
9976       // This allows us to remember that the result is sign extended. Expanding
9977       // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
9978       SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
9979                                 N->getOperand(0));
9980       SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
9981       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
9982       return;
9983     }
9984 
9985     // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
9986     SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
9987 
9988     // Freeze the source so we can increase it's use count.
9989     Src = DAG.getFreeze(Src);
9990 
9991     // Copy sign bit to all bits using the sraiw pattern.
9992     SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
9993                                    DAG.getValueType(MVT::i32));
9994     SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
9995                            DAG.getConstant(31, DL, MVT::i64));
9996 
9997     SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
9998     NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
9999 
10000     // NOTE: The result is only required to be anyextended, but sext is
10001     // consistent with type legalization of sub.
10002     NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
10003                          DAG.getValueType(MVT::i32));
10004     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
10005     return;
10006   }
10007   case ISD::BITCAST: {
10008     EVT VT = N->getValueType(0);
10009     assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
10010     SDValue Op0 = N->getOperand(0);
10011     EVT Op0VT = Op0.getValueType();
10012     MVT XLenVT = Subtarget.getXLenVT();
10013     if (VT == MVT::i16 && Op0VT == MVT::f16 &&
10014         Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
10015       SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
10016       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
10017     } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
10018         Subtarget.hasStdExtZfbfmin()) {
10019       SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
10020       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
10021     } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
10022                Subtarget.hasStdExtFOrZfinx()) {
10023       SDValue FPConv =
10024           DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
10025       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
10026     } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 &&
10027                Subtarget.hasStdExtZfa()) {
10028       SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
10029                                    DAG.getVTList(MVT::i32, MVT::i32), Op0);
10030       SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
10031                                    NewReg.getValue(0), NewReg.getValue(1));
10032       Results.push_back(RetReg);
10033     } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
10034                isTypeLegal(Op0VT)) {
10035       // Custom-legalize bitcasts from fixed-length vector types to illegal
10036       // scalar types in order to improve codegen. Bitcast the vector to a
10037       // one-element vector type whose element type is the same as the result
10038       // type, and extract the first element.
10039       EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
10040       if (isTypeLegal(BVT)) {
10041         SDValue BVec = DAG.getBitcast(BVT, Op0);
10042         Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
10043                                       DAG.getConstant(0, DL, XLenVT)));
10044       }
10045     }
10046     break;
10047   }
10048   case RISCVISD::BREV8: {
10049     MVT VT = N->getSimpleValueType(0);
10050     MVT XLenVT = Subtarget.getXLenVT();
10051     assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
10052            "Unexpected custom legalisation");
10053     assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
10054     SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
10055     SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
10056     // ReplaceNodeResults requires we maintain the same type for the return
10057     // value.
10058     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
10059     break;
10060   }
10061   case ISD::EXTRACT_VECTOR_ELT: {
10062     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
10063     // type is illegal (currently only vXi64 RV32).
10064     // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
10065     // transferred to the destination register. We issue two of these from the
10066     // upper- and lower- halves of the SEW-bit vector element, slid down to the
10067     // first element.
10068     SDValue Vec = N->getOperand(0);
10069     SDValue Idx = N->getOperand(1);
10070 
10071     // The vector type hasn't been legalized yet so we can't issue target
10072     // specific nodes if it needs legalization.
10073     // FIXME: We would manually legalize if it's important.
10074     if (!isTypeLegal(Vec.getValueType()))
10075       return;
10076 
10077     MVT VecVT = Vec.getSimpleValueType();
10078 
10079     assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
10080            VecVT.getVectorElementType() == MVT::i64 &&
10081            "Unexpected EXTRACT_VECTOR_ELT legalization");
10082 
10083     // If this is a fixed vector, we need to convert it to a scalable vector.
10084     MVT ContainerVT = VecVT;
10085     if (VecVT.isFixedLengthVector()) {
10086       ContainerVT = getContainerForFixedLengthVector(VecVT);
10087       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10088     }
10089 
10090     MVT XLenVT = Subtarget.getXLenVT();
10091 
10092     // Use a VL of 1 to avoid processing more elements than we need.
10093     auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10094 
10095     // Unless the index is known to be 0, we must slide the vector down to get
10096     // the desired element into index 0.
10097     if (!isNullConstant(Idx)) {
10098       Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10099                           DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10100     }
10101 
10102     // Extract the lower XLEN bits of the correct vector element.
10103     SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10104 
10105     // To extract the upper XLEN bits of the vector element, shift the first
10106     // element right by 32 bits and re-extract the lower XLEN bits.
10107     SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10108                                      DAG.getUNDEF(ContainerVT),
10109                                      DAG.getConstant(32, DL, XLenVT), VL);
10110     SDValue LShr32 =
10111         DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
10112                     DAG.getUNDEF(ContainerVT), Mask, VL);
10113 
10114     SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
10115 
10116     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
10117     break;
10118   }
10119   case ISD::INTRINSIC_WO_CHAIN: {
10120     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
10121     switch (IntNo) {
10122     default:
10123       llvm_unreachable(
10124           "Don't know how to custom type legalize this intrinsic!");
10125     case Intrinsic::experimental_get_vector_length: {
10126       SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
10127       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10128       return;
10129     }
10130     case Intrinsic::riscv_orc_b:
10131     case Intrinsic::riscv_brev8:
10132     case Intrinsic::riscv_sha256sig0:
10133     case Intrinsic::riscv_sha256sig1:
10134     case Intrinsic::riscv_sha256sum0:
10135     case Intrinsic::riscv_sha256sum1:
10136     case Intrinsic::riscv_sm3p0:
10137     case Intrinsic::riscv_sm3p1: {
10138       if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
10139         return;
10140       unsigned Opc;
10141       switch (IntNo) {
10142       case Intrinsic::riscv_orc_b:      Opc = RISCVISD::ORC_B;      break;
10143       case Intrinsic::riscv_brev8:      Opc = RISCVISD::BREV8;      break;
10144       case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10145       case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10146       case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10147       case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10148       case Intrinsic::riscv_sm3p0:      Opc = RISCVISD::SM3P0;      break;
10149       case Intrinsic::riscv_sm3p1:      Opc = RISCVISD::SM3P1;      break;
10150       }
10151 
10152       SDValue NewOp =
10153           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
10154       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
10155       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10156       return;
10157     }
10158     case Intrinsic::riscv_sm4ks:
10159     case Intrinsic::riscv_sm4ed: {
10160       unsigned Opc =
10161           IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10162       SDValue NewOp0 =
10163           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
10164       SDValue NewOp1 =
10165           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
10166       SDValue Res =
10167           DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
10168       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10169       return;
10170     }
10171     case Intrinsic::riscv_clmul: {
10172       if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
10173         return;
10174 
10175       SDValue NewOp0 =
10176           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
10177       SDValue NewOp1 =
10178           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
10179       SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
10180       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10181       return;
10182     }
10183     case Intrinsic::riscv_clmulh:
10184     case Intrinsic::riscv_clmulr: {
10185       if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
10186         return;
10187 
10188       // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
10189       // to the full 128-bit clmul result of multiplying two xlen values.
10190       // Perform clmulr or clmulh on the shifted values. Finally, extract the
10191       // upper 32 bits.
10192       //
10193       // The alternative is to mask the inputs to 32 bits and use clmul, but
10194       // that requires two shifts to mask each input without zext.w.
10195       // FIXME: If the inputs are known zero extended or could be freely
10196       // zero extended, the mask form would be better.
10197       SDValue NewOp0 =
10198           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
10199       SDValue NewOp1 =
10200           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
10201       NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
10202                            DAG.getConstant(32, DL, MVT::i64));
10203       NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
10204                            DAG.getConstant(32, DL, MVT::i64));
10205       unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
10206                                                       : RISCVISD::CLMULR;
10207       SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
10208       Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
10209                         DAG.getConstant(32, DL, MVT::i64));
10210       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
10211       return;
10212     }
10213     case Intrinsic::riscv_vmv_x_s: {
10214       EVT VT = N->getValueType(0);
10215       MVT XLenVT = Subtarget.getXLenVT();
10216       if (VT.bitsLT(XLenVT)) {
10217         // Simple case just extract using vmv.x.s and truncate.
10218         SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
10219                                       Subtarget.getXLenVT(), N->getOperand(1));
10220         Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
10221         return;
10222       }
10223 
10224       assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
10225              "Unexpected custom legalization");
10226 
10227       // We need to do the move in two steps.
10228       SDValue Vec = N->getOperand(1);
10229       MVT VecVT = Vec.getSimpleValueType();
10230 
10231       // First extract the lower XLEN bits of the element.
10232       SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10233 
10234       // To extract the upper XLEN bits of the vector element, shift the first
10235       // element right by 32 bits and re-extract the lower XLEN bits.
10236       auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
10237 
10238       SDValue ThirtyTwoV =
10239           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
10240                       DAG.getConstant(32, DL, XLenVT), VL);
10241       SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
10242                                    DAG.getUNDEF(VecVT), Mask, VL);
10243       SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
10244 
10245       Results.push_back(
10246           DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
10247       break;
10248     }
10249     }
10250     break;
10251   }
10252   case ISD::VECREDUCE_ADD:
10253   case ISD::VECREDUCE_AND:
10254   case ISD::VECREDUCE_OR:
10255   case ISD::VECREDUCE_XOR:
10256   case ISD::VECREDUCE_SMAX:
10257   case ISD::VECREDUCE_UMAX:
10258   case ISD::VECREDUCE_SMIN:
10259   case ISD::VECREDUCE_UMIN:
10260     if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
10261       Results.push_back(V);
10262     break;
10263   case ISD::VP_REDUCE_ADD:
10264   case ISD::VP_REDUCE_AND:
10265   case ISD::VP_REDUCE_OR:
10266   case ISD::VP_REDUCE_XOR:
10267   case ISD::VP_REDUCE_SMAX:
10268   case ISD::VP_REDUCE_UMAX:
10269   case ISD::VP_REDUCE_SMIN:
10270   case ISD::VP_REDUCE_UMIN:
10271     if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
10272       Results.push_back(V);
10273     break;
10274   case ISD::GET_ROUNDING: {
10275     SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
10276     SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
10277     Results.push_back(Res.getValue(0));
10278     Results.push_back(Res.getValue(1));
10279     break;
10280   }
10281   }
10282 }
10283 
10284 // Try to fold (<bop> x, (reduction.<bop> vec, start))
10285 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
10286                                     const RISCVSubtarget &Subtarget) {
10287   auto BinOpToRVVReduce = [](unsigned Opc) {
10288     switch (Opc) {
10289     default:
10290       llvm_unreachable("Unhandled binary to transfrom reduction");
10291     case ISD::ADD:
10292       return RISCVISD::VECREDUCE_ADD_VL;
10293     case ISD::UMAX:
10294       return RISCVISD::VECREDUCE_UMAX_VL;
10295     case ISD::SMAX:
10296       return RISCVISD::VECREDUCE_SMAX_VL;
10297     case ISD::UMIN:
10298       return RISCVISD::VECREDUCE_UMIN_VL;
10299     case ISD::SMIN:
10300       return RISCVISD::VECREDUCE_SMIN_VL;
10301     case ISD::AND:
10302       return RISCVISD::VECREDUCE_AND_VL;
10303     case ISD::OR:
10304       return RISCVISD::VECREDUCE_OR_VL;
10305     case ISD::XOR:
10306       return RISCVISD::VECREDUCE_XOR_VL;
10307     case ISD::FADD:
10308       return RISCVISD::VECREDUCE_FADD_VL;
10309     case ISD::FMAXNUM:
10310       return RISCVISD::VECREDUCE_FMAX_VL;
10311     case ISD::FMINNUM:
10312       return RISCVISD::VECREDUCE_FMIN_VL;
10313     }
10314   };
10315 
10316   auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
10317     return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
10318            isNullConstant(V.getOperand(1)) &&
10319            V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
10320   };
10321 
10322   unsigned Opc = N->getOpcode();
10323   unsigned ReduceIdx;
10324   if (IsReduction(N->getOperand(0), Opc))
10325     ReduceIdx = 0;
10326   else if (IsReduction(N->getOperand(1), Opc))
10327     ReduceIdx = 1;
10328   else
10329     return SDValue();
10330 
10331   // Skip if FADD disallows reassociation but the combiner needs.
10332   if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
10333     return SDValue();
10334 
10335   SDValue Extract = N->getOperand(ReduceIdx);
10336   SDValue Reduce = Extract.getOperand(0);
10337   if (!Extract.hasOneUse() || !Reduce.hasOneUse())
10338     return SDValue();
10339 
10340   SDValue ScalarV = Reduce.getOperand(2);
10341   EVT ScalarVT = ScalarV.getValueType();
10342   if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
10343       ScalarV.getOperand(0)->isUndef() &&
10344       isNullConstant(ScalarV.getOperand(2)))
10345     ScalarV = ScalarV.getOperand(1);
10346 
10347   // Make sure that ScalarV is a splat with VL=1.
10348   if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
10349       ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
10350       ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
10351     return SDValue();
10352 
10353   if (!isNonZeroAVL(ScalarV.getOperand(2)))
10354     return SDValue();
10355 
10356   // Check the scalar of ScalarV is neutral element
10357   // TODO: Deal with value other than neutral element.
10358   if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
10359                          0))
10360     return SDValue();
10361 
10362   // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
10363   // FIXME: We might be able to improve this if operand 0 is undef.
10364   if (!isNonZeroAVL(Reduce.getOperand(5)))
10365     return SDValue();
10366 
10367   SDValue NewStart = N->getOperand(1 - ReduceIdx);
10368 
10369   SDLoc DL(N);
10370   SDValue NewScalarV =
10371       lowerScalarInsert(NewStart, ScalarV.getOperand(2),
10372                         ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
10373 
10374   // If we looked through an INSERT_SUBVECTOR we need to restore it.
10375   if (ScalarVT != ScalarV.getValueType())
10376     NewScalarV =
10377         DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
10378                     NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
10379 
10380   SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
10381                    NewScalarV,           Reduce.getOperand(3),
10382                    Reduce.getOperand(4), Reduce.getOperand(5)};
10383   SDValue NewReduce =
10384       DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
10385   return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
10386                      Extract.getOperand(1));
10387 }
10388 
10389 // Optimize (add (shl x, c0), (shl y, c1)) ->
10390 //          (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
10391 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
10392                                   const RISCVSubtarget &Subtarget) {
10393   // Perform this optimization only in the zba extension.
10394   if (!Subtarget.hasStdExtZba())
10395     return SDValue();
10396 
10397   // Skip for vector types and larger types.
10398   EVT VT = N->getValueType(0);
10399   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
10400     return SDValue();
10401 
10402   // The two operand nodes must be SHL and have no other use.
10403   SDValue N0 = N->getOperand(0);
10404   SDValue N1 = N->getOperand(1);
10405   if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
10406       !N0->hasOneUse() || !N1->hasOneUse())
10407     return SDValue();
10408 
10409   // Check c0 and c1.
10410   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
10411   auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
10412   if (!N0C || !N1C)
10413     return SDValue();
10414   int64_t C0 = N0C->getSExtValue();
10415   int64_t C1 = N1C->getSExtValue();
10416   if (C0 <= 0 || C1 <= 0)
10417     return SDValue();
10418 
10419   // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
10420   int64_t Bits = std::min(C0, C1);
10421   int64_t Diff = std::abs(C0 - C1);
10422   if (Diff != 1 && Diff != 2 && Diff != 3)
10423     return SDValue();
10424 
10425   // Build nodes.
10426   SDLoc DL(N);
10427   SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
10428   SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
10429   SDValue NA0 =
10430       DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
10431   SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
10432   return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
10433 }
10434 
10435 // Combine a constant select operand into its use:
10436 //
10437 // (and (select cond, -1, c), x)
10438 //   -> (select cond, x, (and x, c))  [AllOnes=1]
10439 // (or  (select cond, 0, c), x)
10440 //   -> (select cond, x, (or x, c))  [AllOnes=0]
10441 // (xor (select cond, 0, c), x)
10442 //   -> (select cond, x, (xor x, c))  [AllOnes=0]
10443 // (add (select cond, 0, c), x)
10444 //   -> (select cond, x, (add x, c))  [AllOnes=0]
10445 // (sub x, (select cond, 0, c))
10446 //   -> (select cond, x, (sub x, c))  [AllOnes=0]
10447 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
10448                                    SelectionDAG &DAG, bool AllOnes,
10449                                    const RISCVSubtarget &Subtarget) {
10450   EVT VT = N->getValueType(0);
10451 
10452   // Skip vectors.
10453   if (VT.isVector())
10454     return SDValue();
10455 
10456   if (!Subtarget.hasShortForwardBranchOpt() ||
10457       (Slct.getOpcode() != ISD::SELECT &&
10458        Slct.getOpcode() != RISCVISD::SELECT_CC) ||
10459       !Slct.hasOneUse())
10460     return SDValue();
10461 
10462   auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
10463     return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
10464   };
10465 
10466   bool SwapSelectOps;
10467   unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
10468   SDValue TrueVal = Slct.getOperand(1 + OpOffset);
10469   SDValue FalseVal = Slct.getOperand(2 + OpOffset);
10470   SDValue NonConstantVal;
10471   if (isZeroOrAllOnes(TrueVal, AllOnes)) {
10472     SwapSelectOps = false;
10473     NonConstantVal = FalseVal;
10474   } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
10475     SwapSelectOps = true;
10476     NonConstantVal = TrueVal;
10477   } else
10478     return SDValue();
10479 
10480   // Slct is now know to be the desired identity constant when CC is true.
10481   TrueVal = OtherOp;
10482   FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
10483   // Unless SwapSelectOps says the condition should be false.
10484   if (SwapSelectOps)
10485     std::swap(TrueVal, FalseVal);
10486 
10487   if (Slct.getOpcode() == RISCVISD::SELECT_CC)
10488     return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
10489                        {Slct.getOperand(0), Slct.getOperand(1),
10490                         Slct.getOperand(2), TrueVal, FalseVal});
10491 
10492   return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
10493                      {Slct.getOperand(0), TrueVal, FalseVal});
10494 }
10495 
10496 // Attempt combineSelectAndUse on each operand of a commutative operator N.
10497 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
10498                                               bool AllOnes,
10499                                               const RISCVSubtarget &Subtarget) {
10500   SDValue N0 = N->getOperand(0);
10501   SDValue N1 = N->getOperand(1);
10502   if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
10503     return Result;
10504   if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
10505     return Result;
10506   return SDValue();
10507 }
10508 
10509 // Transform (add (mul x, c0), c1) ->
10510 //           (add (mul (add x, c1/c0), c0), c1%c0).
10511 // if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
10512 // that should be excluded is when c0*(c1/c0) is simm12, which will lead
10513 // to an infinite loop in DAGCombine if transformed.
10514 // Or transform (add (mul x, c0), c1) ->
10515 //              (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
10516 // if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
10517 // case that should be excluded is when c0*(c1/c0+1) is simm12, which will
10518 // lead to an infinite loop in DAGCombine if transformed.
10519 // Or transform (add (mul x, c0), c1) ->
10520 //              (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
10521 // if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
10522 // case that should be excluded is when c0*(c1/c0-1) is simm12, which will
10523 // lead to an infinite loop in DAGCombine if transformed.
10524 // Or transform (add (mul x, c0), c1) ->
10525 //              (mul (add x, c1/c0), c0).
10526 // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
10527 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
10528                                      const RISCVSubtarget &Subtarget) {
10529   // Skip for vector types and larger types.
10530   EVT VT = N->getValueType(0);
10531   if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
10532     return SDValue();
10533   // The first operand node must be a MUL and has no other use.
10534   SDValue N0 = N->getOperand(0);
10535   if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
10536     return SDValue();
10537   // Check if c0 and c1 match above conditions.
10538   auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
10539   auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
10540   if (!N0C || !N1C)
10541     return SDValue();
10542   // If N0C has multiple uses it's possible one of the cases in
10543   // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
10544   // in an infinite loop.
10545   if (!N0C->hasOneUse())
10546     return SDValue();
10547   int64_t C0 = N0C->getSExtValue();
10548   int64_t C1 = N1C->getSExtValue();
10549   int64_t CA, CB;
10550   if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
10551     return SDValue();
10552   // Search for proper CA (non-zero) and CB that both are simm12.
10553   if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
10554       !isInt<12>(C0 * (C1 / C0))) {
10555     CA = C1 / C0;
10556     CB = C1 % C0;
10557   } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
10558              isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
10559     CA = C1 / C0 + 1;
10560     CB = C1 % C0 - C0;
10561   } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
10562              isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
10563     CA = C1 / C0 - 1;
10564     CB = C1 % C0 + C0;
10565   } else
10566     return SDValue();
10567   // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
10568   SDLoc DL(N);
10569   SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
10570                              DAG.getConstant(CA, DL, VT));
10571   SDValue New1 =
10572       DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
10573   return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
10574 }
10575 
10576 // Try to turn (add (xor (setcc X, Y), 1) -1) into (neg (setcc X, Y)).
10577 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
10578   SDValue N0 = N->getOperand(0);
10579   SDValue N1 = N->getOperand(1);
10580   EVT VT = N->getValueType(0);
10581   SDLoc DL(N);
10582 
10583   // RHS should be -1.
10584   if (!isAllOnesConstant(N1))
10585     return SDValue();
10586 
10587   // Look for an (xor (setcc X, Y), 1).
10588   if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)) ||
10589       N0.getOperand(0).getOpcode() != ISD::SETCC)
10590     return SDValue();
10591 
10592   // Emit a negate of the setcc.
10593   return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
10594                      N0.getOperand(0));
10595 }
10596 
10597 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
10598                                  const RISCVSubtarget &Subtarget) {
10599   if (SDValue V = combineAddOfBooleanXor(N, DAG))
10600     return V;
10601   if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
10602     return V;
10603   if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
10604     return V;
10605   if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
10606     return V;
10607   // fold (add (select lhs, rhs, cc, 0, y), x) ->
10608   //      (select lhs, rhs, cc, x, (add x, y))
10609   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
10610 }
10611 
10612 // Try to turn a sub boolean RHS and constant LHS into an addi.
10613 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
10614   SDValue N0 = N->getOperand(0);
10615   SDValue N1 = N->getOperand(1);
10616   EVT VT = N->getValueType(0);
10617   SDLoc DL(N);
10618 
10619   // Require a constant LHS.
10620   auto *N0C = dyn_cast<ConstantSDNode>(N0);
10621   if (!N0C)
10622     return SDValue();
10623 
10624   // All our optimizations involve subtracting 1 from the immediate and forming
10625   // an ADDI. Make sure the new immediate is valid for an ADDI.
10626   APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
10627   if (!ImmValMinus1.isSignedIntN(12))
10628     return SDValue();
10629 
10630   SDValue NewLHS;
10631   if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
10632     // (sub constant, (setcc x, y, eq/neq)) ->
10633     // (add (setcc x, y, neq/eq), constant - 1)
10634     ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
10635     EVT SetCCOpVT = N1.getOperand(0).getValueType();
10636     if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
10637       return SDValue();
10638     CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
10639     NewLHS =
10640         DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
10641   } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
10642              N1.getOperand(0).getOpcode() == ISD::SETCC) {
10643     // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
10644     // Since setcc returns a bool the xor is equivalent to 1-setcc.
10645     NewLHS = N1.getOperand(0);
10646   } else
10647     return SDValue();
10648 
10649   SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
10650   return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
10651 }
10652 
10653 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
10654                                  const RISCVSubtarget &Subtarget) {
10655   if (SDValue V = combineSubOfBoolean(N, DAG))
10656     return V;
10657 
10658   SDValue N0 = N->getOperand(0);
10659   SDValue N1 = N->getOperand(1);
10660   // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
10661   if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
10662       isNullConstant(N1.getOperand(1))) {
10663     ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
10664     if (CCVal == ISD::SETLT) {
10665       EVT VT = N->getValueType(0);
10666       SDLoc DL(N);
10667       unsigned ShAmt = N0.getValueSizeInBits() - 1;
10668       return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
10669                          DAG.getConstant(ShAmt, DL, VT));
10670     }
10671   }
10672 
10673   // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
10674   //      (select lhs, rhs, cc, x, (sub x, y))
10675   return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
10676 }
10677 
10678 // Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
10679 // Legalizing setcc can introduce xors like this. Doing this transform reduces
10680 // the number of xors and may allow the xor to fold into a branch condition.
10681 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
10682   SDValue N0 = N->getOperand(0);
10683   SDValue N1 = N->getOperand(1);
10684   bool IsAnd = N->getOpcode() == ISD::AND;
10685 
10686   if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
10687     return SDValue();
10688 
10689   if (!N0.hasOneUse() || !N1.hasOneUse())
10690     return SDValue();
10691 
10692   SDValue N01 = N0.getOperand(1);
10693   SDValue N11 = N1.getOperand(1);
10694 
10695   // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
10696   // (xor X, -1) based on the upper bits of the other operand being 0. If the
10697   // operation is And, allow one of the Xors to use -1.
10698   if (isOneConstant(N01)) {
10699     if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
10700       return SDValue();
10701   } else if (isOneConstant(N11)) {
10702     // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
10703     if (!(IsAnd && isAllOnesConstant(N01)))
10704       return SDValue();
10705   } else
10706     return SDValue();
10707 
10708   EVT VT = N->getValueType(0);
10709 
10710   SDValue N00 = N0.getOperand(0);
10711   SDValue N10 = N1.getOperand(0);
10712 
10713   // The LHS of the xors needs to be 0/1.
10714   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
10715   if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
10716     return SDValue();
10717 
10718   // Invert the opcode and insert a new xor.
10719   SDLoc DL(N);
10720   unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
10721   SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
10722   return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
10723 }
10724 
10725 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
10726                                       const RISCVSubtarget &Subtarget) {
10727   SDValue N0 = N->getOperand(0);
10728   EVT VT = N->getValueType(0);
10729 
10730   // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
10731   // extending X. This is safe since we only need the LSB after the shift and
10732   // shift amounts larger than 31 would produce poison. If we wait until
10733   // type legalization, we'll create RISCVISD::SRLW and we can't recover it
10734   // to use a BEXT instruction.
10735   if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
10736       N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
10737       !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
10738     SDLoc DL(N0);
10739     SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
10740     SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
10741     SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
10742     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
10743   }
10744 
10745   return SDValue();
10746 }
10747 
10748 // Combines two comparison operation and logic operation to one selection
10749 // operation(min, max) and logic operation. Returns new constructed Node if
10750 // conditions for optimization are satisfied.
10751 static SDValue performANDCombine(SDNode *N,
10752                                  TargetLowering::DAGCombinerInfo &DCI,
10753                                  const RISCVSubtarget &Subtarget) {
10754   SelectionDAG &DAG = DCI.DAG;
10755 
10756   SDValue N0 = N->getOperand(0);
10757   // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
10758   // extending X. This is safe since we only need the LSB after the shift and
10759   // shift amounts larger than 31 would produce poison. If we wait until
10760   // type legalization, we'll create RISCVISD::SRLW and we can't recover it
10761   // to use a BEXT instruction.
10762   if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
10763       N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
10764       N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
10765       N0.hasOneUse()) {
10766     SDLoc DL(N);
10767     SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
10768     SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
10769     SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
10770     SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
10771                               DAG.getConstant(1, DL, MVT::i64));
10772     return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
10773   }
10774 
10775   if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
10776     return V;
10777 
10778   if (DCI.isAfterLegalizeDAG())
10779     if (SDValue V = combineDeMorganOfBoolean(N, DAG))
10780       return V;
10781 
10782   // fold (and (select lhs, rhs, cc, -1, y), x) ->
10783   //      (select lhs, rhs, cc, x, (and x, y))
10784   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
10785 }
10786 
10787 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
10788                                 const RISCVSubtarget &Subtarget) {
10789   SelectionDAG &DAG = DCI.DAG;
10790 
10791   if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
10792     return V;
10793 
10794   if (DCI.isAfterLegalizeDAG())
10795     if (SDValue V = combineDeMorganOfBoolean(N, DAG))
10796       return V;
10797 
10798   // fold (or (select cond, 0, y), x) ->
10799   //      (select cond, x, (or x, y))
10800   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
10801 }
10802 
10803 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
10804                                  const RISCVSubtarget &Subtarget) {
10805   SDValue N0 = N->getOperand(0);
10806   SDValue N1 = N->getOperand(1);
10807 
10808   // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
10809   // NOTE: Assumes ROL being legal means ROLW is legal.
10810   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10811   if (N0.getOpcode() == RISCVISD::SLLW &&
10812       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
10813       TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
10814     SDLoc DL(N);
10815     return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
10816                        DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
10817   }
10818 
10819   // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
10820   if (N0.hasOneUse() && N0.getOpcode() == ISD::SETCC && isOneConstant(N1)) {
10821     auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
10822     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10823     if (ConstN00 && CC == ISD::SETLT) {
10824       EVT VT = N0.getValueType();
10825       SDLoc DL(N0);
10826       const APInt &Imm = ConstN00->getAPIntValue();
10827       if ((Imm + 1).isSignedIntN(12))
10828         return DAG.getSetCC(DL, VT, N0.getOperand(1),
10829                             DAG.getConstant(Imm + 1, DL, VT), CC);
10830     }
10831   }
10832 
10833   if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
10834     return V;
10835   // fold (xor (select cond, 0, y), x) ->
10836   //      (select cond, x, (xor x, y))
10837   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
10838 }
10839 
10840 // According to the property that indexed load/store instructions
10841 // zero-extended their indices, \p narrowIndex tries to narrow the type of index
10842 // operand if it is matched to pattern (shl (zext x to ty), C) and bits(x) + C <
10843 // bits(ty).
10844 static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) {
10845   if (N.getOpcode() != ISD::SHL || !N->hasOneUse())
10846     return SDValue();
10847 
10848   SDValue N0 = N.getOperand(0);
10849   if (N0.getOpcode() != ISD::ZERO_EXTEND &&
10850       N0.getOpcode() != RISCVISD::VZEXT_VL)
10851     return SDValue();
10852   if (!N0->hasOneUse())
10853     return SDValue();
10854 
10855   APInt ShAmt;
10856   SDValue N1 = N.getOperand(1);
10857   if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
10858     return SDValue();
10859 
10860   SDLoc DL(N);
10861   SDValue Src = N0.getOperand(0);
10862   EVT SrcVT = Src.getValueType();
10863   unsigned SrcElen = SrcVT.getScalarSizeInBits();
10864   unsigned ShAmtV = ShAmt.getZExtValue();
10865   unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
10866   NewElen = std::max(NewElen, 8U);
10867 
10868   // Skip if NewElen is not narrower than the original extended type.
10869   if (NewElen >= N0.getValueType().getScalarSizeInBits())
10870     return SDValue();
10871 
10872   EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
10873   EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
10874 
10875   SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
10876   SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
10877   return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
10878 }
10879 
10880 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
10881 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
10882 // bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
10883 // can become a sext.w instead of a shift pair.
10884 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
10885                                    const RISCVSubtarget &Subtarget) {
10886   SDValue N0 = N->getOperand(0);
10887   SDValue N1 = N->getOperand(1);
10888   EVT VT = N->getValueType(0);
10889   EVT OpVT = N0.getValueType();
10890 
10891   if (OpVT != MVT::i64 || !Subtarget.is64Bit())
10892     return SDValue();
10893 
10894   // RHS needs to be a constant.
10895   auto *N1C = dyn_cast<ConstantSDNode>(N1);
10896   if (!N1C)
10897     return SDValue();
10898 
10899   // LHS needs to be (and X, 0xffffffff).
10900   if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
10901       !isa<ConstantSDNode>(N0.getOperand(1)) ||
10902       N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
10903     return SDValue();
10904 
10905   // Looking for an equality compare.
10906   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10907   if (!isIntEqualitySetCC(Cond))
10908     return SDValue();
10909 
10910   // Don't do this if the sign bit is provably zero, it will be turned back into
10911   // an AND.
10912   APInt SignMask = APInt::getOneBitSet(64, 31);
10913   if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
10914     return SDValue();
10915 
10916   const APInt &C1 = N1C->getAPIntValue();
10917 
10918   SDLoc dl(N);
10919   // If the constant is larger than 2^32 - 1 it is impossible for both sides
10920   // to be equal.
10921   if (C1.getActiveBits() > 32)
10922     return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
10923 
10924   SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
10925                                N0.getOperand(0), DAG.getValueType(MVT::i32));
10926   return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
10927                                                       dl, OpVT), Cond);
10928 }
10929 
10930 static SDValue
10931 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
10932                                 const RISCVSubtarget &Subtarget) {
10933   SDValue Src = N->getOperand(0);
10934   EVT VT = N->getValueType(0);
10935 
10936   // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
10937   if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
10938       cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
10939     return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
10940                        Src.getOperand(0));
10941 
10942   return SDValue();
10943 }
10944 
10945 namespace {
10946 // Forward declaration of the structure holding the necessary information to
10947 // apply a combine.
10948 struct CombineResult;
10949 
10950 /// Helper class for folding sign/zero extensions.
10951 /// In particular, this class is used for the following combines:
10952 /// add_vl -> vwadd(u) | vwadd(u)_w
10953 /// sub_vl -> vwsub(u) | vwsub(u)_w
10954 /// mul_vl -> vwmul(u) | vwmul_su
10955 ///
10956 /// An object of this class represents an operand of the operation we want to
10957 /// combine.
10958 /// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
10959 /// NodeExtensionHelper for `a` and one for `b`.
10960 ///
10961 /// This class abstracts away how the extension is materialized and
10962 /// how its Mask, VL, number of users affect the combines.
10963 ///
10964 /// In particular:
10965 /// - VWADD_W is conceptually == add(op0, sext(op1))
10966 /// - VWADDU_W == add(op0, zext(op1))
10967 /// - VWSUB_W == sub(op0, sext(op1))
10968 /// - VWSUBU_W == sub(op0, zext(op1))
10969 ///
10970 /// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
10971 /// zext|sext(smaller_value).
10972 struct NodeExtensionHelper {
10973   /// Records if this operand is like being zero extended.
10974   bool SupportsZExt;
10975   /// Records if this operand is like being sign extended.
10976   /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
10977   /// instance, a splat constant (e.g., 3), would support being both sign and
10978   /// zero extended.
10979   bool SupportsSExt;
10980   /// This boolean captures whether we care if this operand would still be
10981   /// around after the folding happens.
10982   bool EnforceOneUse;
10983   /// Records if this operand's mask needs to match the mask of the operation
10984   /// that it will fold into.
10985   bool CheckMask;
10986   /// Value of the Mask for this operand.
10987   /// It may be SDValue().
10988   SDValue Mask;
10989   /// Value of the vector length operand.
10990   /// It may be SDValue().
10991   SDValue VL;
10992   /// Original value that this NodeExtensionHelper represents.
10993   SDValue OrigOperand;
10994 
10995   /// Get the value feeding the extension or the value itself.
10996   /// E.g., for zext(a), this would return a.
10997   SDValue getSource() const {
10998     switch (OrigOperand.getOpcode()) {
10999     case RISCVISD::VSEXT_VL:
11000     case RISCVISD::VZEXT_VL:
11001       return OrigOperand.getOperand(0);
11002     default:
11003       return OrigOperand;
11004     }
11005   }
11006 
11007   /// Check if this instance represents a splat.
11008   bool isSplat() const {
11009     return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
11010   }
11011 
11012   /// Get or create a value that can feed \p Root with the given extension \p
11013   /// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
11014   /// \see ::getSource().
11015   SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG,
11016                                 std::optional<bool> SExt) const {
11017     if (!SExt.has_value())
11018       return OrigOperand;
11019 
11020     MVT NarrowVT = getNarrowType(Root);
11021 
11022     SDValue Source = getSource();
11023     if (Source.getValueType() == NarrowVT)
11024       return Source;
11025 
11026     unsigned ExtOpc = *SExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
11027 
11028     // If we need an extension, we should be changing the type.
11029     SDLoc DL(Root);
11030     auto [Mask, VL] = getMaskAndVL(Root);
11031     switch (OrigOperand.getOpcode()) {
11032     case RISCVISD::VSEXT_VL:
11033     case RISCVISD::VZEXT_VL:
11034       return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
11035     case RISCVISD::VMV_V_X_VL:
11036       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
11037                          DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
11038     default:
11039       // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
11040       // and that operand should already have the right NarrowVT so no
11041       // extension should be required at this point.
11042       llvm_unreachable("Unsupported opcode");
11043     }
11044   }
11045 
11046   /// Helper function to get the narrow type for \p Root.
11047   /// The narrow type is the type of \p Root where we divided the size of each
11048   /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
11049   /// \pre The size of the type of the elements of Root must be a multiple of 2
11050   /// and be greater than 16.
11051   static MVT getNarrowType(const SDNode *Root) {
11052     MVT VT = Root->getSimpleValueType(0);
11053 
11054     // Determine the narrow size.
11055     unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
11056     assert(NarrowSize >= 8 && "Trying to extend something we can't represent");
11057     MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
11058                                     VT.getVectorElementCount());
11059     return NarrowVT;
11060   }
11061 
11062   /// Return the opcode required to materialize the folding of the sign
11063   /// extensions (\p IsSExt == true) or zero extensions (IsSExt == false) for
11064   /// both operands for \p Opcode.
11065   /// Put differently, get the opcode to materialize:
11066   /// - ISExt == true: \p Opcode(sext(a), sext(b)) -> newOpcode(a, b)
11067   /// - ISExt == false: \p Opcode(zext(a), zext(b)) -> newOpcode(a, b)
11068   /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
11069   static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) {
11070     switch (Opcode) {
11071     case RISCVISD::ADD_VL:
11072     case RISCVISD::VWADD_W_VL:
11073     case RISCVISD::VWADDU_W_VL:
11074       return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL;
11075     case RISCVISD::MUL_VL:
11076       return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
11077     case RISCVISD::SUB_VL:
11078     case RISCVISD::VWSUB_W_VL:
11079     case RISCVISD::VWSUBU_W_VL:
11080       return IsSExt ? RISCVISD::VWSUB_VL : RISCVISD::VWSUBU_VL;
11081     default:
11082       llvm_unreachable("Unexpected opcode");
11083     }
11084   }
11085 
11086   /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
11087   /// newOpcode(a, b).
11088   static unsigned getSUOpcode(unsigned Opcode) {
11089     assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL");
11090     return RISCVISD::VWMULSU_VL;
11091   }
11092 
11093   /// Get the opcode to materialize \p Opcode(a, s|zext(b)) ->
11094   /// newOpcode(a, b).
11095   static unsigned getWOpcode(unsigned Opcode, bool IsSExt) {
11096     switch (Opcode) {
11097     case RISCVISD::ADD_VL:
11098       return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL;
11099     case RISCVISD::SUB_VL:
11100       return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL;
11101     default:
11102       llvm_unreachable("Unexpected opcode");
11103     }
11104   }
11105 
11106   using CombineToTry = std::function<std::optional<CombineResult>(
11107       SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
11108       const NodeExtensionHelper & /*RHS*/)>;
11109 
11110   /// Check if this node needs to be fully folded or extended for all users.
11111   bool needToPromoteOtherUsers() const { return EnforceOneUse; }
11112 
11113   /// Helper method to set the various fields of this struct based on the
11114   /// type of \p Root.
11115   void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) {
11116     SupportsZExt = false;
11117     SupportsSExt = false;
11118     EnforceOneUse = true;
11119     CheckMask = true;
11120     switch (OrigOperand.getOpcode()) {
11121     case RISCVISD::VZEXT_VL:
11122       SupportsZExt = true;
11123       Mask = OrigOperand.getOperand(1);
11124       VL = OrigOperand.getOperand(2);
11125       break;
11126     case RISCVISD::VSEXT_VL:
11127       SupportsSExt = true;
11128       Mask = OrigOperand.getOperand(1);
11129       VL = OrigOperand.getOperand(2);
11130       break;
11131     case RISCVISD::VMV_V_X_VL: {
11132       // Historically, we didn't care about splat values not disappearing during
11133       // combines.
11134       EnforceOneUse = false;
11135       CheckMask = false;
11136       VL = OrigOperand.getOperand(2);
11137 
11138       // The operand is a splat of a scalar.
11139 
11140       // The pasthru must be undef for tail agnostic.
11141       if (!OrigOperand.getOperand(0).isUndef())
11142         break;
11143 
11144       // Get the scalar value.
11145       SDValue Op = OrigOperand.getOperand(1);
11146 
11147       // See if we have enough sign bits or zero bits in the scalar to use a
11148       // widening opcode by splatting to smaller element size.
11149       MVT VT = Root->getSimpleValueType(0);
11150       unsigned EltBits = VT.getScalarSizeInBits();
11151       unsigned ScalarBits = Op.getValueSizeInBits();
11152       // Make sure we're getting all element bits from the scalar register.
11153       // FIXME: Support implicit sign extension of vmv.v.x?
11154       if (ScalarBits < EltBits)
11155         break;
11156 
11157       unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
11158       // If the narrow type cannot be expressed with a legal VMV,
11159       // this is not a valid candidate.
11160       if (NarrowSize < 8)
11161         break;
11162 
11163       if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
11164         SupportsSExt = true;
11165       if (DAG.MaskedValueIsZero(Op,
11166                                 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
11167         SupportsZExt = true;
11168       break;
11169     }
11170     default:
11171       break;
11172     }
11173   }
11174 
11175   /// Check if \p Root supports any extension folding combines.
11176   static bool isSupportedRoot(const SDNode *Root) {
11177     switch (Root->getOpcode()) {
11178     case RISCVISD::ADD_VL:
11179     case RISCVISD::MUL_VL:
11180     case RISCVISD::VWADD_W_VL:
11181     case RISCVISD::VWADDU_W_VL:
11182     case RISCVISD::SUB_VL:
11183     case RISCVISD::VWSUB_W_VL:
11184     case RISCVISD::VWSUBU_W_VL:
11185       return true;
11186     default:
11187       return false;
11188     }
11189   }
11190 
11191   /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
11192   NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) {
11193     assert(isSupportedRoot(Root) && "Trying to build an helper with an "
11194                                     "unsupported root");
11195     assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
11196     OrigOperand = Root->getOperand(OperandIdx);
11197 
11198     unsigned Opc = Root->getOpcode();
11199     switch (Opc) {
11200     // We consider VW<ADD|SUB>(U)_W(LHS, RHS) as if they were
11201     // <ADD|SUB>(LHS, S|ZEXT(RHS))
11202     case RISCVISD::VWADD_W_VL:
11203     case RISCVISD::VWADDU_W_VL:
11204     case RISCVISD::VWSUB_W_VL:
11205     case RISCVISD::VWSUBU_W_VL:
11206       if (OperandIdx == 1) {
11207         SupportsZExt =
11208             Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
11209         SupportsSExt = !SupportsZExt;
11210         std::tie(Mask, VL) = getMaskAndVL(Root);
11211         CheckMask = true;
11212         // There's no existing extension here, so we don't have to worry about
11213         // making sure it gets removed.
11214         EnforceOneUse = false;
11215         break;
11216       }
11217       [[fallthrough]];
11218     default:
11219       fillUpExtensionSupport(Root, DAG);
11220       break;
11221     }
11222   }
11223 
11224   /// Check if this operand is compatible with the given vector length \p VL.
11225   bool isVLCompatible(SDValue VL) const {
11226     return this->VL != SDValue() && this->VL == VL;
11227   }
11228 
11229   /// Check if this operand is compatible with the given \p Mask.
11230   bool isMaskCompatible(SDValue Mask) const {
11231     return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask);
11232   }
11233 
11234   /// Helper function to get the Mask and VL from \p Root.
11235   static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) {
11236     assert(isSupportedRoot(Root) && "Unexpected root");
11237     return std::make_pair(Root->getOperand(3), Root->getOperand(4));
11238   }
11239 
11240   /// Check if the Mask and VL of this operand are compatible with \p Root.
11241   bool areVLAndMaskCompatible(const SDNode *Root) const {
11242     auto [Mask, VL] = getMaskAndVL(Root);
11243     return isMaskCompatible(Mask) && isVLCompatible(VL);
11244   }
11245 
11246   /// Helper function to check if \p N is commutative with respect to the
11247   /// foldings that are supported by this class.
11248   static bool isCommutative(const SDNode *N) {
11249     switch (N->getOpcode()) {
11250     case RISCVISD::ADD_VL:
11251     case RISCVISD::MUL_VL:
11252     case RISCVISD::VWADD_W_VL:
11253     case RISCVISD::VWADDU_W_VL:
11254       return true;
11255     case RISCVISD::SUB_VL:
11256     case RISCVISD::VWSUB_W_VL:
11257     case RISCVISD::VWSUBU_W_VL:
11258       return false;
11259     default:
11260       llvm_unreachable("Unexpected opcode");
11261     }
11262   }
11263 
11264   /// Get a list of combine to try for folding extensions in \p Root.
11265   /// Note that each returned CombineToTry function doesn't actually modify
11266   /// anything. Instead they produce an optional CombineResult that if not None,
11267   /// need to be materialized for the combine to be applied.
11268   /// \see CombineResult::materialize.
11269   /// If the related CombineToTry function returns std::nullopt, that means the
11270   /// combine didn't match.
11271   static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
11272 };
11273 
11274 /// Helper structure that holds all the necessary information to materialize a
11275 /// combine that does some extension folding.
11276 struct CombineResult {
11277   /// Opcode to be generated when materializing the combine.
11278   unsigned TargetOpcode;
11279   // No value means no extension is needed. If extension is needed, the value
11280   // indicates if it needs to be sign extended.
11281   std::optional<bool> SExtLHS;
11282   std::optional<bool> SExtRHS;
11283   /// Root of the combine.
11284   SDNode *Root;
11285   /// LHS of the TargetOpcode.
11286   NodeExtensionHelper LHS;
11287   /// RHS of the TargetOpcode.
11288   NodeExtensionHelper RHS;
11289 
11290   CombineResult(unsigned TargetOpcode, SDNode *Root,
11291                 const NodeExtensionHelper &LHS, std::optional<bool> SExtLHS,
11292                 const NodeExtensionHelper &RHS, std::optional<bool> SExtRHS)
11293       : TargetOpcode(TargetOpcode), SExtLHS(SExtLHS), SExtRHS(SExtRHS),
11294         Root(Root), LHS(LHS), RHS(RHS) {}
11295 
11296   /// Return a value that uses TargetOpcode and that can be used to replace
11297   /// Root.
11298   /// The actual replacement is *not* done in that method.
11299   SDValue materialize(SelectionDAG &DAG) const {
11300     SDValue Mask, VL, Merge;
11301     std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root);
11302     Merge = Root->getOperand(2);
11303     return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
11304                        LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS),
11305                        RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge,
11306                        Mask, VL);
11307   }
11308 };
11309 
11310 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
11311 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
11312 /// are zext) and LHS and RHS can be folded into Root.
11313 /// AllowSExt and AllozZExt define which form `ext` can take in this pattern.
11314 ///
11315 /// \note If the pattern can match with both zext and sext, the returned
11316 /// CombineResult will feature the zext result.
11317 ///
11318 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11319 /// can be used to apply the pattern.
11320 static std::optional<CombineResult>
11321 canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
11322                                  const NodeExtensionHelper &RHS, bool AllowSExt,
11323                                  bool AllowZExt) {
11324   assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
11325   if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
11326     return std::nullopt;
11327   if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
11328     return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
11329                              Root->getOpcode(), /*IsSExt=*/false),
11330                          Root, LHS, /*SExtLHS=*/false, RHS,
11331                          /*SExtRHS=*/false);
11332   if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
11333     return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
11334                              Root->getOpcode(), /*IsSExt=*/true),
11335                          Root, LHS, /*SExtLHS=*/true, RHS,
11336                          /*SExtRHS=*/true);
11337   return std::nullopt;
11338 }
11339 
11340 /// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
11341 /// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
11342 /// are zext) and LHS and RHS can be folded into Root.
11343 ///
11344 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11345 /// can be used to apply the pattern.
11346 static std::optional<CombineResult>
11347 canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
11348                              const NodeExtensionHelper &RHS) {
11349   return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
11350                                           /*AllowZExt=*/true);
11351 }
11352 
11353 /// Check if \p Root follows a pattern Root(LHS, ext(RHS))
11354 ///
11355 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11356 /// can be used to apply the pattern.
11357 static std::optional<CombineResult>
11358 canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
11359               const NodeExtensionHelper &RHS) {
11360   if (!RHS.areVLAndMaskCompatible(Root))
11361     return std::nullopt;
11362 
11363   // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
11364   // sext/zext?
11365   // Control this behavior behind an option (AllowSplatInVW_W) for testing
11366   // purposes.
11367   if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
11368     return CombineResult(
11369         NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/false),
11370         Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/false);
11371   if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
11372     return CombineResult(
11373         NodeExtensionHelper::getWOpcode(Root->getOpcode(), /*IsSExt=*/true),
11374         Root, LHS, /*SExtLHS=*/std::nullopt, RHS, /*SExtRHS=*/true);
11375   return std::nullopt;
11376 }
11377 
11378 /// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
11379 ///
11380 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11381 /// can be used to apply the pattern.
11382 static std::optional<CombineResult>
11383 canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
11384                     const NodeExtensionHelper &RHS) {
11385   return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
11386                                           /*AllowZExt=*/false);
11387 }
11388 
11389 /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
11390 ///
11391 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11392 /// can be used to apply the pattern.
11393 static std::optional<CombineResult>
11394 canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
11395                     const NodeExtensionHelper &RHS) {
11396   return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
11397                                           /*AllowZExt=*/true);
11398 }
11399 
11400 /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
11401 ///
11402 /// \returns std::nullopt if the pattern doesn't match or a CombineResult that
11403 /// can be used to apply the pattern.
11404 static std::optional<CombineResult>
11405 canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
11406                const NodeExtensionHelper &RHS) {
11407   if (!LHS.SupportsSExt || !RHS.SupportsZExt)
11408     return std::nullopt;
11409   if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
11410     return std::nullopt;
11411   return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
11412                        Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
11413 }
11414 
11415 SmallVector<NodeExtensionHelper::CombineToTry>
11416 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
11417   SmallVector<CombineToTry> Strategies;
11418   switch (Root->getOpcode()) {
11419   case RISCVISD::ADD_VL:
11420   case RISCVISD::SUB_VL:
11421     // add|sub -> vwadd(u)|vwsub(u)
11422     Strategies.push_back(canFoldToVWWithSameExtension);
11423     // add|sub -> vwadd(u)_w|vwsub(u)_w
11424     Strategies.push_back(canFoldToVW_W);
11425     break;
11426   case RISCVISD::MUL_VL:
11427     // mul -> vwmul(u)
11428     Strategies.push_back(canFoldToVWWithSameExtension);
11429     // mul -> vwmulsu
11430     Strategies.push_back(canFoldToVW_SU);
11431     break;
11432   case RISCVISD::VWADD_W_VL:
11433   case RISCVISD::VWSUB_W_VL:
11434     // vwadd_w|vwsub_w -> vwadd|vwsub
11435     Strategies.push_back(canFoldToVWWithSEXT);
11436     break;
11437   case RISCVISD::VWADDU_W_VL:
11438   case RISCVISD::VWSUBU_W_VL:
11439     // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
11440     Strategies.push_back(canFoldToVWWithZEXT);
11441     break;
11442   default:
11443     llvm_unreachable("Unexpected opcode");
11444   }
11445   return Strategies;
11446 }
11447 } // End anonymous namespace.
11448 
11449 /// Combine a binary operation to its equivalent VW or VW_W form.
11450 /// The supported combines are:
11451 /// add_vl -> vwadd(u) | vwadd(u)_w
11452 /// sub_vl -> vwsub(u) | vwsub(u)_w
11453 /// mul_vl -> vwmul(u) | vwmul_su
11454 /// vwadd_w(u) -> vwadd(u)
11455 /// vwub_w(u) -> vwadd(u)
11456 static SDValue
11457 combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
11458   SelectionDAG &DAG = DCI.DAG;
11459 
11460   assert(NodeExtensionHelper::isSupportedRoot(N) &&
11461          "Shouldn't have called this method");
11462   SmallVector<SDNode *> Worklist;
11463   SmallSet<SDNode *, 8> Inserted;
11464   Worklist.push_back(N);
11465   Inserted.insert(N);
11466   SmallVector<CombineResult> CombinesToApply;
11467 
11468   while (!Worklist.empty()) {
11469     SDNode *Root = Worklist.pop_back_val();
11470     if (!NodeExtensionHelper::isSupportedRoot(Root))
11471       return SDValue();
11472 
11473     NodeExtensionHelper LHS(N, 0, DAG);
11474     NodeExtensionHelper RHS(N, 1, DAG);
11475     auto AppendUsersIfNeeded = [&Worklist,
11476                                 &Inserted](const NodeExtensionHelper &Op) {
11477       if (Op.needToPromoteOtherUsers()) {
11478         for (SDNode *TheUse : Op.OrigOperand->uses()) {
11479           if (Inserted.insert(TheUse).second)
11480             Worklist.push_back(TheUse);
11481         }
11482       }
11483     };
11484 
11485     // Control the compile time by limiting the number of node we look at in
11486     // total.
11487     if (Inserted.size() > ExtensionMaxWebSize)
11488       return SDValue();
11489 
11490     SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
11491         NodeExtensionHelper::getSupportedFoldings(N);
11492 
11493     assert(!FoldingStrategies.empty() && "Nothing to be folded");
11494     bool Matched = false;
11495     for (int Attempt = 0;
11496          (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
11497          ++Attempt) {
11498 
11499       for (NodeExtensionHelper::CombineToTry FoldingStrategy :
11500            FoldingStrategies) {
11501         std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS);
11502         if (Res) {
11503           Matched = true;
11504           CombinesToApply.push_back(*Res);
11505           // All the inputs that are extended need to be folded, otherwise
11506           // we would be leaving the old input (since it is may still be used),
11507           // and the new one.
11508           if (Res->SExtLHS.has_value())
11509             AppendUsersIfNeeded(LHS);
11510           if (Res->SExtRHS.has_value())
11511             AppendUsersIfNeeded(RHS);
11512           break;
11513         }
11514       }
11515       std::swap(LHS, RHS);
11516     }
11517     // Right now we do an all or nothing approach.
11518     if (!Matched)
11519       return SDValue();
11520   }
11521   // Store the value for the replacement of the input node separately.
11522   SDValue InputRootReplacement;
11523   // We do the RAUW after we materialize all the combines, because some replaced
11524   // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
11525   // some of these nodes may appear in the NodeExtensionHelpers of some of the
11526   // yet-to-be-visited CombinesToApply roots.
11527   SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
11528   ValuesToReplace.reserve(CombinesToApply.size());
11529   for (CombineResult Res : CombinesToApply) {
11530     SDValue NewValue = Res.materialize(DAG);
11531     if (!InputRootReplacement) {
11532       assert(Res.Root == N &&
11533              "First element is expected to be the current node");
11534       InputRootReplacement = NewValue;
11535     } else {
11536       ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
11537     }
11538   }
11539   for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
11540     DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
11541     DCI.AddToWorklist(OldNewValues.second.getNode());
11542   }
11543   return InputRootReplacement;
11544 }
11545 
11546 // Helper function for performMemPairCombine.
11547 // Try to combine the memory loads/stores LSNode1 and LSNode2
11548 // into a single memory pair operation.
11549 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
11550                                  LSBaseSDNode *LSNode2, SDValue BasePtr,
11551                                  uint64_t Imm) {
11552   SmallPtrSet<const SDNode *, 32> Visited;
11553   SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
11554 
11555   if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
11556       SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
11557     return SDValue();
11558 
11559   MachineFunction &MF = DAG.getMachineFunction();
11560   const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
11561 
11562   // The new operation has twice the width.
11563   MVT XLenVT = Subtarget.getXLenVT();
11564   EVT MemVT = LSNode1->getMemoryVT();
11565   EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
11566   MachineMemOperand *MMO = LSNode1->getMemOperand();
11567   MachineMemOperand *NewMMO = MF.getMachineMemOperand(
11568       MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
11569 
11570   if (LSNode1->getOpcode() == ISD::LOAD) {
11571     auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
11572     unsigned Opcode;
11573     if (MemVT == MVT::i32)
11574       Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
11575     else
11576       Opcode = RISCVISD::TH_LDD;
11577 
11578     SDValue Res = DAG.getMemIntrinsicNode(
11579         Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
11580         {LSNode1->getChain(), BasePtr,
11581          DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
11582         NewMemVT, NewMMO);
11583 
11584     SDValue Node1 =
11585         DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
11586     SDValue Node2 =
11587         DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
11588 
11589     DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
11590     return Node1;
11591   } else {
11592     unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
11593 
11594     SDValue Res = DAG.getMemIntrinsicNode(
11595         Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
11596         {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
11597          BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
11598         NewMemVT, NewMMO);
11599 
11600     DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
11601     return Res;
11602   }
11603 }
11604 
11605 // Try to combine two adjacent loads/stores to a single pair instruction from
11606 // the XTHeadMemPair vendor extension.
11607 static SDValue performMemPairCombine(SDNode *N,
11608                                      TargetLowering::DAGCombinerInfo &DCI) {
11609   SelectionDAG &DAG = DCI.DAG;
11610   MachineFunction &MF = DAG.getMachineFunction();
11611   const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
11612 
11613   // Target does not support load/store pair.
11614   if (!Subtarget.hasVendorXTHeadMemPair())
11615     return SDValue();
11616 
11617   LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
11618   EVT MemVT = LSNode1->getMemoryVT();
11619   unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
11620 
11621   // No volatile, indexed or atomic loads/stores.
11622   if (!LSNode1->isSimple() || LSNode1->isIndexed())
11623     return SDValue();
11624 
11625   // Function to get a base + constant representation from a memory value.
11626   auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
11627     if (Ptr->getOpcode() == ISD::ADD)
11628       if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
11629         return {Ptr->getOperand(0), C1->getZExtValue()};
11630     return {Ptr, 0};
11631   };
11632 
11633   auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
11634 
11635   SDValue Chain = N->getOperand(0);
11636   for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
11637        UI != UE; ++UI) {
11638     SDUse &Use = UI.getUse();
11639     if (Use.getUser() != N && Use.getResNo() == 0 &&
11640         Use.getUser()->getOpcode() == N->getOpcode()) {
11641       LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
11642 
11643       // No volatile, indexed or atomic loads/stores.
11644       if (!LSNode2->isSimple() || LSNode2->isIndexed())
11645         continue;
11646 
11647       // Check if LSNode1 and LSNode2 have the same type and extension.
11648       if (LSNode1->getOpcode() == ISD::LOAD)
11649         if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
11650             cast<LoadSDNode>(LSNode1)->getExtensionType())
11651           continue;
11652 
11653       if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
11654         continue;
11655 
11656       auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
11657 
11658       // Check if the base pointer is the same for both instruction.
11659       if (Base1 != Base2)
11660         continue;
11661 
11662       // Check if the offsets match the XTHeadMemPair encoding contraints.
11663       bool Valid = false;
11664       if (MemVT == MVT::i32) {
11665         // Check for adjacent i32 values and a 2-bit index.
11666         if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
11667           Valid = true;
11668       } else if (MemVT == MVT::i64) {
11669         // Check for adjacent i64 values and a 2-bit index.
11670         if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
11671           Valid = true;
11672       }
11673 
11674       if (!Valid)
11675         continue;
11676 
11677       // Try to combine.
11678       if (SDValue Res =
11679               tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
11680         return Res;
11681     }
11682   }
11683 
11684   return SDValue();
11685 }
11686 
11687 // Fold
11688 //   (fp_to_int (froundeven X)) -> fcvt X, rne
11689 //   (fp_to_int (ftrunc X))     -> fcvt X, rtz
11690 //   (fp_to_int (ffloor X))     -> fcvt X, rdn
11691 //   (fp_to_int (fceil X))      -> fcvt X, rup
11692 //   (fp_to_int (fround X))     -> fcvt X, rmm
11693 static SDValue performFP_TO_INTCombine(SDNode *N,
11694                                        TargetLowering::DAGCombinerInfo &DCI,
11695                                        const RISCVSubtarget &Subtarget) {
11696   SelectionDAG &DAG = DCI.DAG;
11697   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11698   MVT XLenVT = Subtarget.getXLenVT();
11699 
11700   SDValue Src = N->getOperand(0);
11701 
11702   // Don't do this for strict-fp Src.
11703   if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
11704     return SDValue();
11705 
11706   // Ensure the FP type is legal.
11707   if (!TLI.isTypeLegal(Src.getValueType()))
11708     return SDValue();
11709 
11710   // Don't do this for f16 with Zfhmin and not Zfh.
11711   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
11712     return SDValue();
11713 
11714   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
11715   // If the result is invalid, we didn't find a foldable instruction.
11716   // If the result is dynamic, then we found an frint which we don't yet
11717   // support. It will cause 7 to be written to the FRM CSR for vector.
11718   // FIXME: We could support this by using VFCVT_X_F_VL/VFCVT_XU_F_VL below.
11719   if (FRM == RISCVFPRndMode::Invalid || FRM == RISCVFPRndMode::DYN)
11720     return SDValue();
11721 
11722   SDLoc DL(N);
11723   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
11724   EVT VT = N->getValueType(0);
11725 
11726   if (VT.isVector() && TLI.isTypeLegal(VT)) {
11727     MVT SrcVT = Src.getSimpleValueType();
11728     MVT SrcContainerVT = SrcVT;
11729     MVT ContainerVT = VT.getSimpleVT();
11730     SDValue XVal = Src.getOperand(0);
11731 
11732     // For widening and narrowing conversions we just combine it into a
11733     // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
11734     // end up getting lowered to their appropriate pseudo instructions based on
11735     // their operand types
11736     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
11737         VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
11738       return SDValue();
11739 
11740     // Make fixed-length vectors scalable first
11741     if (SrcVT.isFixedLengthVector()) {
11742       SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
11743       XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
11744       ContainerVT =
11745           getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
11746     }
11747 
11748     auto [Mask, VL] =
11749         getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
11750 
11751     SDValue FpToInt;
11752     if (FRM == RISCVFPRndMode::RTZ) {
11753       // Use the dedicated trunc static rounding mode if we're truncating so we
11754       // don't need to generate calls to fsrmi/fsrm
11755       unsigned Opc =
11756           IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
11757       FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
11758     } else {
11759       unsigned Opc =
11760           IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
11761       FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
11762                             DAG.getTargetConstant(FRM, DL, XLenVT), VL);
11763     }
11764 
11765     // If converted from fixed-length to scalable, convert back
11766     if (VT.isFixedLengthVector())
11767       FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
11768 
11769     return FpToInt;
11770   }
11771 
11772   // Only handle XLen or i32 types. Other types narrower than XLen will
11773   // eventually be legalized to XLenVT.
11774   if (VT != MVT::i32 && VT != XLenVT)
11775     return SDValue();
11776 
11777   unsigned Opc;
11778   if (VT == XLenVT)
11779     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
11780   else
11781     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11782 
11783   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
11784                                 DAG.getTargetConstant(FRM, DL, XLenVT));
11785   return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
11786 }
11787 
11788 // Fold
11789 //   (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
11790 //   (fp_to_int_sat (ftrunc X))     -> (select X == nan, 0, (fcvt X, rtz))
11791 //   (fp_to_int_sat (ffloor X))     -> (select X == nan, 0, (fcvt X, rdn))
11792 //   (fp_to_int_sat (fceil X))      -> (select X == nan, 0, (fcvt X, rup))
11793 //   (fp_to_int_sat (fround X))     -> (select X == nan, 0, (fcvt X, rmm))
11794 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
11795                                        TargetLowering::DAGCombinerInfo &DCI,
11796                                        const RISCVSubtarget &Subtarget) {
11797   SelectionDAG &DAG = DCI.DAG;
11798   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11799   MVT XLenVT = Subtarget.getXLenVT();
11800 
11801   // Only handle XLen types. Other types narrower than XLen will eventually be
11802   // legalized to XLenVT.
11803   EVT DstVT = N->getValueType(0);
11804   if (DstVT != XLenVT)
11805     return SDValue();
11806 
11807   SDValue Src = N->getOperand(0);
11808 
11809   // Don't do this for strict-fp Src.
11810   if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
11811     return SDValue();
11812 
11813   // Ensure the FP type is also legal.
11814   if (!TLI.isTypeLegal(Src.getValueType()))
11815     return SDValue();
11816 
11817   // Don't do this for f16 with Zfhmin and not Zfh.
11818   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
11819     return SDValue();
11820 
11821   EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11822 
11823   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
11824   if (FRM == RISCVFPRndMode::Invalid)
11825     return SDValue();
11826 
11827   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
11828 
11829   unsigned Opc;
11830   if (SatVT == DstVT)
11831     Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
11832   else if (DstVT == MVT::i64 && SatVT == MVT::i32)
11833     Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11834   else
11835     return SDValue();
11836   // FIXME: Support other SatVTs by clamping before or after the conversion.
11837 
11838   Src = Src.getOperand(0);
11839 
11840   SDLoc DL(N);
11841   SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
11842                                 DAG.getTargetConstant(FRM, DL, XLenVT));
11843 
11844   // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
11845   // extend.
11846   if (Opc == RISCVISD::FCVT_WU_RV64)
11847     FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
11848 
11849   // RISC-V FP-to-int conversions saturate to the destination register size, but
11850   // don't produce 0 for nan.
11851   SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
11852   return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
11853 }
11854 
11855 // Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
11856 // smaller than XLenVT.
11857 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
11858                                         const RISCVSubtarget &Subtarget) {
11859   assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
11860 
11861   SDValue Src = N->getOperand(0);
11862   if (Src.getOpcode() != ISD::BSWAP)
11863     return SDValue();
11864 
11865   EVT VT = N->getValueType(0);
11866   if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
11867       !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
11868     return SDValue();
11869 
11870   SDLoc DL(N);
11871   return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
11872 }
11873 
11874 // Convert from one FMA opcode to another based on whether we are negating the
11875 // multiply result and/or the accumulator.
11876 // NOTE: Only supports RVV operations with VL.
11877 static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
11878   // Negating the multiply result changes ADD<->SUB and toggles 'N'.
11879   if (NegMul) {
11880     // clang-format off
11881     switch (Opcode) {
11882     default: llvm_unreachable("Unexpected opcode");
11883     case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFNMSUB_VL; break;
11884     case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL;  break;
11885     case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL;  break;
11886     case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFNMADD_VL; break;
11887     case RISCVISD::STRICT_VFMADD_VL:  Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
11888     case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL;  break;
11889     case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL;  break;
11890     case RISCVISD::STRICT_VFMSUB_VL:  Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
11891     }
11892     // clang-format on
11893   }
11894 
11895   // Negating the accumulator changes ADD<->SUB.
11896   if (NegAcc) {
11897     // clang-format off
11898     switch (Opcode) {
11899     default: llvm_unreachable("Unexpected opcode");
11900     case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFMSUB_VL;  break;
11901     case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFMADD_VL;  break;
11902     case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
11903     case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
11904     case RISCVISD::STRICT_VFMADD_VL:  Opcode = RISCVISD::STRICT_VFMSUB_VL;  break;
11905     case RISCVISD::STRICT_VFMSUB_VL:  Opcode = RISCVISD::STRICT_VFMADD_VL;  break;
11906     case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
11907     case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
11908     }
11909     // clang-format on
11910   }
11911 
11912   return Opcode;
11913 }
11914 
11915 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
11916   // Fold FNEG_VL into FMA opcodes.
11917   // The first operand of strict-fp is chain.
11918   unsigned Offset = N->isTargetStrictFPOpcode();
11919   SDValue A = N->getOperand(0 + Offset);
11920   SDValue B = N->getOperand(1 + Offset);
11921   SDValue C = N->getOperand(2 + Offset);
11922   SDValue Mask = N->getOperand(3 + Offset);
11923   SDValue VL = N->getOperand(4 + Offset);
11924 
11925   auto invertIfNegative = [&Mask, &VL](SDValue &V) {
11926     if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
11927         V.getOperand(2) == VL) {
11928       // Return the negated input.
11929       V = V.getOperand(0);
11930       return true;
11931     }
11932 
11933     return false;
11934   };
11935 
11936   bool NegA = invertIfNegative(A);
11937   bool NegB = invertIfNegative(B);
11938   bool NegC = invertIfNegative(C);
11939 
11940   // If no operands are negated, we're done.
11941   if (!NegA && !NegB && !NegC)
11942     return SDValue();
11943 
11944   unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
11945   if (N->isTargetStrictFPOpcode())
11946     return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
11947                        {N->getOperand(0), A, B, C, Mask, VL});
11948   return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
11949                      VL);
11950 }
11951 
11952 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG) {
11953   if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
11954     return V;
11955 
11956   // FIXME: Ignore strict opcodes for now.
11957   if (N->isTargetStrictFPOpcode())
11958     return SDValue();
11959 
11960   // Try to form widening FMA.
11961   SDValue Op0 = N->getOperand(0);
11962   SDValue Op1 = N->getOperand(1);
11963   SDValue Mask = N->getOperand(3);
11964   SDValue VL = N->getOperand(4);
11965 
11966   if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
11967       Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
11968     return SDValue();
11969 
11970   // TODO: Refactor to handle more complex cases similar to
11971   // combineBinOp_VLToVWBinOp_VL.
11972   if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
11973       (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
11974     return SDValue();
11975 
11976   // Check the mask and VL are the same.
11977   if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
11978       Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
11979     return SDValue();
11980 
11981   unsigned NewOpc;
11982   switch (N->getOpcode()) {
11983   default:
11984     llvm_unreachable("Unexpected opcode");
11985   case RISCVISD::VFMADD_VL:
11986     NewOpc = RISCVISD::VFWMADD_VL;
11987     break;
11988   case RISCVISD::VFNMSUB_VL:
11989     NewOpc = RISCVISD::VFWNMSUB_VL;
11990     break;
11991   case RISCVISD::VFNMADD_VL:
11992     NewOpc = RISCVISD::VFWNMADD_VL;
11993     break;
11994   case RISCVISD::VFMSUB_VL:
11995     NewOpc = RISCVISD::VFWMSUB_VL;
11996     break;
11997   }
11998 
11999   Op0 = Op0.getOperand(0);
12000   Op1 = Op1.getOperand(0);
12001 
12002   return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
12003                      N->getOperand(2), Mask, VL);
12004 }
12005 
12006 static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG) {
12007   // FIXME: Ignore strict opcodes for now.
12008   assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");
12009 
12010   // Try to form widening multiply.
12011   SDValue Op0 = N->getOperand(0);
12012   SDValue Op1 = N->getOperand(1);
12013   SDValue Merge = N->getOperand(2);
12014   SDValue Mask = N->getOperand(3);
12015   SDValue VL = N->getOperand(4);
12016 
12017   if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
12018       Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
12019     return SDValue();
12020 
12021   // TODO: Refactor to handle more complex cases similar to
12022   // combineBinOp_VLToVWBinOp_VL.
12023   if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
12024       (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
12025     return SDValue();
12026 
12027   // Check the mask and VL are the same.
12028   if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
12029       Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
12030     return SDValue();
12031 
12032   Op0 = Op0.getOperand(0);
12033   Op1 = Op1.getOperand(0);
12034 
12035   return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0,
12036                      Op1, Merge, Mask, VL);
12037 }
12038 
12039 static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG) {
12040   SDValue Op0 = N->getOperand(0);
12041   SDValue Op1 = N->getOperand(1);
12042   SDValue Merge = N->getOperand(2);
12043   SDValue Mask = N->getOperand(3);
12044   SDValue VL = N->getOperand(4);
12045 
12046   bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL;
12047 
12048   // Look for foldable FP_EXTENDS.
12049   bool Op0IsExtend =
12050       Op0.getOpcode() == RISCVISD::FP_EXTEND_VL &&
12051       (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0)));
12052   bool Op1IsExtend =
12053       (Op0 == Op1 && Op0IsExtend) ||
12054       (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse());
12055 
12056   // Check the mask and VL.
12057   if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL))
12058     Op0IsExtend = false;
12059   if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL))
12060     Op1IsExtend = false;
12061 
12062   // Canonicalize.
12063   if (!Op1IsExtend) {
12064     // Sub requires at least operand 1 to be an extend.
12065     if (!IsAdd)
12066       return SDValue();
12067 
12068     // Add is commutable, if the other operand is foldable, swap them.
12069     if (!Op0IsExtend)
12070       return SDValue();
12071 
12072     std::swap(Op0, Op1);
12073     std::swap(Op0IsExtend, Op1IsExtend);
12074   }
12075 
12076   // Op1 is a foldable extend. Op0 might be foldable.
12077   Op1 = Op1.getOperand(0);
12078   if (Op0IsExtend)
12079     Op0 = Op0.getOperand(0);
12080 
12081   unsigned Opc;
12082   if (IsAdd)
12083     Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL;
12084   else
12085     Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL;
12086 
12087   return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask,
12088                      VL);
12089 }
12090 
12091 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
12092                                  const RISCVSubtarget &Subtarget) {
12093   assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
12094 
12095   if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
12096     return SDValue();
12097 
12098   if (!isa<ConstantSDNode>(N->getOperand(1)))
12099     return SDValue();
12100   uint64_t ShAmt = N->getConstantOperandVal(1);
12101   if (ShAmt > 32)
12102     return SDValue();
12103 
12104   SDValue N0 = N->getOperand(0);
12105 
12106   // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
12107   // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
12108   // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
12109   if (ShAmt < 32 &&
12110       N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
12111       cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
12112       N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
12113       isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
12114     uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
12115     if (LShAmt < 32) {
12116       SDLoc ShlDL(N0.getOperand(0));
12117       SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
12118                                 N0.getOperand(0).getOperand(0),
12119                                 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
12120       SDLoc DL(N);
12121       return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
12122                          DAG.getConstant(ShAmt + 32, DL, MVT::i64));
12123     }
12124   }
12125 
12126   // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
12127   // FIXME: Should this be a generic combine? There's a similar combine on X86.
12128   //
12129   // Also try these folds where an add or sub is in the middle.
12130   // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
12131   // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
12132   SDValue Shl;
12133   ConstantSDNode *AddC = nullptr;
12134 
12135   // We might have an ADD or SUB between the SRA and SHL.
12136   bool IsAdd = N0.getOpcode() == ISD::ADD;
12137   if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
12138     // Other operand needs to be a constant we can modify.
12139     AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
12140     if (!AddC)
12141       return SDValue();
12142 
12143     // AddC needs to have at least 32 trailing zeros.
12144     if (AddC->getAPIntValue().countr_zero() < 32)
12145       return SDValue();
12146 
12147     // All users should be a shift by constant less than or equal to 32. This
12148     // ensures we'll do this optimization for each of them to produce an
12149     // add/sub+sext_inreg they can all share.
12150     for (SDNode *U : N0->uses()) {
12151       if (U->getOpcode() != ISD::SRA ||
12152           !isa<ConstantSDNode>(U->getOperand(1)) ||
12153           cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() > 32)
12154         return SDValue();
12155     }
12156 
12157     Shl = N0.getOperand(IsAdd ? 0 : 1);
12158   } else {
12159     // Not an ADD or SUB.
12160     Shl = N0;
12161   }
12162 
12163   // Look for a shift left by 32.
12164   if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
12165       Shl.getConstantOperandVal(1) != 32)
12166     return SDValue();
12167 
12168   // We if we didn't look through an add/sub, then the shl should have one use.
12169   // If we did look through an add/sub, the sext_inreg we create is free so
12170   // we're only creating 2 new instructions. It's enough to only remove the
12171   // original sra+add/sub.
12172   if (!AddC && !Shl.hasOneUse())
12173     return SDValue();
12174 
12175   SDLoc DL(N);
12176   SDValue In = Shl.getOperand(0);
12177 
12178   // If we looked through an ADD or SUB, we need to rebuild it with the shifted
12179   // constant.
12180   if (AddC) {
12181     SDValue ShiftedAddC =
12182         DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
12183     if (IsAdd)
12184       In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
12185     else
12186       In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
12187   }
12188 
12189   SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
12190                              DAG.getValueType(MVT::i32));
12191   if (ShAmt == 32)
12192     return SExt;
12193 
12194   return DAG.getNode(
12195       ISD::SHL, DL, MVT::i64, SExt,
12196       DAG.getConstant(32 - ShAmt, DL, MVT::i64));
12197 }
12198 
12199 // Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
12200 // the result is used as the conditon of a br_cc or select_cc we can invert,
12201 // inverting the setcc is free, and Z is 0/1. Caller will invert the
12202 // br_cc/select_cc.
12203 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
12204   bool IsAnd = Cond.getOpcode() == ISD::AND;
12205   if (!IsAnd && Cond.getOpcode() != ISD::OR)
12206     return SDValue();
12207 
12208   if (!Cond.hasOneUse())
12209     return SDValue();
12210 
12211   SDValue Setcc = Cond.getOperand(0);
12212   SDValue Xor = Cond.getOperand(1);
12213   // Canonicalize setcc to LHS.
12214   if (Setcc.getOpcode() != ISD::SETCC)
12215     std::swap(Setcc, Xor);
12216   // LHS should be a setcc and RHS should be an xor.
12217   if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
12218       Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
12219     return SDValue();
12220 
12221   // If the condition is an And, SimplifyDemandedBits may have changed
12222   // (xor Z, 1) to (not Z).
12223   SDValue Xor1 = Xor.getOperand(1);
12224   if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
12225     return SDValue();
12226 
12227   EVT VT = Cond.getValueType();
12228   SDValue Xor0 = Xor.getOperand(0);
12229 
12230   // The LHS of the xor needs to be 0/1.
12231   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
12232   if (!DAG.MaskedValueIsZero(Xor0, Mask))
12233     return SDValue();
12234 
12235   // We can only invert integer setccs.
12236   EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
12237   if (!SetCCOpVT.isScalarInteger())
12238     return SDValue();
12239 
12240   ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
12241   if (ISD::isIntEqualitySetCC(CCVal)) {
12242     CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
12243     Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
12244                          Setcc.getOperand(1), CCVal);
12245   } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
12246     // Invert (setlt 0, X) by converting to (setlt X, 1).
12247     Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
12248                          DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
12249   } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
12250     // (setlt X, 1) by converting to (setlt 0, X).
12251     Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
12252                          DAG.getConstant(0, SDLoc(Setcc), VT),
12253                          Setcc.getOperand(0), CCVal);
12254   } else
12255     return SDValue();
12256 
12257   unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
12258   return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
12259 }
12260 
12261 // Perform common combines for BR_CC and SELECT_CC condtions.
12262 static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
12263                        SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
12264   ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
12265 
12266   // As far as arithmetic right shift always saves the sign,
12267   // shift can be omitted.
12268   // Fold setlt (sra X, N), 0 -> setlt X, 0 and
12269   // setge (sra X, N), 0 -> setge X, 0
12270   if (auto *RHSConst = dyn_cast<ConstantSDNode>(RHS.getNode())) {
12271     if ((CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
12272         LHS.getOpcode() == ISD::SRA && RHSConst->isZero()) {
12273       LHS = LHS.getOperand(0);
12274       return true;
12275     }
12276   }
12277 
12278   if (!ISD::isIntEqualitySetCC(CCVal))
12279     return false;
12280 
12281   // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
12282   // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
12283   if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
12284       LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
12285     // If we're looking for eq 0 instead of ne 0, we need to invert the
12286     // condition.
12287     bool Invert = CCVal == ISD::SETEQ;
12288     CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
12289     if (Invert)
12290       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
12291 
12292     RHS = LHS.getOperand(1);
12293     LHS = LHS.getOperand(0);
12294     translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
12295 
12296     CC = DAG.getCondCode(CCVal);
12297     return true;
12298   }
12299 
12300   // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
12301   if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
12302     RHS = LHS.getOperand(1);
12303     LHS = LHS.getOperand(0);
12304     return true;
12305   }
12306 
12307   // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
12308   if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
12309       LHS.getOperand(1).getOpcode() == ISD::Constant) {
12310     SDValue LHS0 = LHS.getOperand(0);
12311     if (LHS0.getOpcode() == ISD::AND &&
12312         LHS0.getOperand(1).getOpcode() == ISD::Constant) {
12313       uint64_t Mask = LHS0.getConstantOperandVal(1);
12314       uint64_t ShAmt = LHS.getConstantOperandVal(1);
12315       if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
12316         CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
12317         CC = DAG.getCondCode(CCVal);
12318 
12319         ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
12320         LHS = LHS0.getOperand(0);
12321         if (ShAmt != 0)
12322           LHS =
12323               DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
12324                           DAG.getConstant(ShAmt, DL, LHS.getValueType()));
12325         return true;
12326       }
12327     }
12328   }
12329 
12330   // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1.
12331   // This can occur when legalizing some floating point comparisons.
12332   APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
12333   if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
12334     CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
12335     CC = DAG.getCondCode(CCVal);
12336     RHS = DAG.getConstant(0, DL, LHS.getValueType());
12337     return true;
12338   }
12339 
12340   if (isNullConstant(RHS)) {
12341     if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
12342       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
12343       CC = DAG.getCondCode(CCVal);
12344       LHS = NewCond;
12345       return true;
12346     }
12347   }
12348 
12349   return false;
12350 }
12351 
12352 // Fold
12353 // (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
12354 // (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
12355 // (select C, (or Y, X), Y)  -> (or Y, (select C, X, 0)).
12356 // (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
12357 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
12358                                    SDValue TrueVal, SDValue FalseVal,
12359                                    bool Swapped) {
12360   bool Commutative = true;
12361   switch (TrueVal.getOpcode()) {
12362   default:
12363     return SDValue();
12364   case ISD::SUB:
12365     Commutative = false;
12366     break;
12367   case ISD::ADD:
12368   case ISD::OR:
12369   case ISD::XOR:
12370     break;
12371   }
12372 
12373   if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
12374     return SDValue();
12375 
12376   unsigned OpToFold;
12377   if (FalseVal == TrueVal.getOperand(0))
12378     OpToFold = 0;
12379   else if (Commutative && FalseVal == TrueVal.getOperand(1))
12380     OpToFold = 1;
12381   else
12382     return SDValue();
12383 
12384   EVT VT = N->getValueType(0);
12385   SDLoc DL(N);
12386   SDValue Zero = DAG.getConstant(0, DL, VT);
12387   SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
12388 
12389   if (Swapped)
12390     std::swap(OtherOp, Zero);
12391   SDValue NewSel = DAG.getSelect(DL, VT, N->getOperand(0), OtherOp, Zero);
12392   return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
12393 }
12394 
12395 // This tries to get rid of `select` and `icmp` that are being used to handle
12396 // `Targets` that do not support `cttz(0)`/`ctlz(0)`.
12397 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
12398   SDValue Cond = N->getOperand(0);
12399 
12400   // This represents either CTTZ or CTLZ instruction.
12401   SDValue CountZeroes;
12402 
12403   SDValue ValOnZero;
12404 
12405   if (Cond.getOpcode() != ISD::SETCC)
12406     return SDValue();
12407 
12408   if (!isNullConstant(Cond->getOperand(1)))
12409     return SDValue();
12410 
12411   ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
12412   if (CCVal == ISD::CondCode::SETEQ) {
12413     CountZeroes = N->getOperand(2);
12414     ValOnZero = N->getOperand(1);
12415   } else if (CCVal == ISD::CondCode::SETNE) {
12416     CountZeroes = N->getOperand(1);
12417     ValOnZero = N->getOperand(2);
12418   } else {
12419     return SDValue();
12420   }
12421 
12422   if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
12423       CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
12424     CountZeroes = CountZeroes.getOperand(0);
12425 
12426   if (CountZeroes.getOpcode() != ISD::CTTZ &&
12427       CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
12428       CountZeroes.getOpcode() != ISD::CTLZ &&
12429       CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
12430     return SDValue();
12431 
12432   if (!isNullConstant(ValOnZero))
12433     return SDValue();
12434 
12435   SDValue CountZeroesArgument = CountZeroes->getOperand(0);
12436   if (Cond->getOperand(0) != CountZeroesArgument)
12437     return SDValue();
12438 
12439   if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
12440     CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
12441                               CountZeroes.getValueType(), CountZeroesArgument);
12442   } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
12443     CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
12444                               CountZeroes.getValueType(), CountZeroesArgument);
12445   }
12446 
12447   unsigned BitWidth = CountZeroes.getValueSizeInBits();
12448   SDValue BitWidthMinusOne =
12449       DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
12450 
12451   auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
12452                              CountZeroes, BitWidthMinusOne);
12453   return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
12454 }
12455 
12456 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
12457                                     const RISCVSubtarget &Subtarget) {
12458   if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
12459     return Folded;
12460 
12461   if (Subtarget.hasShortForwardBranchOpt())
12462     return SDValue();
12463 
12464   SDValue TrueVal = N->getOperand(1);
12465   SDValue FalseVal = N->getOperand(2);
12466   if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
12467     return V;
12468   return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
12469 }
12470 
12471 // If we're concatenating a series of vector loads like
12472 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
12473 // Then we can turn this into a strided load by widening the vector elements
12474 // vlse32 p, stride=n
12475 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
12476                                             const RISCVSubtarget &Subtarget,
12477                                             const RISCVTargetLowering &TLI) {
12478   SDLoc DL(N);
12479   EVT VT = N->getValueType(0);
12480 
12481   // Only perform this combine on legal MVTs.
12482   if (!TLI.isTypeLegal(VT))
12483     return SDValue();
12484 
12485   // TODO: Potentially extend this to scalable vectors
12486   if (VT.isScalableVector())
12487     return SDValue();
12488 
12489   auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
12490   if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
12491       !SDValue(BaseLd, 0).hasOneUse())
12492     return SDValue();
12493 
12494   EVT BaseLdVT = BaseLd->getValueType(0);
12495   SDValue BasePtr = BaseLd->getBasePtr();
12496 
12497   // Go through the loads and check that they're strided
12498   SDValue CurPtr = BasePtr;
12499   SDValue Stride;
12500   Align Align = BaseLd->getAlign();
12501 
12502   for (SDValue Op : N->ops().drop_front()) {
12503     auto *Ld = dyn_cast<LoadSDNode>(Op);
12504     if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
12505         Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
12506         Ld->getValueType(0) != BaseLdVT)
12507       return SDValue();
12508 
12509     SDValue Ptr = Ld->getBasePtr();
12510     // Check that each load's pointer is (add CurPtr, Stride)
12511     if (Ptr.getOpcode() != ISD::ADD || Ptr.getOperand(0) != CurPtr)
12512       return SDValue();
12513     SDValue Offset = Ptr.getOperand(1);
12514     if (!Stride)
12515       Stride = Offset;
12516     else if (Offset != Stride)
12517       return SDValue();
12518 
12519     // The common alignment is the most restrictive (smallest) of all the loads
12520     Align = std::min(Align, Ld->getAlign());
12521 
12522     CurPtr = Ptr;
12523   }
12524 
12525   // A special case is if the stride is exactly the width of one of the loads,
12526   // in which case it's contiguous and can be combined into a regular vle
12527   // without changing the element size
12528   if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
12529       ConstStride &&
12530       ConstStride->getZExtValue() == BaseLdVT.getFixedSizeInBits() / 8) {
12531     MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
12532         BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(),
12533         VT.getStoreSize(), Align);
12534     // Can't do the combine if the load isn't naturally aligned with the element
12535     // type
12536     if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(),
12537                                             DAG.getDataLayout(), VT, *MMO))
12538       return SDValue();
12539 
12540     SDValue WideLoad = DAG.getLoad(VT, DL, BaseLd->getChain(), BasePtr, MMO);
12541     for (SDValue Ld : N->ops())
12542       DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), WideLoad);
12543     return WideLoad;
12544   }
12545 
12546   // Get the widened scalar type, e.g. v4i8 -> i64
12547   unsigned WideScalarBitWidth =
12548       BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
12549   MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
12550 
12551   // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
12552   MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
12553   if (!TLI.isTypeLegal(WideVecVT))
12554     return SDValue();
12555 
12556   // Check that the operation is legal
12557   if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
12558     return SDValue();
12559 
12560   MVT ContainerVT = TLI.getContainerForFixedLengthVector(WideVecVT);
12561   SDValue VL =
12562       getDefaultVLOps(WideVecVT, ContainerVT, DL, DAG, Subtarget).second;
12563   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12564   SDValue IntID =
12565       DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, Subtarget.getXLenVT());
12566   SDValue Ops[] = {BaseLd->getChain(),
12567                    IntID,
12568                    DAG.getUNDEF(ContainerVT),
12569                    BasePtr,
12570                    Stride,
12571                    VL};
12572 
12573   uint64_t MemSize;
12574   if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride))
12575     // total size = (elsize * n) + (stride - elsize) * (n-1)
12576     //            = elsize + stride * (n-1)
12577     MemSize = WideScalarVT.getSizeInBits() +
12578               ConstStride->getSExtValue() * (N->getNumOperands() - 1);
12579   else
12580     // If Stride isn't constant, then we can't know how much it will load
12581     MemSize = MemoryLocation::UnknownSize;
12582 
12583   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
12584       BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
12585       Align);
12586 
12587   SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
12588                                                 Ops, WideVecVT, MMO);
12589   for (SDValue Ld : N->ops())
12590     DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
12591 
12592   // Note: Perform the bitcast before the convertFromScalableVector so we have
12593   // balanced pairs of convertFromScalable/convertToScalable
12594   SDValue Res = DAG.getBitcast(
12595       TLI.getContainerForFixedLengthVector(VT.getSimpleVT()), StridedLoad);
12596   return convertFromScalableVector(VT, Res, DAG, Subtarget);
12597 }
12598 
12599 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
12600                                const RISCVSubtarget &Subtarget) {
12601   assert(N->getOpcode() == RISCVISD::ADD_VL);
12602   SDValue Addend = N->getOperand(0);
12603   SDValue MulOp = N->getOperand(1);
12604   SDValue AddMergeOp = N->getOperand(2);
12605 
12606   if (!AddMergeOp.isUndef())
12607     return SDValue();
12608 
12609   auto IsVWMulOpc = [](unsigned Opc) {
12610     switch (Opc) {
12611     case RISCVISD::VWMUL_VL:
12612     case RISCVISD::VWMULU_VL:
12613     case RISCVISD::VWMULSU_VL:
12614       return true;
12615     default:
12616       return false;
12617     }
12618   };
12619 
12620   if (!IsVWMulOpc(MulOp.getOpcode()))
12621     std::swap(Addend, MulOp);
12622 
12623   if (!IsVWMulOpc(MulOp.getOpcode()))
12624     return SDValue();
12625 
12626   SDValue MulMergeOp = MulOp.getOperand(2);
12627 
12628   if (!MulMergeOp.isUndef())
12629     return SDValue();
12630 
12631   SDValue AddMask = N->getOperand(3);
12632   SDValue AddVL = N->getOperand(4);
12633   SDValue MulMask = MulOp.getOperand(3);
12634   SDValue MulVL = MulOp.getOperand(4);
12635 
12636   if (AddMask != MulMask || AddVL != MulVL)
12637     return SDValue();
12638 
12639   unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
12640   static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
12641                 "Unexpected opcode after VWMACC_VL");
12642   static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
12643                 "Unexpected opcode after VWMACC_VL!");
12644   static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
12645                 "Unexpected opcode after VWMUL_VL!");
12646   static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
12647                 "Unexpected opcode after VWMUL_VL!");
12648 
12649   SDLoc DL(N);
12650   EVT VT = N->getValueType(0);
12651   SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
12652                    AddVL};
12653   return DAG.getNode(Opc, DL, VT, Ops);
12654 }
12655 
12656 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
12657                                                DAGCombinerInfo &DCI) const {
12658   SelectionDAG &DAG = DCI.DAG;
12659 
12660   // Helper to call SimplifyDemandedBits on an operand of N where only some low
12661   // bits are demanded. N will be added to the Worklist if it was not deleted.
12662   // Caller should return SDValue(N, 0) if this returns true.
12663   auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
12664     SDValue Op = N->getOperand(OpNo);
12665     APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
12666     if (!SimplifyDemandedBits(Op, Mask, DCI))
12667       return false;
12668 
12669     if (N->getOpcode() != ISD::DELETED_NODE)
12670       DCI.AddToWorklist(N);
12671     return true;
12672   };
12673 
12674   switch (N->getOpcode()) {
12675   default:
12676     break;
12677   case RISCVISD::SplitF64: {
12678     SDValue Op0 = N->getOperand(0);
12679     // If the input to SplitF64 is just BuildPairF64 then the operation is
12680     // redundant. Instead, use BuildPairF64's operands directly.
12681     if (Op0->getOpcode() == RISCVISD::BuildPairF64)
12682       return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
12683 
12684     if (Op0->isUndef()) {
12685       SDValue Lo = DAG.getUNDEF(MVT::i32);
12686       SDValue Hi = DAG.getUNDEF(MVT::i32);
12687       return DCI.CombineTo(N, Lo, Hi);
12688     }
12689 
12690     SDLoc DL(N);
12691 
12692     // It's cheaper to materialise two 32-bit integers than to load a double
12693     // from the constant pool and transfer it to integer registers through the
12694     // stack.
12695     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
12696       APInt V = C->getValueAPF().bitcastToAPInt();
12697       SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
12698       SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
12699       return DCI.CombineTo(N, Lo, Hi);
12700     }
12701 
12702     // This is a target-specific version of a DAGCombine performed in
12703     // DAGCombiner::visitBITCAST. It performs the equivalent of:
12704     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12705     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12706     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
12707         !Op0.getNode()->hasOneUse())
12708       break;
12709     SDValue NewSplitF64 =
12710         DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
12711                     Op0.getOperand(0));
12712     SDValue Lo = NewSplitF64.getValue(0);
12713     SDValue Hi = NewSplitF64.getValue(1);
12714     APInt SignBit = APInt::getSignMask(32);
12715     if (Op0.getOpcode() == ISD::FNEG) {
12716       SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
12717                                   DAG.getConstant(SignBit, DL, MVT::i32));
12718       return DCI.CombineTo(N, Lo, NewHi);
12719     }
12720     assert(Op0.getOpcode() == ISD::FABS);
12721     SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
12722                                 DAG.getConstant(~SignBit, DL, MVT::i32));
12723     return DCI.CombineTo(N, Lo, NewHi);
12724   }
12725   case RISCVISD::SLLW:
12726   case RISCVISD::SRAW:
12727   case RISCVISD::SRLW:
12728   case RISCVISD::RORW:
12729   case RISCVISD::ROLW: {
12730     // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
12731     if (SimplifyDemandedLowBitsHelper(0, 32) ||
12732         SimplifyDemandedLowBitsHelper(1, 5))
12733       return SDValue(N, 0);
12734 
12735     break;
12736   }
12737   case RISCVISD::CLZW:
12738   case RISCVISD::CTZW: {
12739     // Only the lower 32 bits of the first operand are read
12740     if (SimplifyDemandedLowBitsHelper(0, 32))
12741       return SDValue(N, 0);
12742     break;
12743   }
12744   case RISCVISD::FMV_W_X_RV64: {
12745     // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 the the
12746     // conversion is unnecessary and can be replaced with the
12747     // FMV_X_ANYEXTW_RV64 operand.
12748     SDValue Op0 = N->getOperand(0);
12749     if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
12750       return Op0.getOperand(0);
12751     break;
12752   }
12753   case RISCVISD::FMV_X_ANYEXTH:
12754   case RISCVISD::FMV_X_ANYEXTW_RV64: {
12755     SDLoc DL(N);
12756     SDValue Op0 = N->getOperand(0);
12757     MVT VT = N->getSimpleValueType(0);
12758     // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
12759     // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
12760     // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
12761     if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
12762          Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
12763         (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
12764          Op0->getOpcode() == RISCVISD::FMV_H_X)) {
12765       assert(Op0.getOperand(0).getValueType() == VT &&
12766              "Unexpected value type!");
12767       return Op0.getOperand(0);
12768     }
12769 
12770     // This is a target-specific version of a DAGCombine performed in
12771     // DAGCombiner::visitBITCAST. It performs the equivalent of:
12772     // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12773     // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12774     if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
12775         !Op0.getNode()->hasOneUse())
12776       break;
12777     SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
12778     unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
12779     APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
12780     if (Op0.getOpcode() == ISD::FNEG)
12781       return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
12782                          DAG.getConstant(SignBit, DL, VT));
12783 
12784     assert(Op0.getOpcode() == ISD::FABS);
12785     return DAG.getNode(ISD::AND, DL, VT, NewFMV,
12786                        DAG.getConstant(~SignBit, DL, VT));
12787   }
12788   case ISD::ADD:
12789     return performADDCombine(N, DAG, Subtarget);
12790   case ISD::SUB:
12791     return performSUBCombine(N, DAG, Subtarget);
12792   case ISD::AND:
12793     return performANDCombine(N, DCI, Subtarget);
12794   case ISD::OR:
12795     return performORCombine(N, DCI, Subtarget);
12796   case ISD::XOR:
12797     return performXORCombine(N, DAG, Subtarget);
12798   case ISD::FADD:
12799   case ISD::UMAX:
12800   case ISD::UMIN:
12801   case ISD::SMAX:
12802   case ISD::SMIN:
12803   case ISD::FMAXNUM:
12804   case ISD::FMINNUM:
12805     return combineBinOpToReduce(N, DAG, Subtarget);
12806   case ISD::SETCC:
12807     return performSETCCCombine(N, DAG, Subtarget);
12808   case ISD::SIGN_EXTEND_INREG:
12809     return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
12810   case ISD::ZERO_EXTEND:
12811     // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
12812     // type legalization. This is safe because fp_to_uint produces poison if
12813     // it overflows.
12814     if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
12815       SDValue Src = N->getOperand(0);
12816       if (Src.getOpcode() == ISD::FP_TO_UINT &&
12817           isTypeLegal(Src.getOperand(0).getValueType()))
12818         return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
12819                            Src.getOperand(0));
12820       if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
12821           isTypeLegal(Src.getOperand(1).getValueType())) {
12822         SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12823         SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
12824                                   Src.getOperand(0), Src.getOperand(1));
12825         DCI.CombineTo(N, Res);
12826         DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
12827         DCI.recursivelyDeleteUnusedNodes(Src.getNode());
12828         return SDValue(N, 0); // Return N so it doesn't get rechecked.
12829       }
12830     }
12831     return SDValue();
12832   case ISD::TRUNCATE:
12833     return performTRUNCATECombine(N, DAG, Subtarget);
12834   case ISD::SELECT:
12835     return performSELECTCombine(N, DAG, Subtarget);
12836   case RISCVISD::CZERO_EQZ:
12837   case RISCVISD::CZERO_NEZ:
12838     // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
12839     // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
12840     if (N->getOperand(1).getOpcode() == ISD::XOR &&
12841         isOneConstant(N->getOperand(1).getOperand(1))) {
12842       SDValue Cond = N->getOperand(1).getOperand(0);
12843       APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
12844       if (DAG.MaskedValueIsZero(Cond, Mask)) {
12845         unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
12846                               ? RISCVISD::CZERO_NEZ
12847                               : RISCVISD::CZERO_EQZ;
12848         return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
12849                            N->getOperand(0), Cond);
12850       }
12851     }
12852     return SDValue();
12853 
12854   case RISCVISD::SELECT_CC: {
12855     // Transform
12856     SDValue LHS = N->getOperand(0);
12857     SDValue RHS = N->getOperand(1);
12858     SDValue CC = N->getOperand(2);
12859     ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
12860     SDValue TrueV = N->getOperand(3);
12861     SDValue FalseV = N->getOperand(4);
12862     SDLoc DL(N);
12863     EVT VT = N->getValueType(0);
12864 
12865     // If the True and False values are the same, we don't need a select_cc.
12866     if (TrueV == FalseV)
12867       return TrueV;
12868 
12869     // (select (x < 0), y, z)  -> x >> (XLEN - 1) & (y - z) + z
12870     // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
12871     if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
12872         isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
12873         (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
12874       if (CCVal == ISD::CondCode::SETGE)
12875         std::swap(TrueV, FalseV);
12876 
12877       int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
12878       int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
12879       // Only handle simm12, if it is not in this range, it can be considered as
12880       // register.
12881       if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
12882           isInt<12>(TrueSImm - FalseSImm)) {
12883         SDValue SRA =
12884             DAG.getNode(ISD::SRA, DL, VT, LHS,
12885                         DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
12886         SDValue AND =
12887             DAG.getNode(ISD::AND, DL, VT, SRA,
12888                         DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
12889         return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
12890       }
12891 
12892       if (CCVal == ISD::CondCode::SETGE)
12893         std::swap(TrueV, FalseV);
12894     }
12895 
12896     if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
12897       return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
12898                          {LHS, RHS, CC, TrueV, FalseV});
12899 
12900     if (!Subtarget.hasShortForwardBranchOpt()) {
12901       // (select c, -1, y) -> -c | y
12902       if (isAllOnesConstant(TrueV)) {
12903         SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
12904         SDValue Neg = DAG.getNegative(C, DL, VT);
12905         return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
12906       }
12907       // (select c, y, -1) -> -!c | y
12908       if (isAllOnesConstant(FalseV)) {
12909         SDValue C =
12910             DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
12911         SDValue Neg = DAG.getNegative(C, DL, VT);
12912         return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
12913       }
12914 
12915       // (select c, 0, y) -> -!c & y
12916       if (isNullConstant(TrueV)) {
12917         SDValue C =
12918             DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
12919         SDValue Neg = DAG.getNegative(C, DL, VT);
12920         return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
12921       }
12922       // (select c, y, 0) -> -c & y
12923       if (isNullConstant(FalseV)) {
12924         SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
12925         SDValue Neg = DAG.getNegative(C, DL, VT);
12926         return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
12927       }
12928       // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
12929       // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
12930       if (((isOneConstant(FalseV) && LHS == TrueV &&
12931             CCVal == ISD::CondCode::SETNE) ||
12932            (isOneConstant(TrueV) && LHS == FalseV &&
12933             CCVal == ISD::CondCode::SETEQ)) &&
12934           isNullConstant(RHS)) {
12935         // freeze it to be safe.
12936         LHS = DAG.getFreeze(LHS);
12937         SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
12938         return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
12939       }
12940     }
12941 
12942     return SDValue();
12943   }
12944   case RISCVISD::BR_CC: {
12945     SDValue LHS = N->getOperand(1);
12946     SDValue RHS = N->getOperand(2);
12947     SDValue CC = N->getOperand(3);
12948     SDLoc DL(N);
12949 
12950     if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
12951       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
12952                          N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
12953 
12954     return SDValue();
12955   }
12956   case ISD::BITREVERSE:
12957     return performBITREVERSECombine(N, DAG, Subtarget);
12958   case ISD::FP_TO_SINT:
12959   case ISD::FP_TO_UINT:
12960     return performFP_TO_INTCombine(N, DCI, Subtarget);
12961   case ISD::FP_TO_SINT_SAT:
12962   case ISD::FP_TO_UINT_SAT:
12963     return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
12964   case ISD::FCOPYSIGN: {
12965     EVT VT = N->getValueType(0);
12966     if (!VT.isVector())
12967       break;
12968     // There is a form of VFSGNJ which injects the negated sign of its second
12969     // operand. Try and bubble any FNEG up after the extend/round to produce
12970     // this optimized pattern. Avoid modifying cases where FP_ROUND and
12971     // TRUNC=1.
12972     SDValue In2 = N->getOperand(1);
12973     // Avoid cases where the extend/round has multiple uses, as duplicating
12974     // those is typically more expensive than removing a fneg.
12975     if (!In2.hasOneUse())
12976       break;
12977     if (In2.getOpcode() != ISD::FP_EXTEND &&
12978         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
12979       break;
12980     In2 = In2.getOperand(0);
12981     if (In2.getOpcode() != ISD::FNEG)
12982       break;
12983     SDLoc DL(N);
12984     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
12985     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
12986                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
12987   }
12988   case ISD::MGATHER:
12989   case ISD::MSCATTER:
12990   case ISD::VP_GATHER:
12991   case ISD::VP_SCATTER: {
12992     if (!DCI.isBeforeLegalize())
12993       break;
12994     SDValue Index, ScaleOp;
12995     bool IsIndexSigned = false;
12996     if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
12997       Index = VPGSN->getIndex();
12998       ScaleOp = VPGSN->getScale();
12999       IsIndexSigned = VPGSN->isIndexSigned();
13000       assert(!VPGSN->isIndexScaled() &&
13001              "Scaled gather/scatter should not be formed");
13002     } else {
13003       const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
13004       Index = MGSN->getIndex();
13005       ScaleOp = MGSN->getScale();
13006       IsIndexSigned = MGSN->isIndexSigned();
13007       assert(!MGSN->isIndexScaled() &&
13008              "Scaled gather/scatter should not be formed");
13009 
13010     }
13011     EVT IndexVT = Index.getValueType();
13012     MVT XLenVT = Subtarget.getXLenVT();
13013     // RISC-V indexed loads only support the "unsigned unscaled" addressing
13014     // mode, so anything else must be manually legalized.
13015     bool NeedsIdxLegalization =
13016         (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
13017     if (!NeedsIdxLegalization)
13018       break;
13019 
13020     SDLoc DL(N);
13021 
13022     // Any index legalization should first promote to XLenVT, so we don't lose
13023     // bits when scaling. This may create an illegal index type so we let
13024     // LLVM's legalization take care of the splitting.
13025     // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
13026     if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
13027       IndexVT = IndexVT.changeVectorElementType(XLenVT);
13028       Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
13029                           DL, IndexVT, Index);
13030     }
13031 
13032     ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED;
13033     if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
13034       return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
13035                              {VPGN->getChain(), VPGN->getBasePtr(), Index,
13036                               ScaleOp, VPGN->getMask(),
13037                               VPGN->getVectorLength()},
13038                              VPGN->getMemOperand(), NewIndexTy);
13039     if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
13040       return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
13041                               {VPSN->getChain(), VPSN->getValue(),
13042                                VPSN->getBasePtr(), Index, ScaleOp,
13043                                VPSN->getMask(), VPSN->getVectorLength()},
13044                               VPSN->getMemOperand(), NewIndexTy);
13045     if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
13046       return DAG.getMaskedGather(
13047           N->getVTList(), MGN->getMemoryVT(), DL,
13048           {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
13049            MGN->getBasePtr(), Index, ScaleOp},
13050           MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
13051     const auto *MSN = cast<MaskedScatterSDNode>(N);
13052     return DAG.getMaskedScatter(
13053         N->getVTList(), MSN->getMemoryVT(), DL,
13054         {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
13055          Index, ScaleOp},
13056         MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
13057   }
13058   case RISCVISD::SRA_VL:
13059   case RISCVISD::SRL_VL:
13060   case RISCVISD::SHL_VL: {
13061     SDValue ShAmt = N->getOperand(1);
13062     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
13063       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
13064       SDLoc DL(N);
13065       SDValue VL = N->getOperand(3);
13066       EVT VT = N->getValueType(0);
13067       ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
13068                           ShAmt.getOperand(1), VL);
13069       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
13070                          N->getOperand(2), N->getOperand(3), N->getOperand(4));
13071     }
13072     break;
13073   }
13074   case ISD::SRA:
13075     if (SDValue V = performSRACombine(N, DAG, Subtarget))
13076       return V;
13077     [[fallthrough]];
13078   case ISD::SRL:
13079   case ISD::SHL: {
13080     SDValue ShAmt = N->getOperand(1);
13081     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
13082       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
13083       SDLoc DL(N);
13084       EVT VT = N->getValueType(0);
13085       ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
13086                           ShAmt.getOperand(1),
13087                           DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
13088       return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
13089     }
13090     break;
13091   }
13092   case RISCVISD::ADD_VL:
13093     if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
13094       return V;
13095     return combineToVWMACC(N, DAG, Subtarget);
13096   case RISCVISD::SUB_VL:
13097   case RISCVISD::VWADD_W_VL:
13098   case RISCVISD::VWADDU_W_VL:
13099   case RISCVISD::VWSUB_W_VL:
13100   case RISCVISD::VWSUBU_W_VL:
13101   case RISCVISD::MUL_VL:
13102     return combineBinOp_VLToVWBinOp_VL(N, DCI);
13103   case RISCVISD::VFMADD_VL:
13104   case RISCVISD::VFNMADD_VL:
13105   case RISCVISD::VFMSUB_VL:
13106   case RISCVISD::VFNMSUB_VL:
13107   case RISCVISD::STRICT_VFMADD_VL:
13108   case RISCVISD::STRICT_VFNMADD_VL:
13109   case RISCVISD::STRICT_VFMSUB_VL:
13110   case RISCVISD::STRICT_VFNMSUB_VL:
13111     return performVFMADD_VLCombine(N, DAG);
13112   case RISCVISD::FMUL_VL:
13113     return performVFMUL_VLCombine(N, DAG);
13114   case RISCVISD::FADD_VL:
13115   case RISCVISD::FSUB_VL:
13116     return performFADDSUB_VLCombine(N, DAG);
13117   case ISD::LOAD:
13118   case ISD::STORE: {
13119     if (DCI.isAfterLegalizeDAG())
13120       if (SDValue V = performMemPairCombine(N, DCI))
13121         return V;
13122 
13123     if (N->getOpcode() != ISD::STORE)
13124       break;
13125 
13126     auto *Store = cast<StoreSDNode>(N);
13127     SDValue Chain = Store->getChain();
13128     EVT MemVT = Store->getMemoryVT();
13129     SDValue Val = Store->getValue();
13130     SDLoc DL(N);
13131 
13132     bool IsScalarizable =
13133         MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
13134         Store->isSimple() &&
13135         MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
13136         isPowerOf2_64(MemVT.getSizeInBits()) &&
13137         MemVT.getSizeInBits() <= Subtarget.getXLen();
13138 
13139     // If sufficiently aligned we can scalarize stores of constant vectors of
13140     // any power-of-two size up to XLen bits, provided that they aren't too
13141     // expensive to materialize.
13142     //   vsetivli   zero, 2, e8, m1, ta, ma
13143     //   vmv.v.i    v8, 4
13144     //   vse64.v    v8, (a0)
13145     // ->
13146     //   li     a1, 1028
13147     //   sh     a1, 0(a0)
13148     if (DCI.isBeforeLegalize() && IsScalarizable &&
13149         ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
13150       // Get the constant vector bits
13151       APInt NewC(Val.getValueSizeInBits(), 0);
13152       for (unsigned i = 0; i < Val.getNumOperands(); i++) {
13153         if (Val.getOperand(i).isUndef())
13154           continue;
13155         NewC.insertBits(Val.getConstantOperandAPInt(i),
13156                         i * Val.getScalarValueSizeInBits());
13157       }
13158       MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
13159 
13160       if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
13161                                      Subtarget.getFeatureBits(), true) <= 2 &&
13162           allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
13163                                          NewVT, *Store->getMemOperand())) {
13164         SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
13165         return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
13166                             Store->getPointerInfo(), Store->getOriginalAlign(),
13167                             Store->getMemOperand()->getFlags());
13168       }
13169     }
13170 
13171     // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
13172     //   vsetivli   zero, 2, e16, m1, ta, ma
13173     //   vle16.v    v8, (a0)
13174     //   vse16.v    v8, (a1)
13175     if (auto *L = dyn_cast<LoadSDNode>(Val);
13176         L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
13177         L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
13178         Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
13179         L->getMemoryVT() == MemVT) {
13180       MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
13181       if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
13182                                          NewVT, *Store->getMemOperand()) &&
13183           allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
13184                                          NewVT, *L->getMemOperand())) {
13185         SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
13186                                    L->getPointerInfo(), L->getOriginalAlign(),
13187                                    L->getMemOperand()->getFlags());
13188         return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
13189                             Store->getPointerInfo(), Store->getOriginalAlign(),
13190                             Store->getMemOperand()->getFlags());
13191       }
13192     }
13193 
13194     // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
13195     // vfmv.f.s is represented as extract element from 0. Match it late to avoid
13196     // any illegal types.
13197     if (Val.getOpcode() == RISCVISD::VMV_X_S ||
13198         (DCI.isAfterLegalizeDAG() &&
13199          Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13200          isNullConstant(Val.getOperand(1)))) {
13201       SDValue Src = Val.getOperand(0);
13202       MVT VecVT = Src.getSimpleValueType();
13203       // VecVT should be scalable and memory VT should match the element type.
13204       if (VecVT.isScalableVector() &&
13205           MemVT == VecVT.getVectorElementType()) {
13206         SDLoc DL(N);
13207         MVT MaskVT = getMaskTypeFor(VecVT);
13208         return DAG.getStoreVP(
13209             Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
13210             DAG.getConstant(1, DL, MaskVT),
13211             DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
13212             Store->getMemOperand(), Store->getAddressingMode(),
13213             Store->isTruncatingStore(), /*IsCompress*/ false);
13214       }
13215     }
13216 
13217     break;
13218   }
13219   case ISD::SPLAT_VECTOR: {
13220     EVT VT = N->getValueType(0);
13221     // Only perform this combine on legal MVT types.
13222     if (!isTypeLegal(VT))
13223       break;
13224     if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
13225                                          DAG, Subtarget))
13226       return Gather;
13227     break;
13228   }
13229   case ISD::CONCAT_VECTORS:
13230     if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
13231       return V;
13232     break;
13233   case RISCVISD::VMV_V_X_VL: {
13234     // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
13235     // scalar input.
13236     unsigned ScalarSize = N->getOperand(1).getValueSizeInBits();
13237     unsigned EltWidth = N->getValueType(0).getScalarSizeInBits();
13238     if (ScalarSize > EltWidth && N->getOperand(0).isUndef())
13239       if (SimplifyDemandedLowBitsHelper(1, EltWidth))
13240         return SDValue(N, 0);
13241 
13242     break;
13243   }
13244   case RISCVISD::VFMV_S_F_VL: {
13245     SDValue Src = N->getOperand(1);
13246     // Try to remove vector->scalar->vector if the scalar->vector is inserting
13247     // into an undef vector.
13248     // TODO: Could use a vslide or vmv.v.v for non-undef.
13249     if (N->getOperand(0).isUndef() &&
13250         Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13251         isNullConstant(Src.getOperand(1)) &&
13252         Src.getOperand(0).getValueType().isScalableVector()) {
13253       EVT VT = N->getValueType(0);
13254       EVT SrcVT = Src.getOperand(0).getValueType();
13255       assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
13256       // Widths match, just return the original vector.
13257       if (SrcVT == VT)
13258         return Src.getOperand(0);
13259       // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
13260     }
13261     break;
13262   }
13263   case ISD::INTRINSIC_VOID:
13264   case ISD::INTRINSIC_W_CHAIN:
13265   case ISD::INTRINSIC_WO_CHAIN: {
13266     unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
13267     unsigned IntNo = N->getConstantOperandVal(IntOpNo);
13268     switch (IntNo) {
13269       // By default we do not combine any intrinsic.
13270     default:
13271       return SDValue();
13272     case Intrinsic::riscv_vcpop:
13273     case Intrinsic::riscv_vcpop_mask:
13274     case Intrinsic::riscv_vfirst:
13275     case Intrinsic::riscv_vfirst_mask: {
13276       SDValue VL = N->getOperand(2);
13277       if (IntNo == Intrinsic::riscv_vcpop_mask ||
13278           IntNo == Intrinsic::riscv_vfirst_mask)
13279         VL = N->getOperand(3);
13280       if (!isNullConstant(VL))
13281         return SDValue();
13282       // If VL is 0, vcpop -> li 0, vfirst -> li -1.
13283       SDLoc DL(N);
13284       EVT VT = N->getValueType(0);
13285       if (IntNo == Intrinsic::riscv_vfirst ||
13286           IntNo == Intrinsic::riscv_vfirst_mask)
13287         return DAG.getConstant(-1, DL, VT);
13288       return DAG.getConstant(0, DL, VT);
13289     }
13290     case Intrinsic::riscv_vloxei:
13291     case Intrinsic::riscv_vloxei_mask:
13292     case Intrinsic::riscv_vluxei:
13293     case Intrinsic::riscv_vluxei_mask:
13294     case Intrinsic::riscv_vsoxei:
13295     case Intrinsic::riscv_vsoxei_mask:
13296     case Intrinsic::riscv_vsuxei:
13297     case Intrinsic::riscv_vsuxei_mask:
13298       if (SDValue V = narrowIndex(N->getOperand(4), DAG)) {
13299         SmallVector<SDValue, 8> Ops(N->ops());
13300         Ops[4] = V;
13301         const auto *MemSD = cast<MemIntrinsicSDNode>(N);
13302         return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(),
13303                                        Ops, MemSD->getMemoryVT(),
13304                                        MemSD->getMemOperand());
13305       }
13306       return SDValue();
13307     }
13308   }
13309   case ISD::BITCAST: {
13310     assert(Subtarget.useRVVForFixedLengthVectors());
13311     SDValue N0 = N->getOperand(0);
13312     EVT VT = N->getValueType(0);
13313     EVT SrcVT = N0.getValueType();
13314     // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
13315     // type, widen both sides to avoid a trip through memory.
13316     if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
13317         VT.isScalarInteger()) {
13318       unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
13319       SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
13320       Ops[0] = N0;
13321       SDLoc DL(N);
13322       N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
13323       N0 = DAG.getBitcast(MVT::i8, N0);
13324       return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
13325     }
13326 
13327     return SDValue();
13328   }
13329   }
13330 
13331   return SDValue();
13332 }
13333 
13334 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
13335     EVT XVT, unsigned KeptBits) const {
13336   // For vectors, we don't have a preference..
13337   if (XVT.isVector())
13338     return false;
13339 
13340   if (XVT != MVT::i32 && XVT != MVT::i64)
13341     return false;
13342 
13343   // We can use sext.w for RV64 or an srai 31 on RV32.
13344   if (KeptBits == 32 || KeptBits == 64)
13345     return true;
13346 
13347   // With Zbb we can use sext.h/sext.b.
13348   return Subtarget.hasStdExtZbb() &&
13349          ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
13350           KeptBits == 16);
13351 }
13352 
13353 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
13354     const SDNode *N, CombineLevel Level) const {
13355   assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13356           N->getOpcode() == ISD::SRL) &&
13357          "Expected shift op");
13358 
13359   // The following folds are only desirable if `(OP _, c1 << c2)` can be
13360   // materialised in fewer instructions than `(OP _, c1)`:
13361   //
13362   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13363   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13364   SDValue N0 = N->getOperand(0);
13365   EVT Ty = N0.getValueType();
13366   if (Ty.isScalarInteger() &&
13367       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
13368     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13369     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13370     if (C1 && C2) {
13371       const APInt &C1Int = C1->getAPIntValue();
13372       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
13373 
13374       // We can materialise `c1 << c2` into an add immediate, so it's "free",
13375       // and the combine should happen, to potentially allow further combines
13376       // later.
13377       if (ShiftedC1Int.getSignificantBits() <= 64 &&
13378           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
13379         return true;
13380 
13381       // We can materialise `c1` in an add immediate, so it's "free", and the
13382       // combine should be prevented.
13383       if (C1Int.getSignificantBits() <= 64 &&
13384           isLegalAddImmediate(C1Int.getSExtValue()))
13385         return false;
13386 
13387       // Neither constant will fit into an immediate, so find materialisation
13388       // costs.
13389       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
13390                                               Subtarget.getFeatureBits(),
13391                                               /*CompressionCost*/true);
13392       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
13393           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
13394           /*CompressionCost*/true);
13395 
13396       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
13397       // combine should be prevented.
13398       if (C1Cost < ShiftedC1Cost)
13399         return false;
13400     }
13401   }
13402   return true;
13403 }
13404 
13405 bool RISCVTargetLowering::targetShrinkDemandedConstant(
13406     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
13407     TargetLoweringOpt &TLO) const {
13408   // Delay this optimization as late as possible.
13409   if (!TLO.LegalOps)
13410     return false;
13411 
13412   EVT VT = Op.getValueType();
13413   if (VT.isVector())
13414     return false;
13415 
13416   unsigned Opcode = Op.getOpcode();
13417   if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
13418     return false;
13419 
13420   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
13421   if (!C)
13422     return false;
13423 
13424   const APInt &Mask = C->getAPIntValue();
13425 
13426   // Clear all non-demanded bits initially.
13427   APInt ShrunkMask = Mask & DemandedBits;
13428 
13429   // Try to make a smaller immediate by setting undemanded bits.
13430 
13431   APInt ExpandedMask = Mask | ~DemandedBits;
13432 
13433   auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
13434     return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
13435   };
13436   auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
13437     if (NewMask == Mask)
13438       return true;
13439     SDLoc DL(Op);
13440     SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
13441     SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
13442                                     Op.getOperand(0), NewC);
13443     return TLO.CombineTo(Op, NewOp);
13444   };
13445 
13446   // If the shrunk mask fits in sign extended 12 bits, let the target
13447   // independent code apply it.
13448   if (ShrunkMask.isSignedIntN(12))
13449     return false;
13450 
13451   // And has a few special cases for zext.
13452   if (Opcode == ISD::AND) {
13453     // Preserve (and X, 0xffff), if zext.h exists use zext.h,
13454     // otherwise use SLLI + SRLI.
13455     APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
13456     if (IsLegalMask(NewMask))
13457       return UseMask(NewMask);
13458 
13459     // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
13460     if (VT == MVT::i64) {
13461       APInt NewMask = APInt(64, 0xffffffff);
13462       if (IsLegalMask(NewMask))
13463         return UseMask(NewMask);
13464     }
13465   }
13466 
13467   // For the remaining optimizations, we need to be able to make a negative
13468   // number through a combination of mask and undemanded bits.
13469   if (!ExpandedMask.isNegative())
13470     return false;
13471 
13472   // What is the fewest number of bits we need to represent the negative number.
13473   unsigned MinSignedBits = ExpandedMask.getSignificantBits();
13474 
13475   // Try to make a 12 bit negative immediate. If that fails try to make a 32
13476   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
13477   // If we can't create a simm12, we shouldn't change opaque constants.
13478   APInt NewMask = ShrunkMask;
13479   if (MinSignedBits <= 12)
13480     NewMask.setBitsFrom(11);
13481   else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
13482     NewMask.setBitsFrom(31);
13483   else
13484     return false;
13485 
13486   // Check that our new mask is a subset of the demanded mask.
13487   assert(IsLegalMask(NewMask));
13488   return UseMask(NewMask);
13489 }
13490 
13491 static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
13492   static const uint64_t GREVMasks[] = {
13493       0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
13494       0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
13495 
13496   for (unsigned Stage = 0; Stage != 6; ++Stage) {
13497     unsigned Shift = 1 << Stage;
13498     if (ShAmt & Shift) {
13499       uint64_t Mask = GREVMasks[Stage];
13500       uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
13501       if (IsGORC)
13502         Res |= x;
13503       x = Res;
13504     }
13505   }
13506 
13507   return x;
13508 }
13509 
13510 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
13511                                                         KnownBits &Known,
13512                                                         const APInt &DemandedElts,
13513                                                         const SelectionDAG &DAG,
13514                                                         unsigned Depth) const {
13515   unsigned BitWidth = Known.getBitWidth();
13516   unsigned Opc = Op.getOpcode();
13517   assert((Opc >= ISD::BUILTIN_OP_END ||
13518           Opc == ISD::INTRINSIC_WO_CHAIN ||
13519           Opc == ISD::INTRINSIC_W_CHAIN ||
13520           Opc == ISD::INTRINSIC_VOID) &&
13521          "Should use MaskedValueIsZero if you don't know whether Op"
13522          " is a target node!");
13523 
13524   Known.resetAll();
13525   switch (Opc) {
13526   default: break;
13527   case RISCVISD::SELECT_CC: {
13528     Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
13529     // If we don't know any bits, early out.
13530     if (Known.isUnknown())
13531       break;
13532     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
13533 
13534     // Only known if known in both the LHS and RHS.
13535     Known = Known.intersectWith(Known2);
13536     break;
13537   }
13538   case RISCVISD::CZERO_EQZ:
13539   case RISCVISD::CZERO_NEZ:
13540     Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13541     // Result is either all zero or operand 0. We can propagate zeros, but not
13542     // ones.
13543     Known.One.clearAllBits();
13544     break;
13545   case RISCVISD::REMUW: {
13546     KnownBits Known2;
13547     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
13548     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
13549     // We only care about the lower 32 bits.
13550     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
13551     // Restore the original width by sign extending.
13552     Known = Known.sext(BitWidth);
13553     break;
13554   }
13555   case RISCVISD::DIVUW: {
13556     KnownBits Known2;
13557     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
13558     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
13559     // We only care about the lower 32 bits.
13560     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
13561     // Restore the original width by sign extending.
13562     Known = Known.sext(BitWidth);
13563     break;
13564   }
13565   case RISCVISD::CTZW: {
13566     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13567     unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
13568     unsigned LowBits = llvm::bit_width(PossibleTZ);
13569     Known.Zero.setBitsFrom(LowBits);
13570     break;
13571   }
13572   case RISCVISD::CLZW: {
13573     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13574     unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
13575     unsigned LowBits = llvm::bit_width(PossibleLZ);
13576     Known.Zero.setBitsFrom(LowBits);
13577     break;
13578   }
13579   case RISCVISD::BREV8:
13580   case RISCVISD::ORC_B: {
13581     // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
13582     // control value of 7 is equivalent to brev8 and orc.b.
13583     Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
13584     bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
13585     // To compute zeros, we need to invert the value and invert it back after.
13586     Known.Zero =
13587         ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
13588     Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
13589     break;
13590   }
13591   case RISCVISD::READ_VLENB: {
13592     // We can use the minimum and maximum VLEN values to bound VLENB.  We
13593     // know VLEN must be a power of two.
13594     const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
13595     const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
13596     assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
13597     Known.Zero.setLowBits(Log2_32(MinVLenB));
13598     Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
13599     if (MaxVLenB == MinVLenB)
13600       Known.One.setBit(Log2_32(MinVLenB));
13601     break;
13602   }
13603   case RISCVISD::FPCLASS: {
13604     // fclass will only set one of the low 10 bits.
13605     Known.Zero.setBitsFrom(10);
13606     break;
13607   }
13608   case ISD::INTRINSIC_W_CHAIN:
13609   case ISD::INTRINSIC_WO_CHAIN: {
13610     unsigned IntNo =
13611         Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
13612     switch (IntNo) {
13613     default:
13614       // We can't do anything for most intrinsics.
13615       break;
13616     case Intrinsic::riscv_vsetvli:
13617     case Intrinsic::riscv_vsetvlimax:
13618       // Assume that VL output is >= 65536.
13619       // TODO: Take SEW and LMUL into account.
13620       if (BitWidth > 17)
13621         Known.Zero.setBitsFrom(17);
13622       break;
13623     }
13624     break;
13625   }
13626   }
13627 }
13628 
13629 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
13630     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
13631     unsigned Depth) const {
13632   switch (Op.getOpcode()) {
13633   default:
13634     break;
13635   case RISCVISD::SELECT_CC: {
13636     unsigned Tmp =
13637         DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
13638     if (Tmp == 1) return 1;  // Early out.
13639     unsigned Tmp2 =
13640         DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
13641     return std::min(Tmp, Tmp2);
13642   }
13643   case RISCVISD::CZERO_EQZ:
13644   case RISCVISD::CZERO_NEZ:
13645     // Output is either all zero or operand 0. We can propagate sign bit count
13646     // from operand 0.
13647     return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
13648   case RISCVISD::ABSW: {
13649     // We expand this at isel to negw+max. The result will have 33 sign bits
13650     // if the input has at least 33 sign bits.
13651     unsigned Tmp =
13652         DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
13653     if (Tmp < 33) return 1;
13654     return 33;
13655   }
13656   case RISCVISD::SLLW:
13657   case RISCVISD::SRAW:
13658   case RISCVISD::SRLW:
13659   case RISCVISD::DIVW:
13660   case RISCVISD::DIVUW:
13661   case RISCVISD::REMUW:
13662   case RISCVISD::ROLW:
13663   case RISCVISD::RORW:
13664   case RISCVISD::FCVT_W_RV64:
13665   case RISCVISD::FCVT_WU_RV64:
13666   case RISCVISD::STRICT_FCVT_W_RV64:
13667   case RISCVISD::STRICT_FCVT_WU_RV64:
13668     // TODO: As the result is sign-extended, this is conservatively correct. A
13669     // more precise answer could be calculated for SRAW depending on known
13670     // bits in the shift amount.
13671     return 33;
13672   case RISCVISD::VMV_X_S: {
13673     // The number of sign bits of the scalar result is computed by obtaining the
13674     // element type of the input vector operand, subtracting its width from the
13675     // XLEN, and then adding one (sign bit within the element type). If the
13676     // element type is wider than XLen, the least-significant XLEN bits are
13677     // taken.
13678     unsigned XLen = Subtarget.getXLen();
13679     unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
13680     if (EltBits <= XLen)
13681       return XLen - EltBits + 1;
13682     break;
13683   }
13684   case ISD::INTRINSIC_W_CHAIN: {
13685     unsigned IntNo = Op.getConstantOperandVal(1);
13686     switch (IntNo) {
13687     default:
13688       break;
13689     case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
13690     case Intrinsic::riscv_masked_atomicrmw_add_i64:
13691     case Intrinsic::riscv_masked_atomicrmw_sub_i64:
13692     case Intrinsic::riscv_masked_atomicrmw_nand_i64:
13693     case Intrinsic::riscv_masked_atomicrmw_max_i64:
13694     case Intrinsic::riscv_masked_atomicrmw_min_i64:
13695     case Intrinsic::riscv_masked_atomicrmw_umax_i64:
13696     case Intrinsic::riscv_masked_atomicrmw_umin_i64:
13697     case Intrinsic::riscv_masked_cmpxchg_i64:
13698       // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
13699       // narrow atomic operation. These are implemented using atomic
13700       // operations at the minimum supported atomicrmw/cmpxchg width whose
13701       // result is then sign extended to XLEN. With +A, the minimum width is
13702       // 32 for both 64 and 32.
13703       assert(Subtarget.getXLen() == 64);
13704       assert(getMinCmpXchgSizeInBits() == 32);
13705       assert(Subtarget.hasStdExtA());
13706       return 33;
13707     }
13708   }
13709   }
13710 
13711   return 1;
13712 }
13713 
13714 const Constant *
13715 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
13716   assert(Ld && "Unexpected null LoadSDNode");
13717   if (!ISD::isNormalLoad(Ld))
13718     return nullptr;
13719 
13720   SDValue Ptr = Ld->getBasePtr();
13721 
13722   // Only constant pools with no offset are supported.
13723   auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
13724     auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
13725     if (!CNode || CNode->isMachineConstantPoolEntry() ||
13726         CNode->getOffset() != 0)
13727       return nullptr;
13728 
13729     return CNode;
13730   };
13731 
13732   // Simple case, LLA.
13733   if (Ptr.getOpcode() == RISCVISD::LLA) {
13734     auto *CNode = GetSupportedConstantPool(Ptr);
13735     if (!CNode || CNode->getTargetFlags() != 0)
13736       return nullptr;
13737 
13738     return CNode->getConstVal();
13739   }
13740 
13741   // Look for a HI and ADD_LO pair.
13742   if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
13743       Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
13744     return nullptr;
13745 
13746   auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
13747   auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
13748 
13749   if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
13750       !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
13751     return nullptr;
13752 
13753   if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
13754     return nullptr;
13755 
13756   return CNodeLo->getConstVal();
13757 }
13758 
13759 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
13760                                                   MachineBasicBlock *BB) {
13761   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
13762 
13763   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
13764   // Should the count have wrapped while it was being read, we need to try
13765   // again.
13766   // ...
13767   // read:
13768   // rdcycleh x3 # load high word of cycle
13769   // rdcycle  x2 # load low word of cycle
13770   // rdcycleh x4 # load high word of cycle
13771   // bne x3, x4, read # check if high word reads match, otherwise try again
13772   // ...
13773 
13774   MachineFunction &MF = *BB->getParent();
13775   const BasicBlock *LLVM_BB = BB->getBasicBlock();
13776   MachineFunction::iterator It = ++BB->getIterator();
13777 
13778   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
13779   MF.insert(It, LoopMBB);
13780 
13781   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
13782   MF.insert(It, DoneMBB);
13783 
13784   // Transfer the remainder of BB and its successor edges to DoneMBB.
13785   DoneMBB->splice(DoneMBB->begin(), BB,
13786                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
13787   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
13788 
13789   BB->addSuccessor(LoopMBB);
13790 
13791   MachineRegisterInfo &RegInfo = MF.getRegInfo();
13792   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
13793   Register LoReg = MI.getOperand(0).getReg();
13794   Register HiReg = MI.getOperand(1).getReg();
13795   DebugLoc DL = MI.getDebugLoc();
13796 
13797   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
13798   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
13799       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
13800       .addReg(RISCV::X0);
13801   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
13802       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
13803       .addReg(RISCV::X0);
13804   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
13805       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
13806       .addReg(RISCV::X0);
13807 
13808   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
13809       .addReg(HiReg)
13810       .addReg(ReadAgainReg)
13811       .addMBB(LoopMBB);
13812 
13813   LoopMBB->addSuccessor(LoopMBB);
13814   LoopMBB->addSuccessor(DoneMBB);
13815 
13816   MI.eraseFromParent();
13817 
13818   return DoneMBB;
13819 }
13820 
13821 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
13822                                              MachineBasicBlock *BB,
13823                                              const RISCVSubtarget &Subtarget) {
13824   assert((MI.getOpcode() == RISCV::SplitF64Pseudo ||
13825           MI.getOpcode() == RISCV::SplitF64Pseudo_INX) &&
13826          "Unexpected instruction");
13827 
13828   MachineFunction &MF = *BB->getParent();
13829   DebugLoc DL = MI.getDebugLoc();
13830   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
13831   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
13832   Register LoReg = MI.getOperand(0).getReg();
13833   Register HiReg = MI.getOperand(1).getReg();
13834   Register SrcReg = MI.getOperand(2).getReg();
13835 
13836   const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX
13837                                          ? &RISCV::GPRPF64RegClass
13838                                          : &RISCV::FPR64RegClass;
13839   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
13840 
13841   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
13842                           RI, Register());
13843   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
13844   MachineMemOperand *MMOLo =
13845       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
13846   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
13847       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
13848   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
13849       .addFrameIndex(FI)
13850       .addImm(0)
13851       .addMemOperand(MMOLo);
13852   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
13853       .addFrameIndex(FI)
13854       .addImm(4)
13855       .addMemOperand(MMOHi);
13856   MI.eraseFromParent(); // The pseudo instruction is gone now.
13857   return BB;
13858 }
13859 
13860 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
13861                                                  MachineBasicBlock *BB,
13862                                                  const RISCVSubtarget &Subtarget) {
13863   assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo ||
13864           MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) &&
13865          "Unexpected instruction");
13866 
13867   MachineFunction &MF = *BB->getParent();
13868   DebugLoc DL = MI.getDebugLoc();
13869   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
13870   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
13871   Register DstReg = MI.getOperand(0).getReg();
13872   Register LoReg = MI.getOperand(1).getReg();
13873   Register HiReg = MI.getOperand(2).getReg();
13874 
13875   const TargetRegisterClass *DstRC =
13876       MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass
13877                                                       : &RISCV::FPR64RegClass;
13878   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
13879 
13880   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
13881   MachineMemOperand *MMOLo =
13882       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
13883   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
13884       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
13885   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
13886       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
13887       .addFrameIndex(FI)
13888       .addImm(0)
13889       .addMemOperand(MMOLo);
13890   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
13891       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
13892       .addFrameIndex(FI)
13893       .addImm(4)
13894       .addMemOperand(MMOHi);
13895   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
13896   MI.eraseFromParent(); // The pseudo instruction is gone now.
13897   return BB;
13898 }
13899 
13900 static bool isSelectPseudo(MachineInstr &MI) {
13901   switch (MI.getOpcode()) {
13902   default:
13903     return false;
13904   case RISCV::Select_GPR_Using_CC_GPR:
13905   case RISCV::Select_FPR16_Using_CC_GPR:
13906   case RISCV::Select_FPR16INX_Using_CC_GPR:
13907   case RISCV::Select_FPR32_Using_CC_GPR:
13908   case RISCV::Select_FPR32INX_Using_CC_GPR:
13909   case RISCV::Select_FPR64_Using_CC_GPR:
13910   case RISCV::Select_FPR64INX_Using_CC_GPR:
13911   case RISCV::Select_FPR64IN32X_Using_CC_GPR:
13912     return true;
13913   }
13914 }
13915 
13916 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
13917                                         unsigned RelOpcode, unsigned EqOpcode,
13918                                         const RISCVSubtarget &Subtarget) {
13919   DebugLoc DL = MI.getDebugLoc();
13920   Register DstReg = MI.getOperand(0).getReg();
13921   Register Src1Reg = MI.getOperand(1).getReg();
13922   Register Src2Reg = MI.getOperand(2).getReg();
13923   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
13924   Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
13925   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
13926 
13927   // Save the current FFLAGS.
13928   BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
13929 
13930   auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
13931                  .addReg(Src1Reg)
13932                  .addReg(Src2Reg);
13933   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
13934     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
13935 
13936   // Restore the FFLAGS.
13937   BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
13938       .addReg(SavedFFlags, RegState::Kill);
13939 
13940   // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
13941   auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
13942                   .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
13943                   .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
13944   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
13945     MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
13946 
13947   // Erase the pseudoinstruction.
13948   MI.eraseFromParent();
13949   return BB;
13950 }
13951 
13952 static MachineBasicBlock *
13953 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
13954                           MachineBasicBlock *ThisMBB,
13955                           const RISCVSubtarget &Subtarget) {
13956   // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
13957   // Without this, custom-inserter would have generated:
13958   //
13959   //   A
13960   //   | \
13961   //   |  B
13962   //   | /
13963   //   C
13964   //   | \
13965   //   |  D
13966   //   | /
13967   //   E
13968   //
13969   // A: X = ...; Y = ...
13970   // B: empty
13971   // C: Z = PHI [X, A], [Y, B]
13972   // D: empty
13973   // E: PHI [X, C], [Z, D]
13974   //
13975   // If we lower both Select_FPRX_ in a single step, we can instead generate:
13976   //
13977   //   A
13978   //   | \
13979   //   |  C
13980   //   | /|
13981   //   |/ |
13982   //   |  |
13983   //   |  D
13984   //   | /
13985   //   E
13986   //
13987   // A: X = ...; Y = ...
13988   // D: empty
13989   // E: PHI [X, A], [X, C], [Y, D]
13990 
13991   const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
13992   const DebugLoc &DL = First.getDebugLoc();
13993   const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
13994   MachineFunction *F = ThisMBB->getParent();
13995   MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
13996   MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
13997   MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13998   MachineFunction::iterator It = ++ThisMBB->getIterator();
13999   F->insert(It, FirstMBB);
14000   F->insert(It, SecondMBB);
14001   F->insert(It, SinkMBB);
14002 
14003   // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
14004   SinkMBB->splice(SinkMBB->begin(), ThisMBB,
14005                   std::next(MachineBasicBlock::iterator(First)),
14006                   ThisMBB->end());
14007   SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
14008 
14009   // Fallthrough block for ThisMBB.
14010   ThisMBB->addSuccessor(FirstMBB);
14011   // Fallthrough block for FirstMBB.
14012   FirstMBB->addSuccessor(SecondMBB);
14013   ThisMBB->addSuccessor(SinkMBB);
14014   FirstMBB->addSuccessor(SinkMBB);
14015   // This is fallthrough.
14016   SecondMBB->addSuccessor(SinkMBB);
14017 
14018   auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
14019   Register FLHS = First.getOperand(1).getReg();
14020   Register FRHS = First.getOperand(2).getReg();
14021   // Insert appropriate branch.
14022   BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
14023       .addReg(FLHS)
14024       .addReg(FRHS)
14025       .addMBB(SinkMBB);
14026 
14027   Register SLHS = Second.getOperand(1).getReg();
14028   Register SRHS = Second.getOperand(2).getReg();
14029   Register Op1Reg4 = First.getOperand(4).getReg();
14030   Register Op1Reg5 = First.getOperand(5).getReg();
14031 
14032   auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
14033   // Insert appropriate branch.
14034   BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
14035       .addReg(SLHS)
14036       .addReg(SRHS)
14037       .addMBB(SinkMBB);
14038 
14039   Register DestReg = Second.getOperand(0).getReg();
14040   Register Op2Reg4 = Second.getOperand(4).getReg();
14041   BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
14042       .addReg(Op2Reg4)
14043       .addMBB(ThisMBB)
14044       .addReg(Op1Reg4)
14045       .addMBB(FirstMBB)
14046       .addReg(Op1Reg5)
14047       .addMBB(SecondMBB);
14048 
14049   // Now remove the Select_FPRX_s.
14050   First.eraseFromParent();
14051   Second.eraseFromParent();
14052   return SinkMBB;
14053 }
14054 
14055 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
14056                                            MachineBasicBlock *BB,
14057                                            const RISCVSubtarget &Subtarget) {
14058   // To "insert" Select_* instructions, we actually have to insert the triangle
14059   // control-flow pattern.  The incoming instructions know the destination vreg
14060   // to set, the condition code register to branch on, the true/false values to
14061   // select between, and the condcode to use to select the appropriate branch.
14062   //
14063   // We produce the following control flow:
14064   //     HeadMBB
14065   //     |  \
14066   //     |  IfFalseMBB
14067   //     | /
14068   //    TailMBB
14069   //
14070   // When we find a sequence of selects we attempt to optimize their emission
14071   // by sharing the control flow. Currently we only handle cases where we have
14072   // multiple selects with the exact same condition (same LHS, RHS and CC).
14073   // The selects may be interleaved with other instructions if the other
14074   // instructions meet some requirements we deem safe:
14075   // - They are not pseudo instructions.
14076   // - They are debug instructions. Otherwise,
14077   // - They do not have side-effects, do not access memory and their inputs do
14078   //   not depend on the results of the select pseudo-instructions.
14079   // The TrueV/FalseV operands of the selects cannot depend on the result of
14080   // previous selects in the sequence.
14081   // These conditions could be further relaxed. See the X86 target for a
14082   // related approach and more information.
14083   //
14084   // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
14085   // is checked here and handled by a separate function -
14086   // EmitLoweredCascadedSelect.
14087   Register LHS = MI.getOperand(1).getReg();
14088   Register RHS = MI.getOperand(2).getReg();
14089   auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
14090 
14091   SmallVector<MachineInstr *, 4> SelectDebugValues;
14092   SmallSet<Register, 4> SelectDests;
14093   SelectDests.insert(MI.getOperand(0).getReg());
14094 
14095   MachineInstr *LastSelectPseudo = &MI;
14096   auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
14097   if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
14098       Next->getOpcode() == MI.getOpcode() &&
14099       Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
14100       Next->getOperand(5).isKill()) {
14101     return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
14102   }
14103 
14104   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
14105        SequenceMBBI != E; ++SequenceMBBI) {
14106     if (SequenceMBBI->isDebugInstr())
14107       continue;
14108     if (isSelectPseudo(*SequenceMBBI)) {
14109       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
14110           SequenceMBBI->getOperand(2).getReg() != RHS ||
14111           SequenceMBBI->getOperand(3).getImm() != CC ||
14112           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
14113           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
14114         break;
14115       LastSelectPseudo = &*SequenceMBBI;
14116       SequenceMBBI->collectDebugValues(SelectDebugValues);
14117       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
14118       continue;
14119     }
14120     if (SequenceMBBI->hasUnmodeledSideEffects() ||
14121         SequenceMBBI->mayLoadOrStore() ||
14122         SequenceMBBI->usesCustomInsertionHook())
14123       break;
14124     if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
14125           return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
14126         }))
14127       break;
14128   }
14129 
14130   const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
14131   const BasicBlock *LLVM_BB = BB->getBasicBlock();
14132   DebugLoc DL = MI.getDebugLoc();
14133   MachineFunction::iterator I = ++BB->getIterator();
14134 
14135   MachineBasicBlock *HeadMBB = BB;
14136   MachineFunction *F = BB->getParent();
14137   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
14138   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
14139 
14140   F->insert(I, IfFalseMBB);
14141   F->insert(I, TailMBB);
14142 
14143   // Transfer debug instructions associated with the selects to TailMBB.
14144   for (MachineInstr *DebugInstr : SelectDebugValues) {
14145     TailMBB->push_back(DebugInstr->removeFromParent());
14146   }
14147 
14148   // Move all instructions after the sequence to TailMBB.
14149   TailMBB->splice(TailMBB->end(), HeadMBB,
14150                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
14151   // Update machine-CFG edges by transferring all successors of the current
14152   // block to the new block which will contain the Phi nodes for the selects.
14153   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
14154   // Set the successors for HeadMBB.
14155   HeadMBB->addSuccessor(IfFalseMBB);
14156   HeadMBB->addSuccessor(TailMBB);
14157 
14158   // Insert appropriate branch.
14159   BuildMI(HeadMBB, DL, TII.getBrCond(CC))
14160     .addReg(LHS)
14161     .addReg(RHS)
14162     .addMBB(TailMBB);
14163 
14164   // IfFalseMBB just falls through to TailMBB.
14165   IfFalseMBB->addSuccessor(TailMBB);
14166 
14167   // Create PHIs for all of the select pseudo-instructions.
14168   auto SelectMBBI = MI.getIterator();
14169   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
14170   auto InsertionPoint = TailMBB->begin();
14171   while (SelectMBBI != SelectEnd) {
14172     auto Next = std::next(SelectMBBI);
14173     if (isSelectPseudo(*SelectMBBI)) {
14174       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
14175       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
14176               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
14177           .addReg(SelectMBBI->getOperand(4).getReg())
14178           .addMBB(HeadMBB)
14179           .addReg(SelectMBBI->getOperand(5).getReg())
14180           .addMBB(IfFalseMBB);
14181       SelectMBBI->eraseFromParent();
14182     }
14183     SelectMBBI = Next;
14184   }
14185 
14186   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
14187   return TailMBB;
14188 }
14189 
14190 static MachineBasicBlock *emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB,
14191                                        unsigned Opcode) {
14192   DebugLoc DL = MI.getDebugLoc();
14193 
14194   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
14195 
14196   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
14197   Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);
14198 
14199   assert(MI.getNumOperands() == 8 || MI.getNumOperands() == 7);
14200   unsigned FRMIdx = MI.getNumOperands() == 8 ? 4 : 3;
14201 
14202   // Update FRM and save the old value.
14203   BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
14204       .addImm(MI.getOperand(FRMIdx).getImm());
14205 
14206   // Emit an VFCVT with the FRM == DYN
14207   auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode));
14208 
14209   for (unsigned I = 0; I < MI.getNumOperands(); I++)
14210     if (I != FRMIdx)
14211       MIB = MIB.add(MI.getOperand(I));
14212     else
14213       MIB = MIB.add(MachineOperand::CreateImm(7)); // frm = DYN
14214 
14215   MIB.add(MachineOperand::CreateReg(RISCV::FRM,
14216                                     /*IsDef*/ false,
14217                                     /*IsImp*/ true));
14218 
14219   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
14220     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
14221 
14222   // Restore FRM.
14223   BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM))
14224       .addReg(SavedFRM, RegState::Kill);
14225 
14226   // Erase the pseudoinstruction.
14227   MI.eraseFromParent();
14228   return BB;
14229 }
14230 
14231 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
14232                                                     MachineBasicBlock *BB,
14233                                                     unsigned CVTXOpc,
14234                                                     unsigned CVTFOpc) {
14235   DebugLoc DL = MI.getDebugLoc();
14236 
14237   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
14238 
14239   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
14240   Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
14241 
14242   // Save the old value of FFLAGS.
14243   BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
14244 
14245   assert(MI.getNumOperands() == 7);
14246 
14247   // Emit a VFCVT_X_F
14248   const TargetRegisterInfo *TRI =
14249       BB->getParent()->getSubtarget().getRegisterInfo();
14250   const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
14251   Register Tmp = MRI.createVirtualRegister(RC);
14252   BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
14253       .add(MI.getOperand(1))
14254       .add(MI.getOperand(2))
14255       .add(MI.getOperand(3))
14256       .add(MachineOperand::CreateImm(7)) // frm = DYN
14257       .add(MI.getOperand(4))
14258       .add(MI.getOperand(5))
14259       .add(MI.getOperand(6))
14260       .add(MachineOperand::CreateReg(RISCV::FRM,
14261                                      /*IsDef*/ false,
14262                                      /*IsImp*/ true));
14263 
14264   // Emit a VFCVT_F_X
14265   BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
14266       .add(MI.getOperand(0))
14267       .add(MI.getOperand(1))
14268       .addReg(Tmp)
14269       .add(MI.getOperand(3))
14270       .add(MachineOperand::CreateImm(7)) // frm = DYN
14271       .add(MI.getOperand(4))
14272       .add(MI.getOperand(5))
14273       .add(MI.getOperand(6))
14274       .add(MachineOperand::CreateReg(RISCV::FRM,
14275                                      /*IsDef*/ false,
14276                                      /*IsImp*/ true));
14277 
14278   // Restore FFLAGS.
14279   BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
14280       .addReg(SavedFFLAGS, RegState::Kill);
14281 
14282   // Erase the pseudoinstruction.
14283   MI.eraseFromParent();
14284   return BB;
14285 }
14286 
14287 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
14288                                      const RISCVSubtarget &Subtarget) {
14289   unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
14290   const TargetRegisterClass *RC;
14291   switch (MI.getOpcode()) {
14292   default:
14293     llvm_unreachable("Unexpected opcode");
14294   case RISCV::PseudoFROUND_H:
14295     CmpOpc = RISCV::FLT_H;
14296     F2IOpc = RISCV::FCVT_W_H;
14297     I2FOpc = RISCV::FCVT_H_W;
14298     FSGNJOpc = RISCV::FSGNJ_H;
14299     FSGNJXOpc = RISCV::FSGNJX_H;
14300     RC = &RISCV::FPR16RegClass;
14301     break;
14302   case RISCV::PseudoFROUND_H_INX:
14303     CmpOpc = RISCV::FLT_H_INX;
14304     F2IOpc = RISCV::FCVT_W_H_INX;
14305     I2FOpc = RISCV::FCVT_H_W_INX;
14306     FSGNJOpc = RISCV::FSGNJ_H_INX;
14307     FSGNJXOpc = RISCV::FSGNJX_H_INX;
14308     RC = &RISCV::GPRF16RegClass;
14309     break;
14310   case RISCV::PseudoFROUND_S:
14311     CmpOpc = RISCV::FLT_S;
14312     F2IOpc = RISCV::FCVT_W_S;
14313     I2FOpc = RISCV::FCVT_S_W;
14314     FSGNJOpc = RISCV::FSGNJ_S;
14315     FSGNJXOpc = RISCV::FSGNJX_S;
14316     RC = &RISCV::FPR32RegClass;
14317     break;
14318   case RISCV::PseudoFROUND_S_INX:
14319     CmpOpc = RISCV::FLT_S_INX;
14320     F2IOpc = RISCV::FCVT_W_S_INX;
14321     I2FOpc = RISCV::FCVT_S_W_INX;
14322     FSGNJOpc = RISCV::FSGNJ_S_INX;
14323     FSGNJXOpc = RISCV::FSGNJX_S_INX;
14324     RC = &RISCV::GPRF32RegClass;
14325     break;
14326   case RISCV::PseudoFROUND_D:
14327     assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
14328     CmpOpc = RISCV::FLT_D;
14329     F2IOpc = RISCV::FCVT_L_D;
14330     I2FOpc = RISCV::FCVT_D_L;
14331     FSGNJOpc = RISCV::FSGNJ_D;
14332     FSGNJXOpc = RISCV::FSGNJX_D;
14333     RC = &RISCV::FPR64RegClass;
14334     break;
14335   case RISCV::PseudoFROUND_D_INX:
14336     assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
14337     CmpOpc = RISCV::FLT_D_INX;
14338     F2IOpc = RISCV::FCVT_L_D_INX;
14339     I2FOpc = RISCV::FCVT_D_L_INX;
14340     FSGNJOpc = RISCV::FSGNJ_D_INX;
14341     FSGNJXOpc = RISCV::FSGNJX_D_INX;
14342     RC = &RISCV::GPRRegClass;
14343     break;
14344   }
14345 
14346   const BasicBlock *BB = MBB->getBasicBlock();
14347   DebugLoc DL = MI.getDebugLoc();
14348   MachineFunction::iterator I = ++MBB->getIterator();
14349 
14350   MachineFunction *F = MBB->getParent();
14351   MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
14352   MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
14353 
14354   F->insert(I, CvtMBB);
14355   F->insert(I, DoneMBB);
14356   // Move all instructions after the sequence to DoneMBB.
14357   DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
14358                   MBB->end());
14359   // Update machine-CFG edges by transferring all successors of the current
14360   // block to the new block which will contain the Phi nodes for the selects.
14361   DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
14362   // Set the successors for MBB.
14363   MBB->addSuccessor(CvtMBB);
14364   MBB->addSuccessor(DoneMBB);
14365 
14366   Register DstReg = MI.getOperand(0).getReg();
14367   Register SrcReg = MI.getOperand(1).getReg();
14368   Register MaxReg = MI.getOperand(2).getReg();
14369   int64_t FRM = MI.getOperand(3).getImm();
14370 
14371   const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
14372   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
14373 
14374   Register FabsReg = MRI.createVirtualRegister(RC);
14375   BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
14376 
14377   // Compare the FP value to the max value.
14378   Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
14379   auto MIB =
14380       BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
14381   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
14382     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
14383 
14384   // Insert branch.
14385   BuildMI(MBB, DL, TII.get(RISCV::BEQ))
14386       .addReg(CmpReg)
14387       .addReg(RISCV::X0)
14388       .addMBB(DoneMBB);
14389 
14390   CvtMBB->addSuccessor(DoneMBB);
14391 
14392   // Convert to integer.
14393   Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
14394   MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
14395   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
14396     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
14397 
14398   // Convert back to FP.
14399   Register I2FReg = MRI.createVirtualRegister(RC);
14400   MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
14401   if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
14402     MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
14403 
14404   // Restore the sign bit.
14405   Register CvtReg = MRI.createVirtualRegister(RC);
14406   BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
14407 
14408   // Merge the results.
14409   BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
14410       .addReg(SrcReg)
14411       .addMBB(MBB)
14412       .addReg(CvtReg)
14413       .addMBB(CvtMBB);
14414 
14415   MI.eraseFromParent();
14416   return DoneMBB;
14417 }
14418 
14419 MachineBasicBlock *
14420 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
14421                                                  MachineBasicBlock *BB) const {
14422   switch (MI.getOpcode()) {
14423   default:
14424     llvm_unreachable("Unexpected instr type to insert");
14425   case RISCV::ReadCycleWide:
14426     assert(!Subtarget.is64Bit() &&
14427            "ReadCycleWrite is only to be used on riscv32");
14428     return emitReadCycleWidePseudo(MI, BB);
14429   case RISCV::Select_GPR_Using_CC_GPR:
14430   case RISCV::Select_FPR16_Using_CC_GPR:
14431   case RISCV::Select_FPR16INX_Using_CC_GPR:
14432   case RISCV::Select_FPR32_Using_CC_GPR:
14433   case RISCV::Select_FPR32INX_Using_CC_GPR:
14434   case RISCV::Select_FPR64_Using_CC_GPR:
14435   case RISCV::Select_FPR64INX_Using_CC_GPR:
14436   case RISCV::Select_FPR64IN32X_Using_CC_GPR:
14437     return emitSelectPseudo(MI, BB, Subtarget);
14438   case RISCV::BuildPairF64Pseudo:
14439   case RISCV::BuildPairF64Pseudo_INX:
14440     return emitBuildPairF64Pseudo(MI, BB, Subtarget);
14441   case RISCV::SplitF64Pseudo:
14442   case RISCV::SplitF64Pseudo_INX:
14443     return emitSplitF64Pseudo(MI, BB, Subtarget);
14444   case RISCV::PseudoQuietFLE_H:
14445     return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
14446   case RISCV::PseudoQuietFLE_H_INX:
14447     return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
14448   case RISCV::PseudoQuietFLT_H:
14449     return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
14450   case RISCV::PseudoQuietFLT_H_INX:
14451     return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
14452   case RISCV::PseudoQuietFLE_S:
14453     return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
14454   case RISCV::PseudoQuietFLE_S_INX:
14455     return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
14456   case RISCV::PseudoQuietFLT_S:
14457     return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
14458   case RISCV::PseudoQuietFLT_S_INX:
14459     return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
14460   case RISCV::PseudoQuietFLE_D:
14461     return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
14462   case RISCV::PseudoQuietFLE_D_INX:
14463     return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
14464   case RISCV::PseudoQuietFLE_D_IN32X:
14465     return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
14466                          Subtarget);
14467   case RISCV::PseudoQuietFLT_D:
14468     return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
14469   case RISCV::PseudoQuietFLT_D_INX:
14470     return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
14471   case RISCV::PseudoQuietFLT_D_IN32X:
14472     return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
14473                          Subtarget);
14474 
14475 #define PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, LMUL)                             \
14476   case RISCV::RMOpc##_##LMUL:                                                  \
14477     return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL);                          \
14478   case RISCV::RMOpc##_##LMUL##_MASK:                                           \
14479     return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL##_MASK);
14480 
14481 #define PseudoVFCVT_RM_CASE(RMOpc, Opc)                                        \
14482   PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M1)                                     \
14483   PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M2)                                     \
14484   PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M4)                                     \
14485   PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF2)                                    \
14486   PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF4)
14487 
14488 #define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc)                                     \
14489   PseudoVFCVT_RM_CASE(RMOpc, Opc)                                              \
14490   PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M8)
14491 
14492 #define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc)                                    \
14493   PseudoVFCVT_RM_CASE(RMOpc, Opc)                                              \
14494   PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF8)
14495 
14496   // VFCVT
14497   PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_X_F_V, PseudoVFCVT_X_F_V)
14498   PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_XU_F_V, PseudoVFCVT_XU_F_V)
14499   PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_XU_V, PseudoVFCVT_F_XU_V)
14500   PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_X_V, PseudoVFCVT_F_X_V)
14501 
14502   // VFWCVT
14503   PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_XU_F_V, PseudoVFWCVT_XU_F_V);
14504   PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_X_F_V, PseudoVFWCVT_X_F_V);
14505 
14506   // VFNCVT
14507   PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_XU_F_W, PseudoVFNCVT_XU_F_W);
14508   PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_X_F_W, PseudoVFNCVT_X_F_W);
14509   PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_XU_W, PseudoVFNCVT_F_XU_W);
14510   PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_X_W, PseudoVFNCVT_F_X_W);
14511 
14512   case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
14513     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
14514                                      RISCV::PseudoVFCVT_F_X_V_M1_MASK);
14515   case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
14516     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
14517                                      RISCV::PseudoVFCVT_F_X_V_M2_MASK);
14518   case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
14519     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
14520                                      RISCV::PseudoVFCVT_F_X_V_M4_MASK);
14521   case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
14522     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
14523                                      RISCV::PseudoVFCVT_F_X_V_M8_MASK);
14524   case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
14525     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
14526                                      RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
14527   case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
14528     return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
14529                                      RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
14530   case RISCV::PseudoFROUND_H:
14531   case RISCV::PseudoFROUND_H_INX:
14532   case RISCV::PseudoFROUND_S:
14533   case RISCV::PseudoFROUND_S_INX:
14534   case RISCV::PseudoFROUND_D:
14535   case RISCV::PseudoFROUND_D_INX:
14536   case RISCV::PseudoFROUND_D_IN32X:
14537     return emitFROUND(MI, BB, Subtarget);
14538   }
14539 }
14540 
14541 // Returns the index to the rounding mode immediate value if any, otherwise the
14542 // function will return None.
14543 static std::optional<unsigned> getRoundModeIdx(const MachineInstr &MI) {
14544   uint64_t TSFlags = MI.getDesc().TSFlags;
14545   if (!RISCVII::hasRoundModeOp(TSFlags))
14546     return std::nullopt;
14547 
14548   // The operand order
14549   // -------------------------------------
14550   // | n-1 (if any)   | n-2  | n-3 | n-4 |
14551   // | policy         | sew  | vl  | rm  |
14552   // -------------------------------------
14553   return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3;
14554 }
14555 
14556 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
14557                                                         SDNode *Node) const {
14558   // Add FRM dependency to vector floating-point instructions with dynamic
14559   // rounding mode.
14560   if (auto RoundModeIdx = getRoundModeIdx(MI)) {
14561     unsigned FRMImm = MI.getOperand(*RoundModeIdx).getImm();
14562     if (FRMImm == RISCVFPRndMode::DYN && !MI.readsRegister(RISCV::FRM)) {
14563       MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false,
14564                                               /*isImp*/ true));
14565     }
14566   }
14567 
14568   // Add FRM dependency to any instructions with dynamic rounding mode.
14569   unsigned Opc = MI.getOpcode();
14570   auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
14571   if (Idx < 0)
14572     return;
14573   if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
14574     return;
14575   // If the instruction already reads FRM, don't add another read.
14576   if (MI.readsRegister(RISCV::FRM))
14577     return;
14578   MI.addOperand(
14579       MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
14580 }
14581 
14582 // Calling Convention Implementation.
14583 // The expectations for frontend ABI lowering vary from target to target.
14584 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
14585 // details, but this is a longer term goal. For now, we simply try to keep the
14586 // role of the frontend as simple and well-defined as possible. The rules can
14587 // be summarised as:
14588 // * Never split up large scalar arguments. We handle them here.
14589 // * If a hardfloat calling convention is being used, and the struct may be
14590 // passed in a pair of registers (fp+fp, int+fp), and both registers are
14591 // available, then pass as two separate arguments. If either the GPRs or FPRs
14592 // are exhausted, then pass according to the rule below.
14593 // * If a struct could never be passed in registers or directly in a stack
14594 // slot (as it is larger than 2*XLEN and the floating point rules don't
14595 // apply), then pass it using a pointer with the byval attribute.
14596 // * If a struct is less than 2*XLEN, then coerce to either a two-element
14597 // word-sized array or a 2*XLEN scalar (depending on alignment).
14598 // * The frontend can determine whether a struct is returned by reference or
14599 // not based on its size and fields. If it will be returned by reference, the
14600 // frontend must modify the prototype so a pointer with the sret annotation is
14601 // passed as the first argument. This is not necessary for large scalar
14602 // returns.
14603 // * Struct return values and varargs should be coerced to structs containing
14604 // register-size fields in the same situations they would be for fixed
14605 // arguments.
14606 
14607 static const MCPhysReg ArgGPRs[] = {
14608   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
14609   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
14610 };
14611 static const MCPhysReg ArgFPR16s[] = {
14612   RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
14613   RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
14614 };
14615 static const MCPhysReg ArgFPR32s[] = {
14616   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
14617   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
14618 };
14619 static const MCPhysReg ArgFPR64s[] = {
14620   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
14621   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
14622 };
14623 // This is an interim calling convention and it may be changed in the future.
14624 static const MCPhysReg ArgVRs[] = {
14625     RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
14626     RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
14627     RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
14628 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
14629                                      RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
14630                                      RISCV::V20M2, RISCV::V22M2};
14631 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
14632                                      RISCV::V20M4};
14633 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
14634 
14635 // Pass a 2*XLEN argument that has been split into two XLEN values through
14636 // registers or the stack as necessary.
14637 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
14638                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
14639                                 MVT ValVT2, MVT LocVT2,
14640                                 ISD::ArgFlagsTy ArgFlags2) {
14641   unsigned XLenInBytes = XLen / 8;
14642   if (Register Reg = State.AllocateReg(ArgGPRs)) {
14643     // At least one half can be passed via register.
14644     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
14645                                      VA1.getLocVT(), CCValAssign::Full));
14646   } else {
14647     // Both halves must be passed on the stack, with proper alignment.
14648     Align StackAlign =
14649         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
14650     State.addLoc(
14651         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
14652                             State.AllocateStack(XLenInBytes, StackAlign),
14653                             VA1.getLocVT(), CCValAssign::Full));
14654     State.addLoc(CCValAssign::getMem(
14655         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
14656         LocVT2, CCValAssign::Full));
14657     return false;
14658   }
14659 
14660   if (Register Reg = State.AllocateReg(ArgGPRs)) {
14661     // The second half can also be passed via register.
14662     State.addLoc(
14663         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
14664   } else {
14665     // The second half is passed via the stack, without additional alignment.
14666     State.addLoc(CCValAssign::getMem(
14667         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
14668         LocVT2, CCValAssign::Full));
14669   }
14670 
14671   return false;
14672 }
14673 
14674 static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
14675                                std::optional<unsigned> FirstMaskArgument,
14676                                CCState &State, const RISCVTargetLowering &TLI) {
14677   const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
14678   if (RC == &RISCV::VRRegClass) {
14679     // Assign the first mask argument to V0.
14680     // This is an interim calling convention and it may be changed in the
14681     // future.
14682     if (FirstMaskArgument && ValNo == *FirstMaskArgument)
14683       return State.AllocateReg(RISCV::V0);
14684     return State.AllocateReg(ArgVRs);
14685   }
14686   if (RC == &RISCV::VRM2RegClass)
14687     return State.AllocateReg(ArgVRM2s);
14688   if (RC == &RISCV::VRM4RegClass)
14689     return State.AllocateReg(ArgVRM4s);
14690   if (RC == &RISCV::VRM8RegClass)
14691     return State.AllocateReg(ArgVRM8s);
14692   llvm_unreachable("Unhandled register class for ValueType");
14693 }
14694 
14695 // Implements the RISC-V calling convention. Returns true upon failure.
14696 bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
14697                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
14698                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
14699                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
14700                      std::optional<unsigned> FirstMaskArgument) {
14701   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
14702   assert(XLen == 32 || XLen == 64);
14703   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
14704 
14705   // Static chain parameter must not be passed in normal argument registers,
14706   // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
14707   if (ArgFlags.isNest()) {
14708     if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
14709       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
14710       return false;
14711     }
14712   }
14713 
14714   // Any return value split in to more than two values can't be returned
14715   // directly. Vectors are returned via the available vector registers.
14716   if (!LocVT.isVector() && IsRet && ValNo > 1)
14717     return true;
14718 
14719   // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
14720   // variadic argument, or if no F16/F32 argument registers are available.
14721   bool UseGPRForF16_F32 = true;
14722   // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
14723   // variadic argument, or if no F64 argument registers are available.
14724   bool UseGPRForF64 = true;
14725 
14726   switch (ABI) {
14727   default:
14728     llvm_unreachable("Unexpected ABI");
14729   case RISCVABI::ABI_ILP32:
14730   case RISCVABI::ABI_LP64:
14731     break;
14732   case RISCVABI::ABI_ILP32F:
14733   case RISCVABI::ABI_LP64F:
14734     UseGPRForF16_F32 = !IsFixed;
14735     break;
14736   case RISCVABI::ABI_ILP32D:
14737   case RISCVABI::ABI_LP64D:
14738     UseGPRForF16_F32 = !IsFixed;
14739     UseGPRForF64 = !IsFixed;
14740     break;
14741   }
14742 
14743   // FPR16, FPR32, and FPR64 alias each other.
14744   if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
14745     UseGPRForF16_F32 = true;
14746     UseGPRForF64 = true;
14747   }
14748 
14749   // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
14750   // similar local variables rather than directly checking against the target
14751   // ABI.
14752 
14753   if (UseGPRForF16_F32 &&
14754       (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
14755     LocVT = XLenVT;
14756     LocInfo = CCValAssign::BCvt;
14757   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
14758     LocVT = MVT::i64;
14759     LocInfo = CCValAssign::BCvt;
14760   }
14761 
14762   // If this is a variadic argument, the RISC-V calling convention requires
14763   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
14764   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
14765   // be used regardless of whether the original argument was split during
14766   // legalisation or not. The argument will not be passed by registers if the
14767   // original type is larger than 2*XLEN, so the register alignment rule does
14768   // not apply.
14769   unsigned TwoXLenInBytes = (2 * XLen) / 8;
14770   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
14771       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
14772     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
14773     // Skip 'odd' register if necessary.
14774     if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
14775       State.AllocateReg(ArgGPRs);
14776   }
14777 
14778   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
14779   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
14780       State.getPendingArgFlags();
14781 
14782   assert(PendingLocs.size() == PendingArgFlags.size() &&
14783          "PendingLocs and PendingArgFlags out of sync");
14784 
14785   // Handle passing f64 on RV32D with a soft float ABI or when floating point
14786   // registers are exhausted.
14787   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
14788     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
14789            "Can't lower f64 if it is split");
14790     // Depending on available argument GPRS, f64 may be passed in a pair of
14791     // GPRs, split between a GPR and the stack, or passed completely on the
14792     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
14793     // cases.
14794     Register Reg = State.AllocateReg(ArgGPRs);
14795     LocVT = MVT::i32;
14796     if (!Reg) {
14797       unsigned StackOffset = State.AllocateStack(8, Align(8));
14798       State.addLoc(
14799           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
14800       return false;
14801     }
14802     if (!State.AllocateReg(ArgGPRs))
14803       State.AllocateStack(4, Align(4));
14804     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
14805     return false;
14806   }
14807 
14808   // Fixed-length vectors are located in the corresponding scalable-vector
14809   // container types.
14810   if (ValVT.isFixedLengthVector())
14811     LocVT = TLI.getContainerForFixedLengthVector(LocVT);
14812 
14813   // Split arguments might be passed indirectly, so keep track of the pending
14814   // values. Split vectors are passed via a mix of registers and indirectly, so
14815   // treat them as we would any other argument.
14816   if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
14817     LocVT = XLenVT;
14818     LocInfo = CCValAssign::Indirect;
14819     PendingLocs.push_back(
14820         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
14821     PendingArgFlags.push_back(ArgFlags);
14822     if (!ArgFlags.isSplitEnd()) {
14823       return false;
14824     }
14825   }
14826 
14827   // If the split argument only had two elements, it should be passed directly
14828   // in registers or on the stack.
14829   if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
14830       PendingLocs.size() <= 2) {
14831     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
14832     // Apply the normal calling convention rules to the first half of the
14833     // split argument.
14834     CCValAssign VA = PendingLocs[0];
14835     ISD::ArgFlagsTy AF = PendingArgFlags[0];
14836     PendingLocs.clear();
14837     PendingArgFlags.clear();
14838     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
14839                                ArgFlags);
14840   }
14841 
14842   // Allocate to a register if possible, or else a stack slot.
14843   Register Reg;
14844   unsigned StoreSizeBytes = XLen / 8;
14845   Align StackAlign = Align(XLen / 8);
14846 
14847   if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
14848     Reg = State.AllocateReg(ArgFPR16s);
14849   else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
14850     Reg = State.AllocateReg(ArgFPR32s);
14851   else if (ValVT == MVT::f64 && !UseGPRForF64)
14852     Reg = State.AllocateReg(ArgFPR64s);
14853   else if (ValVT.isVector()) {
14854     Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
14855     if (!Reg) {
14856       // For return values, the vector must be passed fully via registers or
14857       // via the stack.
14858       // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
14859       // but we're using all of them.
14860       if (IsRet)
14861         return true;
14862       // Try using a GPR to pass the address
14863       if ((Reg = State.AllocateReg(ArgGPRs))) {
14864         LocVT = XLenVT;
14865         LocInfo = CCValAssign::Indirect;
14866       } else if (ValVT.isScalableVector()) {
14867         LocVT = XLenVT;
14868         LocInfo = CCValAssign::Indirect;
14869       } else {
14870         // Pass fixed-length vectors on the stack.
14871         LocVT = ValVT;
14872         StoreSizeBytes = ValVT.getStoreSize();
14873         // Align vectors to their element sizes, being careful for vXi1
14874         // vectors.
14875         StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
14876       }
14877     }
14878   } else {
14879     Reg = State.AllocateReg(ArgGPRs);
14880   }
14881 
14882   unsigned StackOffset =
14883       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
14884 
14885   // If we reach this point and PendingLocs is non-empty, we must be at the
14886   // end of a split argument that must be passed indirectly.
14887   if (!PendingLocs.empty()) {
14888     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
14889     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
14890 
14891     for (auto &It : PendingLocs) {
14892       if (Reg)
14893         It.convertToReg(Reg);
14894       else
14895         It.convertToMem(StackOffset);
14896       State.addLoc(It);
14897     }
14898     PendingLocs.clear();
14899     PendingArgFlags.clear();
14900     return false;
14901   }
14902 
14903   assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
14904           (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
14905          "Expected an XLenVT or vector types at this stage");
14906 
14907   if (Reg) {
14908     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
14909     return false;
14910   }
14911 
14912   // When a scalar floating-point value is passed on the stack, no
14913   // bit-conversion is needed.
14914   if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
14915     assert(!ValVT.isVector());
14916     LocVT = ValVT;
14917     LocInfo = CCValAssign::Full;
14918   }
14919   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
14920   return false;
14921 }
14922 
14923 template <typename ArgTy>
14924 static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
14925   for (const auto &ArgIdx : enumerate(Args)) {
14926     MVT ArgVT = ArgIdx.value().VT;
14927     if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
14928       return ArgIdx.index();
14929   }
14930   return std::nullopt;
14931 }
14932 
14933 void RISCVTargetLowering::analyzeInputArgs(
14934     MachineFunction &MF, CCState &CCInfo,
14935     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
14936     RISCVCCAssignFn Fn) const {
14937   unsigned NumArgs = Ins.size();
14938   FunctionType *FType = MF.getFunction().getFunctionType();
14939 
14940   std::optional<unsigned> FirstMaskArgument;
14941   if (Subtarget.hasVInstructions())
14942     FirstMaskArgument = preAssignMask(Ins);
14943 
14944   for (unsigned i = 0; i != NumArgs; ++i) {
14945     MVT ArgVT = Ins[i].VT;
14946     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
14947 
14948     Type *ArgTy = nullptr;
14949     if (IsRet)
14950       ArgTy = FType->getReturnType();
14951     else if (Ins[i].isOrigArg())
14952       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
14953 
14954     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
14955     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
14956            ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
14957            FirstMaskArgument)) {
14958       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
14959                         << ArgVT << '\n');
14960       llvm_unreachable(nullptr);
14961     }
14962   }
14963 }
14964 
14965 void RISCVTargetLowering::analyzeOutputArgs(
14966     MachineFunction &MF, CCState &CCInfo,
14967     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
14968     CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
14969   unsigned NumArgs = Outs.size();
14970 
14971   std::optional<unsigned> FirstMaskArgument;
14972   if (Subtarget.hasVInstructions())
14973     FirstMaskArgument = preAssignMask(Outs);
14974 
14975   for (unsigned i = 0; i != NumArgs; i++) {
14976     MVT ArgVT = Outs[i].VT;
14977     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
14978     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
14979 
14980     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
14981     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
14982            ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
14983            FirstMaskArgument)) {
14984       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
14985                         << ArgVT << "\n");
14986       llvm_unreachable(nullptr);
14987     }
14988   }
14989 }
14990 
14991 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
14992 // values.
14993 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
14994                                    const CCValAssign &VA, const SDLoc &DL,
14995                                    const RISCVSubtarget &Subtarget) {
14996   switch (VA.getLocInfo()) {
14997   default:
14998     llvm_unreachable("Unexpected CCValAssign::LocInfo");
14999   case CCValAssign::Full:
15000     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
15001       Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
15002     break;
15003   case CCValAssign::BCvt:
15004     if (VA.getLocVT().isInteger() &&
15005         (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
15006       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
15007     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
15008       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
15009     else
15010       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
15011     break;
15012   }
15013   return Val;
15014 }
15015 
15016 // The caller is responsible for loading the full value if the argument is
15017 // passed with CCValAssign::Indirect.
15018 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
15019                                 const CCValAssign &VA, const SDLoc &DL,
15020                                 const ISD::InputArg &In,
15021                                 const RISCVTargetLowering &TLI) {
15022   MachineFunction &MF = DAG.getMachineFunction();
15023   MachineRegisterInfo &RegInfo = MF.getRegInfo();
15024   EVT LocVT = VA.getLocVT();
15025   SDValue Val;
15026   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
15027   Register VReg = RegInfo.createVirtualRegister(RC);
15028   RegInfo.addLiveIn(VA.getLocReg(), VReg);
15029   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
15030 
15031   // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
15032   if (In.isOrigArg()) {
15033     Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
15034     if (OrigArg->getType()->isIntegerTy()) {
15035       unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
15036       // An input zero extended from i31 can also be considered sign extended.
15037       if ((BitWidth <= 32 && In.Flags.isSExt()) ||
15038           (BitWidth < 32 && In.Flags.isZExt())) {
15039         RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
15040         RVFI->addSExt32Register(VReg);
15041       }
15042     }
15043   }
15044 
15045   if (VA.getLocInfo() == CCValAssign::Indirect)
15046     return Val;
15047 
15048   return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
15049 }
15050 
15051 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
15052                                    const CCValAssign &VA, const SDLoc &DL,
15053                                    const RISCVSubtarget &Subtarget) {
15054   EVT LocVT = VA.getLocVT();
15055 
15056   switch (VA.getLocInfo()) {
15057   default:
15058     llvm_unreachable("Unexpected CCValAssign::LocInfo");
15059   case CCValAssign::Full:
15060     if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
15061       Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
15062     break;
15063   case CCValAssign::BCvt:
15064     if (VA.getLocVT().isInteger() &&
15065         (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
15066       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
15067     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
15068       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
15069     else
15070       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
15071     break;
15072   }
15073   return Val;
15074 }
15075 
15076 // The caller is responsible for loading the full value if the argument is
15077 // passed with CCValAssign::Indirect.
15078 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
15079                                 const CCValAssign &VA, const SDLoc &DL) {
15080   MachineFunction &MF = DAG.getMachineFunction();
15081   MachineFrameInfo &MFI = MF.getFrameInfo();
15082   EVT LocVT = VA.getLocVT();
15083   EVT ValVT = VA.getValVT();
15084   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
15085   if (ValVT.isScalableVector()) {
15086     // When the value is a scalable vector, we save the pointer which points to
15087     // the scalable vector value in the stack. The ValVT will be the pointer
15088     // type, instead of the scalable vector type.
15089     ValVT = LocVT;
15090   }
15091   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
15092                                  /*IsImmutable=*/true);
15093   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
15094   SDValue Val;
15095 
15096   ISD::LoadExtType ExtType;
15097   switch (VA.getLocInfo()) {
15098   default:
15099     llvm_unreachable("Unexpected CCValAssign::LocInfo");
15100   case CCValAssign::Full:
15101   case CCValAssign::Indirect:
15102   case CCValAssign::BCvt:
15103     ExtType = ISD::NON_EXTLOAD;
15104     break;
15105   }
15106   Val = DAG.getExtLoad(
15107       ExtType, DL, LocVT, Chain, FIN,
15108       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
15109   return Val;
15110 }
15111 
15112 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
15113                                        const CCValAssign &VA, const SDLoc &DL) {
15114   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
15115          "Unexpected VA");
15116   MachineFunction &MF = DAG.getMachineFunction();
15117   MachineFrameInfo &MFI = MF.getFrameInfo();
15118   MachineRegisterInfo &RegInfo = MF.getRegInfo();
15119 
15120   if (VA.isMemLoc()) {
15121     // f64 is passed on the stack.
15122     int FI =
15123         MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
15124     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
15125     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
15126                        MachinePointerInfo::getFixedStack(MF, FI));
15127   }
15128 
15129   assert(VA.isRegLoc() && "Expected register VA assignment");
15130 
15131   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
15132   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
15133   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
15134   SDValue Hi;
15135   if (VA.getLocReg() == RISCV::X17) {
15136     // Second half of f64 is passed on the stack.
15137     int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
15138     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
15139     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
15140                      MachinePointerInfo::getFixedStack(MF, FI));
15141   } else {
15142     // Second half of f64 is passed in another GPR.
15143     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
15144     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
15145     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
15146   }
15147   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
15148 }
15149 
15150 // FastCC has less than 1% performance improvement for some particular
15151 // benchmark. But theoretically, it may has benenfit for some cases.
15152 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
15153                             unsigned ValNo, MVT ValVT, MVT LocVT,
15154                             CCValAssign::LocInfo LocInfo,
15155                             ISD::ArgFlagsTy ArgFlags, CCState &State,
15156                             bool IsFixed, bool IsRet, Type *OrigTy,
15157                             const RISCVTargetLowering &TLI,
15158                             std::optional<unsigned> FirstMaskArgument) {
15159 
15160   // X5 and X6 might be used for save-restore libcall.
15161   static const MCPhysReg GPRList[] = {
15162       RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
15163       RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
15164       RISCV::X29, RISCV::X30, RISCV::X31};
15165 
15166   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
15167     if (unsigned Reg = State.AllocateReg(GPRList)) {
15168       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15169       return false;
15170     }
15171   }
15172 
15173   const RISCVSubtarget &Subtarget = TLI.getSubtarget();
15174 
15175   if (LocVT == MVT::f16 &&
15176       (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
15177     static const MCPhysReg FPR16List[] = {
15178         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
15179         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
15180         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
15181         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
15182     if (unsigned Reg = State.AllocateReg(FPR16List)) {
15183       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15184       return false;
15185     }
15186   }
15187 
15188   if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
15189     static const MCPhysReg FPR32List[] = {
15190         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
15191         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
15192         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
15193         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
15194     if (unsigned Reg = State.AllocateReg(FPR32List)) {
15195       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15196       return false;
15197     }
15198   }
15199 
15200   if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
15201     static const MCPhysReg FPR64List[] = {
15202         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
15203         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
15204         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
15205         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
15206     if (unsigned Reg = State.AllocateReg(FPR64List)) {
15207       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15208       return false;
15209     }
15210   }
15211 
15212   // Check if there is an available GPR before hitting the stack.
15213   if ((LocVT == MVT::f16 &&
15214        (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
15215       (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
15216       (LocVT == MVT::f64 && Subtarget.is64Bit() &&
15217        Subtarget.hasStdExtZdinx())) {
15218     if (unsigned Reg = State.AllocateReg(GPRList)) {
15219       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15220       return false;
15221     }
15222   }
15223 
15224   if (LocVT == MVT::f16) {
15225     unsigned Offset2 = State.AllocateStack(2, Align(2));
15226     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
15227     return false;
15228   }
15229 
15230   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
15231     unsigned Offset4 = State.AllocateStack(4, Align(4));
15232     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
15233     return false;
15234   }
15235 
15236   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
15237     unsigned Offset5 = State.AllocateStack(8, Align(8));
15238     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
15239     return false;
15240   }
15241 
15242   if (LocVT.isVector()) {
15243     if (unsigned Reg =
15244             allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
15245       // Fixed-length vectors are located in the corresponding scalable-vector
15246       // container types.
15247       if (ValVT.isFixedLengthVector())
15248         LocVT = TLI.getContainerForFixedLengthVector(LocVT);
15249       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15250     } else {
15251       // Try and pass the address via a "fast" GPR.
15252       if (unsigned GPRReg = State.AllocateReg(GPRList)) {
15253         LocInfo = CCValAssign::Indirect;
15254         LocVT = TLI.getSubtarget().getXLenVT();
15255         State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
15256       } else if (ValVT.isFixedLengthVector()) {
15257         auto StackAlign =
15258             MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
15259         unsigned StackOffset =
15260             State.AllocateStack(ValVT.getStoreSize(), StackAlign);
15261         State.addLoc(
15262             CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
15263       } else {
15264         // Can't pass scalable vectors on the stack.
15265         return true;
15266       }
15267     }
15268 
15269     return false;
15270   }
15271 
15272   return true; // CC didn't match.
15273 }
15274 
15275 bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
15276                          CCValAssign::LocInfo LocInfo,
15277                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
15278   if (ArgFlags.isNest()) {
15279     report_fatal_error(
15280         "Attribute 'nest' is not supported in GHC calling convention");
15281   }
15282 
15283   static const MCPhysReg GPRList[] = {
15284       RISCV::X9,  RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
15285       RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
15286 
15287   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
15288     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
15289     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
15290     if (unsigned Reg = State.AllocateReg(GPRList)) {
15291       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15292       return false;
15293     }
15294   }
15295 
15296   const RISCVSubtarget &Subtarget =
15297       State.getMachineFunction().getSubtarget<RISCVSubtarget>();
15298 
15299   if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
15300     // Pass in STG registers: F1, ..., F6
15301     //                        fs0 ... fs5
15302     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
15303                                           RISCV::F18_F, RISCV::F19_F,
15304                                           RISCV::F20_F, RISCV::F21_F};
15305     if (unsigned Reg = State.AllocateReg(FPR32List)) {
15306       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15307       return false;
15308     }
15309   }
15310 
15311   if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
15312     // Pass in STG registers: D1, ..., D6
15313     //                        fs6 ... fs11
15314     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
15315                                           RISCV::F24_D, RISCV::F25_D,
15316                                           RISCV::F26_D, RISCV::F27_D};
15317     if (unsigned Reg = State.AllocateReg(FPR64List)) {
15318       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15319       return false;
15320     }
15321   }
15322 
15323   if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
15324       (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
15325        Subtarget.is64Bit())) {
15326     if (unsigned Reg = State.AllocateReg(GPRList)) {
15327       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
15328       return false;
15329     }
15330   }
15331 
15332   report_fatal_error("No registers left in GHC calling convention");
15333   return true;
15334 }
15335 
15336 // Transform physical registers into virtual registers.
15337 SDValue RISCVTargetLowering::LowerFormalArguments(
15338     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
15339     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
15340     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
15341 
15342   MachineFunction &MF = DAG.getMachineFunction();
15343 
15344   switch (CallConv) {
15345   default:
15346     report_fatal_error("Unsupported calling convention");
15347   case CallingConv::C:
15348   case CallingConv::Fast:
15349     break;
15350   case CallingConv::GHC:
15351     if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
15352       report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
15353                          "(Zdinx/D) instruction set extensions");
15354   }
15355 
15356   const Function &Func = MF.getFunction();
15357   if (Func.hasFnAttribute("interrupt")) {
15358     if (!Func.arg_empty())
15359       report_fatal_error(
15360         "Functions with the interrupt attribute cannot have arguments!");
15361 
15362     StringRef Kind =
15363       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
15364 
15365     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
15366       report_fatal_error(
15367         "Function interrupt attribute argument not supported!");
15368   }
15369 
15370   EVT PtrVT = getPointerTy(DAG.getDataLayout());
15371   MVT XLenVT = Subtarget.getXLenVT();
15372   unsigned XLenInBytes = Subtarget.getXLen() / 8;
15373   // Used with vargs to acumulate store chains.
15374   std::vector<SDValue> OutChains;
15375 
15376   // Assign locations to all of the incoming arguments.
15377   SmallVector<CCValAssign, 16> ArgLocs;
15378   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
15379 
15380   if (CallConv == CallingConv::GHC)
15381     CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
15382   else
15383     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
15384                      CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
15385                                                    : RISCV::CC_RISCV);
15386 
15387   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
15388     CCValAssign &VA = ArgLocs[i];
15389     SDValue ArgValue;
15390     // Passing f64 on RV32D with a soft float ABI must be handled as a special
15391     // case.
15392     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
15393       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
15394     else if (VA.isRegLoc())
15395       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
15396     else
15397       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
15398 
15399     if (VA.getLocInfo() == CCValAssign::Indirect) {
15400       // If the original argument was split and passed by reference (e.g. i128
15401       // on RV32), we need to load all parts of it here (using the same
15402       // address). Vectors may be partly split to registers and partly to the
15403       // stack, in which case the base address is partly offset and subsequent
15404       // stores are relative to that.
15405       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
15406                                    MachinePointerInfo()));
15407       unsigned ArgIndex = Ins[i].OrigArgIndex;
15408       unsigned ArgPartOffset = Ins[i].PartOffset;
15409       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
15410       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
15411         CCValAssign &PartVA = ArgLocs[i + 1];
15412         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
15413         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
15414         if (PartVA.getValVT().isScalableVector())
15415           Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
15416         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
15417         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
15418                                      MachinePointerInfo()));
15419         ++i;
15420       }
15421       continue;
15422     }
15423     InVals.push_back(ArgValue);
15424   }
15425 
15426   if (any_of(ArgLocs,
15427              [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
15428     MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
15429 
15430   if (IsVarArg) {
15431     ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
15432     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
15433     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
15434     MachineFrameInfo &MFI = MF.getFrameInfo();
15435     MachineRegisterInfo &RegInfo = MF.getRegInfo();
15436     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
15437 
15438     // Offset of the first variable argument from stack pointer, and size of
15439     // the vararg save area. For now, the varargs save area is either zero or
15440     // large enough to hold a0-a7.
15441     int VaArgOffset, VarArgsSaveSize;
15442 
15443     // If all registers are allocated, then all varargs must be passed on the
15444     // stack and we don't need to save any argregs.
15445     if (ArgRegs.size() == Idx) {
15446       VaArgOffset = CCInfo.getStackSize();
15447       VarArgsSaveSize = 0;
15448     } else {
15449       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
15450       VaArgOffset = -VarArgsSaveSize;
15451     }
15452 
15453     // Record the frame index of the first variable argument
15454     // which is a value necessary to VASTART.
15455     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
15456     RVFI->setVarArgsFrameIndex(FI);
15457 
15458     // If saving an odd number of registers then create an extra stack slot to
15459     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
15460     // offsets to even-numbered registered remain 2*XLEN-aligned.
15461     if (Idx % 2) {
15462       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
15463       VarArgsSaveSize += XLenInBytes;
15464     }
15465 
15466     // Copy the integer registers that may have been used for passing varargs
15467     // to the vararg save area.
15468     for (unsigned I = Idx; I < ArgRegs.size();
15469          ++I, VaArgOffset += XLenInBytes) {
15470       const Register Reg = RegInfo.createVirtualRegister(RC);
15471       RegInfo.addLiveIn(ArgRegs[I], Reg);
15472       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
15473       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
15474       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
15475       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
15476                                    MachinePointerInfo::getFixedStack(MF, FI));
15477       cast<StoreSDNode>(Store.getNode())
15478           ->getMemOperand()
15479           ->setValue((Value *)nullptr);
15480       OutChains.push_back(Store);
15481     }
15482     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
15483   }
15484 
15485   // All stores are grouped in one node to allow the matching between
15486   // the size of Ins and InVals. This only happens for vararg functions.
15487   if (!OutChains.empty()) {
15488     OutChains.push_back(Chain);
15489     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
15490   }
15491 
15492   return Chain;
15493 }
15494 
15495 /// isEligibleForTailCallOptimization - Check whether the call is eligible
15496 /// for tail call optimization.
15497 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
15498 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
15499     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
15500     const SmallVector<CCValAssign, 16> &ArgLocs) const {
15501 
15502   auto CalleeCC = CLI.CallConv;
15503   auto &Outs = CLI.Outs;
15504   auto &Caller = MF.getFunction();
15505   auto CallerCC = Caller.getCallingConv();
15506 
15507   // Exception-handling functions need a special set of instructions to
15508   // indicate a return to the hardware. Tail-calling another function would
15509   // probably break this.
15510   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
15511   // should be expanded as new function attributes are introduced.
15512   if (Caller.hasFnAttribute("interrupt"))
15513     return false;
15514 
15515   // Do not tail call opt if the stack is used to pass parameters.
15516   if (CCInfo.getStackSize() != 0)
15517     return false;
15518 
15519   // Do not tail call opt if any parameters need to be passed indirectly.
15520   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
15521   // passed indirectly. So the address of the value will be passed in a
15522   // register, or if not available, then the address is put on the stack. In
15523   // order to pass indirectly, space on the stack often needs to be allocated
15524   // in order to store the value. In this case the CCInfo.getNextStackOffset()
15525   // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
15526   // are passed CCValAssign::Indirect.
15527   for (auto &VA : ArgLocs)
15528     if (VA.getLocInfo() == CCValAssign::Indirect)
15529       return false;
15530 
15531   // Do not tail call opt if either caller or callee uses struct return
15532   // semantics.
15533   auto IsCallerStructRet = Caller.hasStructRetAttr();
15534   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
15535   if (IsCallerStructRet || IsCalleeStructRet)
15536     return false;
15537 
15538   // The callee has to preserve all registers the caller needs to preserve.
15539   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
15540   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
15541   if (CalleeCC != CallerCC) {
15542     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
15543     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
15544       return false;
15545   }
15546 
15547   // Byval parameters hand the function a pointer directly into the stack area
15548   // we want to reuse during a tail call. Working around this *is* possible
15549   // but less efficient and uglier in LowerCall.
15550   for (auto &Arg : Outs)
15551     if (Arg.Flags.isByVal())
15552       return false;
15553 
15554   return true;
15555 }
15556 
15557 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
15558   return DAG.getDataLayout().getPrefTypeAlign(
15559       VT.getTypeForEVT(*DAG.getContext()));
15560 }
15561 
15562 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
15563 // and output parameter nodes.
15564 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
15565                                        SmallVectorImpl<SDValue> &InVals) const {
15566   SelectionDAG &DAG = CLI.DAG;
15567   SDLoc &DL = CLI.DL;
15568   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
15569   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
15570   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
15571   SDValue Chain = CLI.Chain;
15572   SDValue Callee = CLI.Callee;
15573   bool &IsTailCall = CLI.IsTailCall;
15574   CallingConv::ID CallConv = CLI.CallConv;
15575   bool IsVarArg = CLI.IsVarArg;
15576   EVT PtrVT = getPointerTy(DAG.getDataLayout());
15577   MVT XLenVT = Subtarget.getXLenVT();
15578 
15579   MachineFunction &MF = DAG.getMachineFunction();
15580 
15581   // Analyze the operands of the call, assigning locations to each operand.
15582   SmallVector<CCValAssign, 16> ArgLocs;
15583   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
15584 
15585   if (CallConv == CallingConv::GHC)
15586     ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
15587   else
15588     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
15589                       CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
15590                                                     : RISCV::CC_RISCV);
15591 
15592   // Check if it's really possible to do a tail call.
15593   if (IsTailCall)
15594     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
15595 
15596   if (IsTailCall)
15597     ++NumTailCalls;
15598   else if (CLI.CB && CLI.CB->isMustTailCall())
15599     report_fatal_error("failed to perform tail call elimination on a call "
15600                        "site marked musttail");
15601 
15602   // Get a count of how many bytes are to be pushed on the stack.
15603   unsigned NumBytes = ArgCCInfo.getStackSize();
15604 
15605   // Create local copies for byval args
15606   SmallVector<SDValue, 8> ByValArgs;
15607   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
15608     ISD::ArgFlagsTy Flags = Outs[i].Flags;
15609     if (!Flags.isByVal())
15610       continue;
15611 
15612     SDValue Arg = OutVals[i];
15613     unsigned Size = Flags.getByValSize();
15614     Align Alignment = Flags.getNonZeroByValAlign();
15615 
15616     int FI =
15617         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
15618     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
15619     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
15620 
15621     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
15622                           /*IsVolatile=*/false,
15623                           /*AlwaysInline=*/false, IsTailCall,
15624                           MachinePointerInfo(), MachinePointerInfo());
15625     ByValArgs.push_back(FIPtr);
15626   }
15627 
15628   if (!IsTailCall)
15629     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
15630 
15631   // Copy argument values to their designated locations.
15632   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
15633   SmallVector<SDValue, 8> MemOpChains;
15634   SDValue StackPtr;
15635   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
15636     CCValAssign &VA = ArgLocs[i];
15637     SDValue ArgValue = OutVals[i];
15638     ISD::ArgFlagsTy Flags = Outs[i].Flags;
15639 
15640     // Handle passing f64 on RV32D with a soft float ABI as a special case.
15641     bool IsF64OnRV32DSoftABI =
15642         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
15643     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
15644       SDValue SplitF64 = DAG.getNode(
15645           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
15646       SDValue Lo = SplitF64.getValue(0);
15647       SDValue Hi = SplitF64.getValue(1);
15648 
15649       Register RegLo = VA.getLocReg();
15650       RegsToPass.push_back(std::make_pair(RegLo, Lo));
15651 
15652       if (RegLo == RISCV::X17) {
15653         // Second half of f64 is passed on the stack.
15654         // Work out the address of the stack slot.
15655         if (!StackPtr.getNode())
15656           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
15657         // Emit the store.
15658         MemOpChains.push_back(
15659             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
15660       } else {
15661         // Second half of f64 is passed in another GPR.
15662         assert(RegLo < RISCV::X31 && "Invalid register pair");
15663         Register RegHigh = RegLo + 1;
15664         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
15665       }
15666       continue;
15667     }
15668 
15669     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
15670     // as any other MemLoc.
15671 
15672     // Promote the value if needed.
15673     // For now, only handle fully promoted and indirect arguments.
15674     if (VA.getLocInfo() == CCValAssign::Indirect) {
15675       // Store the argument in a stack slot and pass its address.
15676       Align StackAlign =
15677           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
15678                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
15679       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
15680       // If the original argument was split (e.g. i128), we need
15681       // to store the required parts of it here (and pass just one address).
15682       // Vectors may be partly split to registers and partly to the stack, in
15683       // which case the base address is partly offset and subsequent stores are
15684       // relative to that.
15685       unsigned ArgIndex = Outs[i].OrigArgIndex;
15686       unsigned ArgPartOffset = Outs[i].PartOffset;
15687       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
15688       // Calculate the total size to store. We don't have access to what we're
15689       // actually storing other than performing the loop and collecting the
15690       // info.
15691       SmallVector<std::pair<SDValue, SDValue>> Parts;
15692       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
15693         SDValue PartValue = OutVals[i + 1];
15694         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
15695         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
15696         EVT PartVT = PartValue.getValueType();
15697         if (PartVT.isScalableVector())
15698           Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
15699         StoredSize += PartVT.getStoreSize();
15700         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
15701         Parts.push_back(std::make_pair(PartValue, Offset));
15702         ++i;
15703       }
15704       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
15705       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
15706       MemOpChains.push_back(
15707           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
15708                        MachinePointerInfo::getFixedStack(MF, FI)));
15709       for (const auto &Part : Parts) {
15710         SDValue PartValue = Part.first;
15711         SDValue PartOffset = Part.second;
15712         SDValue Address =
15713             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
15714         MemOpChains.push_back(
15715             DAG.getStore(Chain, DL, PartValue, Address,
15716                          MachinePointerInfo::getFixedStack(MF, FI)));
15717       }
15718       ArgValue = SpillSlot;
15719     } else {
15720       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
15721     }
15722 
15723     // Use local copy if it is a byval arg.
15724     if (Flags.isByVal())
15725       ArgValue = ByValArgs[j++];
15726 
15727     if (VA.isRegLoc()) {
15728       // Queue up the argument copies and emit them at the end.
15729       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
15730     } else {
15731       assert(VA.isMemLoc() && "Argument not register or memory");
15732       assert(!IsTailCall && "Tail call not allowed if stack is used "
15733                             "for passing parameters");
15734 
15735       // Work out the address of the stack slot.
15736       if (!StackPtr.getNode())
15737         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
15738       SDValue Address =
15739           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
15740                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
15741 
15742       // Emit the store.
15743       MemOpChains.push_back(
15744           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
15745     }
15746   }
15747 
15748   // Join the stores, which are independent of one another.
15749   if (!MemOpChains.empty())
15750     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
15751 
15752   SDValue Glue;
15753 
15754   // Build a sequence of copy-to-reg nodes, chained and glued together.
15755   for (auto &Reg : RegsToPass) {
15756     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
15757     Glue = Chain.getValue(1);
15758   }
15759 
15760   // Validate that none of the argument registers have been marked as
15761   // reserved, if so report an error. Do the same for the return address if this
15762   // is not a tailcall.
15763   validateCCReservedRegs(RegsToPass, MF);
15764   if (!IsTailCall &&
15765       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
15766     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
15767         MF.getFunction(),
15768         "Return address register required, but has been reserved."});
15769 
15770   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
15771   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
15772   // split it and then direct call can be matched by PseudoCALL.
15773   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
15774     const GlobalValue *GV = S->getGlobal();
15775 
15776     unsigned OpFlags = RISCVII::MO_CALL;
15777     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
15778       OpFlags = RISCVII::MO_PLT;
15779 
15780     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
15781   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
15782     unsigned OpFlags = RISCVII::MO_CALL;
15783 
15784     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
15785                                                  nullptr))
15786       OpFlags = RISCVII::MO_PLT;
15787 
15788     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
15789   }
15790 
15791   // The first call operand is the chain and the second is the target address.
15792   SmallVector<SDValue, 8> Ops;
15793   Ops.push_back(Chain);
15794   Ops.push_back(Callee);
15795 
15796   // Add argument registers to the end of the list so that they are
15797   // known live into the call.
15798   for (auto &Reg : RegsToPass)
15799     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
15800 
15801   if (!IsTailCall) {
15802     // Add a register mask operand representing the call-preserved registers.
15803     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
15804     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
15805     assert(Mask && "Missing call preserved mask for calling convention");
15806     Ops.push_back(DAG.getRegisterMask(Mask));
15807   }
15808 
15809   // Glue the call to the argument copies, if any.
15810   if (Glue.getNode())
15811     Ops.push_back(Glue);
15812 
15813   assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
15814          "Unexpected CFI type for a direct call");
15815 
15816   // Emit the call.
15817   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
15818 
15819   if (IsTailCall) {
15820     MF.getFrameInfo().setHasTailCall();
15821     SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
15822     if (CLI.CFIType)
15823       Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
15824     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
15825     return Ret;
15826   }
15827 
15828   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
15829   if (CLI.CFIType)
15830     Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
15831   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
15832   Glue = Chain.getValue(1);
15833 
15834   // Mark the end of the call, which is glued to the call itself.
15835   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
15836   Glue = Chain.getValue(1);
15837 
15838   // Assign locations to each value returned by this call.
15839   SmallVector<CCValAssign, 16> RVLocs;
15840   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
15841   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
15842 
15843   // Copy all of the result registers out of their specified physreg.
15844   for (auto &VA : RVLocs) {
15845     // Copy the value out
15846     SDValue RetValue =
15847         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
15848     // Glue the RetValue to the end of the call sequence
15849     Chain = RetValue.getValue(1);
15850     Glue = RetValue.getValue(2);
15851 
15852     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
15853       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
15854       SDValue RetValue2 =
15855           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
15856       Chain = RetValue2.getValue(1);
15857       Glue = RetValue2.getValue(2);
15858       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
15859                              RetValue2);
15860     }
15861 
15862     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
15863 
15864     InVals.push_back(RetValue);
15865   }
15866 
15867   return Chain;
15868 }
15869 
15870 bool RISCVTargetLowering::CanLowerReturn(
15871     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
15872     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
15873   SmallVector<CCValAssign, 16> RVLocs;
15874   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
15875 
15876   std::optional<unsigned> FirstMaskArgument;
15877   if (Subtarget.hasVInstructions())
15878     FirstMaskArgument = preAssignMask(Outs);
15879 
15880   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
15881     MVT VT = Outs[i].VT;
15882     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
15883     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
15884     if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
15885                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
15886                  *this, FirstMaskArgument))
15887       return false;
15888   }
15889   return true;
15890 }
15891 
15892 SDValue
15893 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
15894                                  bool IsVarArg,
15895                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
15896                                  const SmallVectorImpl<SDValue> &OutVals,
15897                                  const SDLoc &DL, SelectionDAG &DAG) const {
15898   MachineFunction &MF = DAG.getMachineFunction();
15899   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
15900 
15901   // Stores the assignment of the return value to a location.
15902   SmallVector<CCValAssign, 16> RVLocs;
15903 
15904   // Info about the registers and stack slot.
15905   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
15906                  *DAG.getContext());
15907 
15908   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
15909                     nullptr, RISCV::CC_RISCV);
15910 
15911   if (CallConv == CallingConv::GHC && !RVLocs.empty())
15912     report_fatal_error("GHC functions return void only");
15913 
15914   SDValue Glue;
15915   SmallVector<SDValue, 4> RetOps(1, Chain);
15916 
15917   // Copy the result values into the output registers.
15918   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
15919     SDValue Val = OutVals[i];
15920     CCValAssign &VA = RVLocs[i];
15921     assert(VA.isRegLoc() && "Can only return in registers!");
15922 
15923     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
15924       // Handle returning f64 on RV32D with a soft float ABI.
15925       assert(VA.isRegLoc() && "Expected return via registers");
15926       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
15927                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
15928       SDValue Lo = SplitF64.getValue(0);
15929       SDValue Hi = SplitF64.getValue(1);
15930       Register RegLo = VA.getLocReg();
15931       assert(RegLo < RISCV::X31 && "Invalid register pair");
15932       Register RegHi = RegLo + 1;
15933 
15934       if (STI.isRegisterReservedByUser(RegLo) ||
15935           STI.isRegisterReservedByUser(RegHi))
15936         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
15937             MF.getFunction(),
15938             "Return value register required, but has been reserved."});
15939 
15940       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
15941       Glue = Chain.getValue(1);
15942       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
15943       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
15944       Glue = Chain.getValue(1);
15945       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
15946     } else {
15947       // Handle a 'normal' return.
15948       Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
15949       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
15950 
15951       if (STI.isRegisterReservedByUser(VA.getLocReg()))
15952         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
15953             MF.getFunction(),
15954             "Return value register required, but has been reserved."});
15955 
15956       // Guarantee that all emitted copies are stuck together.
15957       Glue = Chain.getValue(1);
15958       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
15959     }
15960   }
15961 
15962   RetOps[0] = Chain; // Update chain.
15963 
15964   // Add the glue node if we have it.
15965   if (Glue.getNode()) {
15966     RetOps.push_back(Glue);
15967   }
15968 
15969   if (any_of(RVLocs,
15970              [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
15971     MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
15972 
15973   unsigned RetOpc = RISCVISD::RET_GLUE;
15974   // Interrupt service routines use different return instructions.
15975   const Function &Func = DAG.getMachineFunction().getFunction();
15976   if (Func.hasFnAttribute("interrupt")) {
15977     if (!Func.getReturnType()->isVoidTy())
15978       report_fatal_error(
15979           "Functions with the interrupt attribute must have void return type!");
15980 
15981     MachineFunction &MF = DAG.getMachineFunction();
15982     StringRef Kind =
15983       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
15984 
15985     if (Kind == "supervisor")
15986       RetOpc = RISCVISD::SRET_GLUE;
15987     else
15988       RetOpc = RISCVISD::MRET_GLUE;
15989   }
15990 
15991   return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
15992 }
15993 
15994 void RISCVTargetLowering::validateCCReservedRegs(
15995     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
15996     MachineFunction &MF) const {
15997   const Function &F = MF.getFunction();
15998   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
15999 
16000   if (llvm::any_of(Regs, [&STI](auto Reg) {
16001         return STI.isRegisterReservedByUser(Reg.first);
16002       }))
16003     F.getContext().diagnose(DiagnosticInfoUnsupported{
16004         F, "Argument register required, but has been reserved."});
16005 }
16006 
16007 // Check if the result of the node is only used as a return value, as
16008 // otherwise we can't perform a tail-call.
16009 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
16010   if (N->getNumValues() != 1)
16011     return false;
16012   if (!N->hasNUsesOfValue(1, 0))
16013     return false;
16014 
16015   SDNode *Copy = *N->use_begin();
16016 
16017   if (Copy->getOpcode() == ISD::BITCAST) {
16018     return isUsedByReturnOnly(Copy, Chain);
16019   }
16020 
16021   // TODO: Handle additional opcodes in order to support tail-calling libcalls
16022   // with soft float ABIs.
16023   if (Copy->getOpcode() != ISD::CopyToReg) {
16024     return false;
16025   }
16026 
16027   // If the ISD::CopyToReg has a glue operand, we conservatively assume it
16028   // isn't safe to perform a tail call.
16029   if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
16030     return false;
16031 
16032   // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
16033   bool HasRet = false;
16034   for (SDNode *Node : Copy->uses()) {
16035     if (Node->getOpcode() != RISCVISD::RET_GLUE)
16036       return false;
16037     HasRet = true;
16038   }
16039   if (!HasRet)
16040     return false;
16041 
16042   Chain = Copy->getOperand(0);
16043   return true;
16044 }
16045 
16046 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16047   return CI->isTailCall();
16048 }
16049 
16050 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
16051 #define NODE_NAME_CASE(NODE)                                                   \
16052   case RISCVISD::NODE:                                                         \
16053     return "RISCVISD::" #NODE;
16054   // clang-format off
16055   switch ((RISCVISD::NodeType)Opcode) {
16056   case RISCVISD::FIRST_NUMBER:
16057     break;
16058   NODE_NAME_CASE(RET_GLUE)
16059   NODE_NAME_CASE(SRET_GLUE)
16060   NODE_NAME_CASE(MRET_GLUE)
16061   NODE_NAME_CASE(CALL)
16062   NODE_NAME_CASE(SELECT_CC)
16063   NODE_NAME_CASE(BR_CC)
16064   NODE_NAME_CASE(BuildPairF64)
16065   NODE_NAME_CASE(SplitF64)
16066   NODE_NAME_CASE(TAIL)
16067   NODE_NAME_CASE(ADD_LO)
16068   NODE_NAME_CASE(HI)
16069   NODE_NAME_CASE(LLA)
16070   NODE_NAME_CASE(LGA)
16071   NODE_NAME_CASE(ADD_TPREL)
16072   NODE_NAME_CASE(LA_TLS_IE)
16073   NODE_NAME_CASE(LA_TLS_GD)
16074   NODE_NAME_CASE(MULHSU)
16075   NODE_NAME_CASE(SLLW)
16076   NODE_NAME_CASE(SRAW)
16077   NODE_NAME_CASE(SRLW)
16078   NODE_NAME_CASE(DIVW)
16079   NODE_NAME_CASE(DIVUW)
16080   NODE_NAME_CASE(REMUW)
16081   NODE_NAME_CASE(ROLW)
16082   NODE_NAME_CASE(RORW)
16083   NODE_NAME_CASE(CLZW)
16084   NODE_NAME_CASE(CTZW)
16085   NODE_NAME_CASE(ABSW)
16086   NODE_NAME_CASE(FMV_H_X)
16087   NODE_NAME_CASE(FMV_X_ANYEXTH)
16088   NODE_NAME_CASE(FMV_X_SIGNEXTH)
16089   NODE_NAME_CASE(FMV_W_X_RV64)
16090   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
16091   NODE_NAME_CASE(FCVT_X)
16092   NODE_NAME_CASE(FCVT_XU)
16093   NODE_NAME_CASE(FCVT_W_RV64)
16094   NODE_NAME_CASE(FCVT_WU_RV64)
16095   NODE_NAME_CASE(STRICT_FCVT_W_RV64)
16096   NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
16097   NODE_NAME_CASE(FP_ROUND_BF16)
16098   NODE_NAME_CASE(FP_EXTEND_BF16)
16099   NODE_NAME_CASE(FROUND)
16100   NODE_NAME_CASE(FPCLASS)
16101   NODE_NAME_CASE(FMAX)
16102   NODE_NAME_CASE(FMIN)
16103   NODE_NAME_CASE(READ_CYCLE_WIDE)
16104   NODE_NAME_CASE(BREV8)
16105   NODE_NAME_CASE(ORC_B)
16106   NODE_NAME_CASE(ZIP)
16107   NODE_NAME_CASE(UNZIP)
16108   NODE_NAME_CASE(CLMUL)
16109   NODE_NAME_CASE(CLMULH)
16110   NODE_NAME_CASE(CLMULR)
16111   NODE_NAME_CASE(SHA256SIG0)
16112   NODE_NAME_CASE(SHA256SIG1)
16113   NODE_NAME_CASE(SHA256SUM0)
16114   NODE_NAME_CASE(SHA256SUM1)
16115   NODE_NAME_CASE(SM4KS)
16116   NODE_NAME_CASE(SM4ED)
16117   NODE_NAME_CASE(SM3P0)
16118   NODE_NAME_CASE(SM3P1)
16119   NODE_NAME_CASE(TH_LWD)
16120   NODE_NAME_CASE(TH_LWUD)
16121   NODE_NAME_CASE(TH_LDD)
16122   NODE_NAME_CASE(TH_SWD)
16123   NODE_NAME_CASE(TH_SDD)
16124   NODE_NAME_CASE(VMV_V_V_VL)
16125   NODE_NAME_CASE(VMV_V_X_VL)
16126   NODE_NAME_CASE(VFMV_V_F_VL)
16127   NODE_NAME_CASE(VMV_X_S)
16128   NODE_NAME_CASE(VMV_S_X_VL)
16129   NODE_NAME_CASE(VFMV_S_F_VL)
16130   NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
16131   NODE_NAME_CASE(READ_VLENB)
16132   NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
16133   NODE_NAME_CASE(VSLIDEUP_VL)
16134   NODE_NAME_CASE(VSLIDE1UP_VL)
16135   NODE_NAME_CASE(VSLIDEDOWN_VL)
16136   NODE_NAME_CASE(VSLIDE1DOWN_VL)
16137   NODE_NAME_CASE(VFSLIDE1UP_VL)
16138   NODE_NAME_CASE(VFSLIDE1DOWN_VL)
16139   NODE_NAME_CASE(VID_VL)
16140   NODE_NAME_CASE(VFNCVT_ROD_VL)
16141   NODE_NAME_CASE(VECREDUCE_ADD_VL)
16142   NODE_NAME_CASE(VECREDUCE_UMAX_VL)
16143   NODE_NAME_CASE(VECREDUCE_SMAX_VL)
16144   NODE_NAME_CASE(VECREDUCE_UMIN_VL)
16145   NODE_NAME_CASE(VECREDUCE_SMIN_VL)
16146   NODE_NAME_CASE(VECREDUCE_AND_VL)
16147   NODE_NAME_CASE(VECREDUCE_OR_VL)
16148   NODE_NAME_CASE(VECREDUCE_XOR_VL)
16149   NODE_NAME_CASE(VECREDUCE_FADD_VL)
16150   NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
16151   NODE_NAME_CASE(VECREDUCE_FMIN_VL)
16152   NODE_NAME_CASE(VECREDUCE_FMAX_VL)
16153   NODE_NAME_CASE(ADD_VL)
16154   NODE_NAME_CASE(AND_VL)
16155   NODE_NAME_CASE(MUL_VL)
16156   NODE_NAME_CASE(OR_VL)
16157   NODE_NAME_CASE(SDIV_VL)
16158   NODE_NAME_CASE(SHL_VL)
16159   NODE_NAME_CASE(SREM_VL)
16160   NODE_NAME_CASE(SRA_VL)
16161   NODE_NAME_CASE(SRL_VL)
16162   NODE_NAME_CASE(SUB_VL)
16163   NODE_NAME_CASE(UDIV_VL)
16164   NODE_NAME_CASE(UREM_VL)
16165   NODE_NAME_CASE(XOR_VL)
16166   NODE_NAME_CASE(SADDSAT_VL)
16167   NODE_NAME_CASE(UADDSAT_VL)
16168   NODE_NAME_CASE(SSUBSAT_VL)
16169   NODE_NAME_CASE(USUBSAT_VL)
16170   NODE_NAME_CASE(FADD_VL)
16171   NODE_NAME_CASE(FSUB_VL)
16172   NODE_NAME_CASE(FMUL_VL)
16173   NODE_NAME_CASE(FDIV_VL)
16174   NODE_NAME_CASE(FNEG_VL)
16175   NODE_NAME_CASE(FABS_VL)
16176   NODE_NAME_CASE(FSQRT_VL)
16177   NODE_NAME_CASE(FCLASS_VL)
16178   NODE_NAME_CASE(VFMADD_VL)
16179   NODE_NAME_CASE(VFNMADD_VL)
16180   NODE_NAME_CASE(VFMSUB_VL)
16181   NODE_NAME_CASE(VFNMSUB_VL)
16182   NODE_NAME_CASE(VFWMADD_VL)
16183   NODE_NAME_CASE(VFWNMADD_VL)
16184   NODE_NAME_CASE(VFWMSUB_VL)
16185   NODE_NAME_CASE(VFWNMSUB_VL)
16186   NODE_NAME_CASE(FCOPYSIGN_VL)
16187   NODE_NAME_CASE(SMIN_VL)
16188   NODE_NAME_CASE(SMAX_VL)
16189   NODE_NAME_CASE(UMIN_VL)
16190   NODE_NAME_CASE(UMAX_VL)
16191   NODE_NAME_CASE(BITREVERSE_VL)
16192   NODE_NAME_CASE(BSWAP_VL)
16193   NODE_NAME_CASE(CTLZ_VL)
16194   NODE_NAME_CASE(CTTZ_VL)
16195   NODE_NAME_CASE(CTPOP_VL)
16196   NODE_NAME_CASE(FMINNUM_VL)
16197   NODE_NAME_CASE(FMAXNUM_VL)
16198   NODE_NAME_CASE(MULHS_VL)
16199   NODE_NAME_CASE(MULHU_VL)
16200   NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
16201   NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
16202   NODE_NAME_CASE(VFCVT_RM_X_F_VL)
16203   NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
16204   NODE_NAME_CASE(VFCVT_X_F_VL)
16205   NODE_NAME_CASE(VFCVT_XU_F_VL)
16206   NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
16207   NODE_NAME_CASE(SINT_TO_FP_VL)
16208   NODE_NAME_CASE(UINT_TO_FP_VL)
16209   NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
16210   NODE_NAME_CASE(VFCVT_RM_F_X_VL)
16211   NODE_NAME_CASE(FP_EXTEND_VL)
16212   NODE_NAME_CASE(FP_ROUND_VL)
16213   NODE_NAME_CASE(STRICT_FADD_VL)
16214   NODE_NAME_CASE(STRICT_FSUB_VL)
16215   NODE_NAME_CASE(STRICT_FMUL_VL)
16216   NODE_NAME_CASE(STRICT_FDIV_VL)
16217   NODE_NAME_CASE(STRICT_FSQRT_VL)
16218   NODE_NAME_CASE(STRICT_VFMADD_VL)
16219   NODE_NAME_CASE(STRICT_VFNMADD_VL)
16220   NODE_NAME_CASE(STRICT_VFMSUB_VL)
16221   NODE_NAME_CASE(STRICT_VFNMSUB_VL)
16222   NODE_NAME_CASE(STRICT_FP_ROUND_VL)
16223   NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
16224   NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
16225   NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
16226   NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
16227   NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
16228   NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
16229   NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
16230   NODE_NAME_CASE(STRICT_FSETCC_VL)
16231   NODE_NAME_CASE(STRICT_FSETCCS_VL)
16232   NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
16233   NODE_NAME_CASE(VWMUL_VL)
16234   NODE_NAME_CASE(VWMULU_VL)
16235   NODE_NAME_CASE(VWMULSU_VL)
16236   NODE_NAME_CASE(VWADD_VL)
16237   NODE_NAME_CASE(VWADDU_VL)
16238   NODE_NAME_CASE(VWSUB_VL)
16239   NODE_NAME_CASE(VWSUBU_VL)
16240   NODE_NAME_CASE(VWADD_W_VL)
16241   NODE_NAME_CASE(VWADDU_W_VL)
16242   NODE_NAME_CASE(VWSUB_W_VL)
16243   NODE_NAME_CASE(VWSUBU_W_VL)
16244   NODE_NAME_CASE(VFWMUL_VL)
16245   NODE_NAME_CASE(VFWADD_VL)
16246   NODE_NAME_CASE(VFWSUB_VL)
16247   NODE_NAME_CASE(VFWADD_W_VL)
16248   NODE_NAME_CASE(VFWSUB_W_VL)
16249   NODE_NAME_CASE(VWMACC_VL)
16250   NODE_NAME_CASE(VWMACCU_VL)
16251   NODE_NAME_CASE(VWMACCSU_VL)
16252   NODE_NAME_CASE(VNSRL_VL)
16253   NODE_NAME_CASE(SETCC_VL)
16254   NODE_NAME_CASE(VSELECT_VL)
16255   NODE_NAME_CASE(VP_MERGE_VL)
16256   NODE_NAME_CASE(VMAND_VL)
16257   NODE_NAME_CASE(VMOR_VL)
16258   NODE_NAME_CASE(VMXOR_VL)
16259   NODE_NAME_CASE(VMCLR_VL)
16260   NODE_NAME_CASE(VMSET_VL)
16261   NODE_NAME_CASE(VRGATHER_VX_VL)
16262   NODE_NAME_CASE(VRGATHER_VV_VL)
16263   NODE_NAME_CASE(VRGATHEREI16_VV_VL)
16264   NODE_NAME_CASE(VSEXT_VL)
16265   NODE_NAME_CASE(VZEXT_VL)
16266   NODE_NAME_CASE(VCPOP_VL)
16267   NODE_NAME_CASE(VFIRST_VL)
16268   NODE_NAME_CASE(READ_CSR)
16269   NODE_NAME_CASE(WRITE_CSR)
16270   NODE_NAME_CASE(SWAP_CSR)
16271   NODE_NAME_CASE(CZERO_EQZ)
16272   NODE_NAME_CASE(CZERO_NEZ)
16273   }
16274   // clang-format on
16275   return nullptr;
16276 #undef NODE_NAME_CASE
16277 }
16278 
16279 /// getConstraintType - Given a constraint letter, return the type of
16280 /// constraint it is for this target.
16281 RISCVTargetLowering::ConstraintType
16282 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
16283   if (Constraint.size() == 1) {
16284     switch (Constraint[0]) {
16285     default:
16286       break;
16287     case 'f':
16288       return C_RegisterClass;
16289     case 'I':
16290     case 'J':
16291     case 'K':
16292       return C_Immediate;
16293     case 'A':
16294       return C_Memory;
16295     case 'S': // A symbolic address
16296       return C_Other;
16297     }
16298   } else {
16299     if (Constraint == "vr" || Constraint == "vm")
16300       return C_RegisterClass;
16301   }
16302   return TargetLowering::getConstraintType(Constraint);
16303 }
16304 
16305 std::pair<unsigned, const TargetRegisterClass *>
16306 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16307                                                   StringRef Constraint,
16308                                                   MVT VT) const {
16309   // First, see if this is a constraint that directly corresponds to a RISC-V
16310   // register class.
16311   if (Constraint.size() == 1) {
16312     switch (Constraint[0]) {
16313     case 'r':
16314       // TODO: Support fixed vectors up to XLen for P extension?
16315       if (VT.isVector())
16316         break;
16317       return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
16318     case 'f':
16319       if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16)
16320         return std::make_pair(0U, &RISCV::FPR16RegClass);
16321       if (Subtarget.hasStdExtF() && VT == MVT::f32)
16322         return std::make_pair(0U, &RISCV::FPR32RegClass);
16323       if (Subtarget.hasStdExtD() && VT == MVT::f64)
16324         return std::make_pair(0U, &RISCV::FPR64RegClass);
16325       break;
16326     default:
16327       break;
16328     }
16329   } else if (Constraint == "vr") {
16330     for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
16331                            &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
16332       if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
16333         return std::make_pair(0U, RC);
16334     }
16335   } else if (Constraint == "vm") {
16336     if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
16337       return std::make_pair(0U, &RISCV::VMV0RegClass);
16338   }
16339 
16340   // Clang will correctly decode the usage of register name aliases into their
16341   // official names. However, other frontends like `rustc` do not. This allows
16342   // users of these frontends to use the ABI names for registers in LLVM-style
16343   // register constraints.
16344   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
16345                                .Case("{zero}", RISCV::X0)
16346                                .Case("{ra}", RISCV::X1)
16347                                .Case("{sp}", RISCV::X2)
16348                                .Case("{gp}", RISCV::X3)
16349                                .Case("{tp}", RISCV::X4)
16350                                .Case("{t0}", RISCV::X5)
16351                                .Case("{t1}", RISCV::X6)
16352                                .Case("{t2}", RISCV::X7)
16353                                .Cases("{s0}", "{fp}", RISCV::X8)
16354                                .Case("{s1}", RISCV::X9)
16355                                .Case("{a0}", RISCV::X10)
16356                                .Case("{a1}", RISCV::X11)
16357                                .Case("{a2}", RISCV::X12)
16358                                .Case("{a3}", RISCV::X13)
16359                                .Case("{a4}", RISCV::X14)
16360                                .Case("{a5}", RISCV::X15)
16361                                .Case("{a6}", RISCV::X16)
16362                                .Case("{a7}", RISCV::X17)
16363                                .Case("{s2}", RISCV::X18)
16364                                .Case("{s3}", RISCV::X19)
16365                                .Case("{s4}", RISCV::X20)
16366                                .Case("{s5}", RISCV::X21)
16367                                .Case("{s6}", RISCV::X22)
16368                                .Case("{s7}", RISCV::X23)
16369                                .Case("{s8}", RISCV::X24)
16370                                .Case("{s9}", RISCV::X25)
16371                                .Case("{s10}", RISCV::X26)
16372                                .Case("{s11}", RISCV::X27)
16373                                .Case("{t3}", RISCV::X28)
16374                                .Case("{t4}", RISCV::X29)
16375                                .Case("{t5}", RISCV::X30)
16376                                .Case("{t6}", RISCV::X31)
16377                                .Default(RISCV::NoRegister);
16378   if (XRegFromAlias != RISCV::NoRegister)
16379     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
16380 
16381   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
16382   // TableGen record rather than the AsmName to choose registers for InlineAsm
16383   // constraints, plus we want to match those names to the widest floating point
16384   // register type available, manually select floating point registers here.
16385   //
16386   // The second case is the ABI name of the register, so that frontends can also
16387   // use the ABI names in register constraint lists.
16388   if (Subtarget.hasStdExtF()) {
16389     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
16390                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
16391                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
16392                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
16393                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
16394                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
16395                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
16396                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
16397                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
16398                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
16399                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
16400                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
16401                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
16402                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
16403                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
16404                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
16405                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
16406                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
16407                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
16408                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
16409                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
16410                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
16411                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
16412                         .Cases("{f22}", "{fs6}", RISCV::F22_F)
16413                         .Cases("{f23}", "{fs7}", RISCV::F23_F)
16414                         .Cases("{f24}", "{fs8}", RISCV::F24_F)
16415                         .Cases("{f25}", "{fs9}", RISCV::F25_F)
16416                         .Cases("{f26}", "{fs10}", RISCV::F26_F)
16417                         .Cases("{f27}", "{fs11}", RISCV::F27_F)
16418                         .Cases("{f28}", "{ft8}", RISCV::F28_F)
16419                         .Cases("{f29}", "{ft9}", RISCV::F29_F)
16420                         .Cases("{f30}", "{ft10}", RISCV::F30_F)
16421                         .Cases("{f31}", "{ft11}", RISCV::F31_F)
16422                         .Default(RISCV::NoRegister);
16423     if (FReg != RISCV::NoRegister) {
16424       assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
16425       if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
16426         unsigned RegNo = FReg - RISCV::F0_F;
16427         unsigned DReg = RISCV::F0_D + RegNo;
16428         return std::make_pair(DReg, &RISCV::FPR64RegClass);
16429       }
16430       if (VT == MVT::f32 || VT == MVT::Other)
16431         return std::make_pair(FReg, &RISCV::FPR32RegClass);
16432       if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16) {
16433         unsigned RegNo = FReg - RISCV::F0_F;
16434         unsigned HReg = RISCV::F0_H + RegNo;
16435         return std::make_pair(HReg, &RISCV::FPR16RegClass);
16436       }
16437     }
16438   }
16439 
16440   if (Subtarget.hasVInstructions()) {
16441     Register VReg = StringSwitch<Register>(Constraint.lower())
16442                         .Case("{v0}", RISCV::V0)
16443                         .Case("{v1}", RISCV::V1)
16444                         .Case("{v2}", RISCV::V2)
16445                         .Case("{v3}", RISCV::V3)
16446                         .Case("{v4}", RISCV::V4)
16447                         .Case("{v5}", RISCV::V5)
16448                         .Case("{v6}", RISCV::V6)
16449                         .Case("{v7}", RISCV::V7)
16450                         .Case("{v8}", RISCV::V8)
16451                         .Case("{v9}", RISCV::V9)
16452                         .Case("{v10}", RISCV::V10)
16453                         .Case("{v11}", RISCV::V11)
16454                         .Case("{v12}", RISCV::V12)
16455                         .Case("{v13}", RISCV::V13)
16456                         .Case("{v14}", RISCV::V14)
16457                         .Case("{v15}", RISCV::V15)
16458                         .Case("{v16}", RISCV::V16)
16459                         .Case("{v17}", RISCV::V17)
16460                         .Case("{v18}", RISCV::V18)
16461                         .Case("{v19}", RISCV::V19)
16462                         .Case("{v20}", RISCV::V20)
16463                         .Case("{v21}", RISCV::V21)
16464                         .Case("{v22}", RISCV::V22)
16465                         .Case("{v23}", RISCV::V23)
16466                         .Case("{v24}", RISCV::V24)
16467                         .Case("{v25}", RISCV::V25)
16468                         .Case("{v26}", RISCV::V26)
16469                         .Case("{v27}", RISCV::V27)
16470                         .Case("{v28}", RISCV::V28)
16471                         .Case("{v29}", RISCV::V29)
16472                         .Case("{v30}", RISCV::V30)
16473                         .Case("{v31}", RISCV::V31)
16474                         .Default(RISCV::NoRegister);
16475     if (VReg != RISCV::NoRegister) {
16476       if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
16477         return std::make_pair(VReg, &RISCV::VMRegClass);
16478       if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
16479         return std::make_pair(VReg, &RISCV::VRRegClass);
16480       for (const auto *RC :
16481            {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
16482         if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
16483           VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
16484           return std::make_pair(VReg, RC);
16485         }
16486       }
16487     }
16488   }
16489 
16490   std::pair<Register, const TargetRegisterClass *> Res =
16491       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16492 
16493   // If we picked one of the Zfinx register classes, remap it to the GPR class.
16494   // FIXME: When Zfinx is supported in CodeGen this will need to take the
16495   // Subtarget into account.
16496   if (Res.second == &RISCV::GPRF16RegClass ||
16497       Res.second == &RISCV::GPRF32RegClass ||
16498       Res.second == &RISCV::GPRPF64RegClass)
16499     return std::make_pair(Res.first, &RISCV::GPRRegClass);
16500 
16501   return Res;
16502 }
16503 
16504 unsigned
16505 RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
16506   // Currently only support length 1 constraints.
16507   if (ConstraintCode.size() == 1) {
16508     switch (ConstraintCode[0]) {
16509     case 'A':
16510       return InlineAsm::Constraint_A;
16511     default:
16512       break;
16513     }
16514   }
16515 
16516   return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
16517 }
16518 
16519 void RISCVTargetLowering::LowerAsmOperandForConstraint(
16520     SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
16521     SelectionDAG &DAG) const {
16522   // Currently only support length 1 constraints.
16523   if (Constraint.length() == 1) {
16524     switch (Constraint[0]) {
16525     case 'I':
16526       // Validate & create a 12-bit signed immediate operand.
16527       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
16528         uint64_t CVal = C->getSExtValue();
16529         if (isInt<12>(CVal))
16530           Ops.push_back(
16531               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
16532       }
16533       return;
16534     case 'J':
16535       // Validate & create an integer zero operand.
16536       if (isNullConstant(Op))
16537         Ops.push_back(
16538             DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
16539       return;
16540     case 'K':
16541       // Validate & create a 5-bit unsigned immediate operand.
16542       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
16543         uint64_t CVal = C->getZExtValue();
16544         if (isUInt<5>(CVal))
16545           Ops.push_back(
16546               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
16547       }
16548       return;
16549     case 'S':
16550       if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
16551         Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
16552                                                  GA->getValueType(0)));
16553       } else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
16554         Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
16555                                                 BA->getValueType(0)));
16556       }
16557       return;
16558     default:
16559       break;
16560     }
16561   }
16562   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16563 }
16564 
16565 Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
16566                                                    Instruction *Inst,
16567                                                    AtomicOrdering Ord) const {
16568   if (Subtarget.hasStdExtZtso()) {
16569     if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
16570       return Builder.CreateFence(Ord);
16571     return nullptr;
16572   }
16573 
16574   if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
16575     return Builder.CreateFence(Ord);
16576   if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
16577     return Builder.CreateFence(AtomicOrdering::Release);
16578   return nullptr;
16579 }
16580 
16581 Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
16582                                                     Instruction *Inst,
16583                                                     AtomicOrdering Ord) const {
16584   if (Subtarget.hasStdExtZtso())
16585     return nullptr;
16586 
16587   if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
16588     return Builder.CreateFence(AtomicOrdering::Acquire);
16589   if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
16590       Ord == AtomicOrdering::SequentiallyConsistent)
16591     return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
16592   return nullptr;
16593 }
16594 
16595 TargetLowering::AtomicExpansionKind
16596 RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
16597   // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
16598   // point operations can't be used in an lr/sc sequence without breaking the
16599   // forward-progress guarantee.
16600   if (AI->isFloatingPointOperation() ||
16601       AI->getOperation() == AtomicRMWInst::UIncWrap ||
16602       AI->getOperation() == AtomicRMWInst::UDecWrap)
16603     return AtomicExpansionKind::CmpXChg;
16604 
16605   // Don't expand forced atomics, we want to have __sync libcalls instead.
16606   if (Subtarget.hasForcedAtomics())
16607     return AtomicExpansionKind::None;
16608 
16609   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
16610   if (Size == 8 || Size == 16)
16611     return AtomicExpansionKind::MaskedIntrinsic;
16612   return AtomicExpansionKind::None;
16613 }
16614 
16615 static Intrinsic::ID
16616 getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
16617   if (XLen == 32) {
16618     switch (BinOp) {
16619     default:
16620       llvm_unreachable("Unexpected AtomicRMW BinOp");
16621     case AtomicRMWInst::Xchg:
16622       return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
16623     case AtomicRMWInst::Add:
16624       return Intrinsic::riscv_masked_atomicrmw_add_i32;
16625     case AtomicRMWInst::Sub:
16626       return Intrinsic::riscv_masked_atomicrmw_sub_i32;
16627     case AtomicRMWInst::Nand:
16628       return Intrinsic::riscv_masked_atomicrmw_nand_i32;
16629     case AtomicRMWInst::Max:
16630       return Intrinsic::riscv_masked_atomicrmw_max_i32;
16631     case AtomicRMWInst::Min:
16632       return Intrinsic::riscv_masked_atomicrmw_min_i32;
16633     case AtomicRMWInst::UMax:
16634       return Intrinsic::riscv_masked_atomicrmw_umax_i32;
16635     case AtomicRMWInst::UMin:
16636       return Intrinsic::riscv_masked_atomicrmw_umin_i32;
16637     }
16638   }
16639 
16640   if (XLen == 64) {
16641     switch (BinOp) {
16642     default:
16643       llvm_unreachable("Unexpected AtomicRMW BinOp");
16644     case AtomicRMWInst::Xchg:
16645       return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
16646     case AtomicRMWInst::Add:
16647       return Intrinsic::riscv_masked_atomicrmw_add_i64;
16648     case AtomicRMWInst::Sub:
16649       return Intrinsic::riscv_masked_atomicrmw_sub_i64;
16650     case AtomicRMWInst::Nand:
16651       return Intrinsic::riscv_masked_atomicrmw_nand_i64;
16652     case AtomicRMWInst::Max:
16653       return Intrinsic::riscv_masked_atomicrmw_max_i64;
16654     case AtomicRMWInst::Min:
16655       return Intrinsic::riscv_masked_atomicrmw_min_i64;
16656     case AtomicRMWInst::UMax:
16657       return Intrinsic::riscv_masked_atomicrmw_umax_i64;
16658     case AtomicRMWInst::UMin:
16659       return Intrinsic::riscv_masked_atomicrmw_umin_i64;
16660     }
16661   }
16662 
16663   llvm_unreachable("Unexpected XLen\n");
16664 }
16665 
16666 Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
16667     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
16668     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
16669   unsigned XLen = Subtarget.getXLen();
16670   Value *Ordering =
16671       Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
16672   Type *Tys[] = {AlignedAddr->getType()};
16673   Function *LrwOpScwLoop = Intrinsic::getDeclaration(
16674       AI->getModule(),
16675       getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
16676 
16677   if (XLen == 64) {
16678     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
16679     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
16680     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
16681   }
16682 
16683   Value *Result;
16684 
16685   // Must pass the shift amount needed to sign extend the loaded value prior
16686   // to performing a signed comparison for min/max. ShiftAmt is the number of
16687   // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
16688   // is the number of bits to left+right shift the value in order to
16689   // sign-extend.
16690   if (AI->getOperation() == AtomicRMWInst::Min ||
16691       AI->getOperation() == AtomicRMWInst::Max) {
16692     const DataLayout &DL = AI->getModule()->getDataLayout();
16693     unsigned ValWidth =
16694         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
16695     Value *SextShamt =
16696         Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
16697     Result = Builder.CreateCall(LrwOpScwLoop,
16698                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
16699   } else {
16700     Result =
16701         Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
16702   }
16703 
16704   if (XLen == 64)
16705     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
16706   return Result;
16707 }
16708 
16709 TargetLowering::AtomicExpansionKind
16710 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
16711     AtomicCmpXchgInst *CI) const {
16712   // Don't expand forced atomics, we want to have __sync libcalls instead.
16713   if (Subtarget.hasForcedAtomics())
16714     return AtomicExpansionKind::None;
16715 
16716   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
16717   if (Size == 8 || Size == 16)
16718     return AtomicExpansionKind::MaskedIntrinsic;
16719   return AtomicExpansionKind::None;
16720 }
16721 
16722 Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
16723     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
16724     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
16725   unsigned XLen = Subtarget.getXLen();
16726   Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
16727   Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
16728   if (XLen == 64) {
16729     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
16730     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
16731     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
16732     CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
16733   }
16734   Type *Tys[] = {AlignedAddr->getType()};
16735   Function *MaskedCmpXchg =
16736       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
16737   Value *Result = Builder.CreateCall(
16738       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
16739   if (XLen == 64)
16740     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
16741   return Result;
16742 }
16743 
16744 bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
16745                                                         EVT DataVT) const {
16746   return false;
16747 }
16748 
16749 bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
16750                                                EVT VT) const {
16751   if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
16752     return false;
16753 
16754   switch (FPVT.getSimpleVT().SimpleTy) {
16755   case MVT::f16:
16756     return Subtarget.hasStdExtZfhOrZfhmin();
16757   case MVT::f32:
16758     return Subtarget.hasStdExtF();
16759   case MVT::f64:
16760     return Subtarget.hasStdExtD();
16761   default:
16762     return false;
16763   }
16764 }
16765 
16766 unsigned RISCVTargetLowering::getJumpTableEncoding() const {
16767   // If we are using the small code model, we can reduce size of jump table
16768   // entry to 4 bytes.
16769   if (Subtarget.is64Bit() && !isPositionIndependent() &&
16770       getTargetMachine().getCodeModel() == CodeModel::Small) {
16771     return MachineJumpTableInfo::EK_Custom32;
16772   }
16773   return TargetLowering::getJumpTableEncoding();
16774 }
16775 
16776 const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
16777     const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
16778     unsigned uid, MCContext &Ctx) const {
16779   assert(Subtarget.is64Bit() && !isPositionIndependent() &&
16780          getTargetMachine().getCodeModel() == CodeModel::Small);
16781   return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
16782 }
16783 
16784 bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
16785   // We define vscale to be VLEN/RVVBitsPerBlock.  VLEN is always a power
16786   // of two >= 64, and RVVBitsPerBlock is 64.  Thus, vscale must be
16787   // a power of two as well.
16788   // FIXME: This doesn't work for zve32, but that's already broken
16789   // elsewhere for the same reason.
16790   assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
16791   static_assert(RISCV::RVVBitsPerBlock == 64,
16792                 "RVVBitsPerBlock changed, audit needed");
16793   return true;
16794 }
16795 
16796 bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
16797                                                  SDValue &Offset,
16798                                                  ISD::MemIndexedMode &AM,
16799                                                  bool &IsInc,
16800                                                  SelectionDAG &DAG) const {
16801   // Target does not support indexed loads.
16802   if (!Subtarget.hasVendorXTHeadMemIdx())
16803     return false;
16804 
16805   if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
16806     return false;
16807 
16808   Base = Op->getOperand(0);
16809   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
16810     int64_t RHSC = RHS->getSExtValue();
16811     if (Op->getOpcode() == ISD::SUB)
16812       RHSC = -(uint64_t)RHSC;
16813 
16814     // The constants that can be encoded in the THeadMemIdx instructions
16815     // are of the form (sign_extend(imm5) << imm2).
16816     bool isLegalIndexedOffset = false;
16817     for (unsigned i = 0; i < 4; i++)
16818       if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
16819         isLegalIndexedOffset = true;
16820         break;
16821       }
16822 
16823     if (!isLegalIndexedOffset)
16824       return false;
16825 
16826     IsInc = (Op->getOpcode() == ISD::ADD);
16827     Offset = Op->getOperand(1);
16828     return true;
16829   }
16830 
16831   return false;
16832 }
16833 
16834 bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
16835                                                     SDValue &Offset,
16836                                                     ISD::MemIndexedMode &AM,
16837                                                     SelectionDAG &DAG) const {
16838   EVT VT;
16839   SDValue Ptr;
16840   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
16841     VT = LD->getMemoryVT();
16842     Ptr = LD->getBasePtr();
16843   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
16844     VT = ST->getMemoryVT();
16845     Ptr = ST->getBasePtr();
16846   } else
16847     return false;
16848 
16849   bool IsInc;
16850   if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
16851     return false;
16852 
16853   AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
16854   return true;
16855 }
16856 
16857 bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
16858                                                      SDValue &Base,
16859                                                      SDValue &Offset,
16860                                                      ISD::MemIndexedMode &AM,
16861                                                      SelectionDAG &DAG) const {
16862   EVT VT;
16863   SDValue Ptr;
16864   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
16865     VT = LD->getMemoryVT();
16866     Ptr = LD->getBasePtr();
16867   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
16868     VT = ST->getMemoryVT();
16869     Ptr = ST->getBasePtr();
16870   } else
16871     return false;
16872 
16873   bool IsInc;
16874   if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
16875     return false;
16876   // Post-indexing updates the base, so it's not a valid transform
16877   // if that's not the same as the load's pointer.
16878   if (Ptr != Base)
16879     return false;
16880 
16881   AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
16882   return true;
16883 }
16884 
16885 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
16886                                                      EVT VT) const {
16887   EVT SVT = VT.getScalarType();
16888 
16889   if (!SVT.isSimple())
16890     return false;
16891 
16892   switch (SVT.getSimpleVT().SimpleTy) {
16893   case MVT::f16:
16894     return VT.isVector() ? Subtarget.hasVInstructionsF16()
16895                          : Subtarget.hasStdExtZfhOrZhinx();
16896   case MVT::f32:
16897     return Subtarget.hasStdExtFOrZfinx();
16898   case MVT::f64:
16899     return Subtarget.hasStdExtDOrZdinx();
16900   default:
16901     break;
16902   }
16903 
16904   return false;
16905 }
16906 
16907 Register RISCVTargetLowering::getExceptionPointerRegister(
16908     const Constant *PersonalityFn) const {
16909   return RISCV::X10;
16910 }
16911 
16912 Register RISCVTargetLowering::getExceptionSelectorRegister(
16913     const Constant *PersonalityFn) const {
16914   return RISCV::X11;
16915 }
16916 
16917 bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
16918   // Return false to suppress the unnecessary extensions if the LibCall
16919   // arguments or return value is a float narrower than XLEN on a soft FP ABI.
16920   if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
16921                                   Type.getSizeInBits() < Subtarget.getXLen()))
16922     return false;
16923 
16924   return true;
16925 }
16926 
16927 bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
16928   if (Subtarget.is64Bit() && Type == MVT::i32)
16929     return true;
16930 
16931   return IsSigned;
16932 }
16933 
16934 bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
16935                                                  SDValue C) const {
16936   // Check integral scalar types.
16937   const bool HasExtMOrZmmul =
16938       Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
16939   if (!VT.isScalarInteger())
16940     return false;
16941 
16942   // Omit the optimization if the sub target has the M extension and the data
16943   // size exceeds XLen.
16944   if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
16945     return false;
16946 
16947   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16948     // Break the MUL to a SLLI and an ADD/SUB.
16949     const APInt &Imm = ConstNode->getAPIntValue();
16950     if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
16951         (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
16952       return true;
16953 
16954     // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
16955     if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
16956         ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
16957          (Imm - 8).isPowerOf2()))
16958       return true;
16959 
16960     // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
16961     // a pair of LUI/ADDI.
16962     if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
16963         ConstNode->hasOneUse()) {
16964       APInt ImmS = Imm.ashr(Imm.countr_zero());
16965       if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
16966           (1 - ImmS).isPowerOf2())
16967         return true;
16968     }
16969   }
16970 
16971   return false;
16972 }
16973 
16974 bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
16975                                                       SDValue ConstNode) const {
16976   // Let the DAGCombiner decide for vectors.
16977   EVT VT = AddNode.getValueType();
16978   if (VT.isVector())
16979     return true;
16980 
16981   // Let the DAGCombiner decide for larger types.
16982   if (VT.getScalarSizeInBits() > Subtarget.getXLen())
16983     return true;
16984 
16985   // It is worse if c1 is simm12 while c1*c2 is not.
16986   ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
16987   ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
16988   const APInt &C1 = C1Node->getAPIntValue();
16989   const APInt &C2 = C2Node->getAPIntValue();
16990   if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
16991     return false;
16992 
16993   // Default to true and let the DAGCombiner decide.
16994   return true;
16995 }
16996 
16997 bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
16998     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
16999     unsigned *Fast) const {
17000   if (!VT.isVector()) {
17001     if (Fast)
17002       *Fast = Subtarget.enableUnalignedScalarMem();
17003     return Subtarget.enableUnalignedScalarMem();
17004   }
17005 
17006   // All vector implementations must support element alignment
17007   EVT ElemVT = VT.getVectorElementType();
17008   if (Alignment >= ElemVT.getStoreSize()) {
17009     if (Fast)
17010       *Fast = 1;
17011     return true;
17012   }
17013 
17014   // Note: We lower an unmasked unaligned vector access to an equally sized
17015   // e8 element type access.  Given this, we effectively support all unmasked
17016   // misaligned accesses.  TODO: Work through the codegen implications of
17017   // allowing such accesses to be formed, and considered fast.
17018   if (Fast)
17019     *Fast = Subtarget.enableUnalignedVectorMem();
17020   return Subtarget.enableUnalignedVectorMem();
17021 }
17022 
17023 bool RISCVTargetLowering::splitValueIntoRegisterParts(
17024     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
17025     unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
17026   bool IsABIRegCopy = CC.has_value();
17027   EVT ValueVT = Val.getValueType();
17028   if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
17029       PartVT == MVT::f32) {
17030     // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
17031     // nan, and cast to f32.
17032     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
17033     Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
17034     Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
17035                       DAG.getConstant(0xFFFF0000, DL, MVT::i32));
17036     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
17037     Parts[0] = Val;
17038     return true;
17039   }
17040 
17041   if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
17042     LLVMContext &Context = *DAG.getContext();
17043     EVT ValueEltVT = ValueVT.getVectorElementType();
17044     EVT PartEltVT = PartVT.getVectorElementType();
17045     unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
17046     unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
17047     if (PartVTBitSize % ValueVTBitSize == 0) {
17048       assert(PartVTBitSize >= ValueVTBitSize);
17049       // If the element types are different, bitcast to the same element type of
17050       // PartVT first.
17051       // Give an example here, we want copy a <vscale x 1 x i8> value to
17052       // <vscale x 4 x i16>.
17053       // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
17054       // subvector, then we can bitcast to <vscale x 4 x i16>.
17055       if (ValueEltVT != PartEltVT) {
17056         if (PartVTBitSize > ValueVTBitSize) {
17057           unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
17058           assert(Count != 0 && "The number of element should not be zero.");
17059           EVT SameEltTypeVT =
17060               EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
17061           Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
17062                             DAG.getUNDEF(SameEltTypeVT), Val,
17063                             DAG.getVectorIdxConstant(0, DL));
17064         }
17065         Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
17066       } else {
17067         Val =
17068             DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
17069                         Val, DAG.getVectorIdxConstant(0, DL));
17070       }
17071       Parts[0] = Val;
17072       return true;
17073     }
17074   }
17075   return false;
17076 }
17077 
17078 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
17079     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
17080     MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
17081   bool IsABIRegCopy = CC.has_value();
17082   if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
17083       PartVT == MVT::f32) {
17084     SDValue Val = Parts[0];
17085 
17086     // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
17087     Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
17088     Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
17089     Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
17090     return Val;
17091   }
17092 
17093   if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
17094     LLVMContext &Context = *DAG.getContext();
17095     SDValue Val = Parts[0];
17096     EVT ValueEltVT = ValueVT.getVectorElementType();
17097     EVT PartEltVT = PartVT.getVectorElementType();
17098     unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
17099     unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
17100     if (PartVTBitSize % ValueVTBitSize == 0) {
17101       assert(PartVTBitSize >= ValueVTBitSize);
17102       EVT SameEltTypeVT = ValueVT;
17103       // If the element types are different, convert it to the same element type
17104       // of PartVT.
17105       // Give an example here, we want copy a <vscale x 1 x i8> value from
17106       // <vscale x 4 x i16>.
17107       // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
17108       // then we can extract <vscale x 1 x i8>.
17109       if (ValueEltVT != PartEltVT) {
17110         unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
17111         assert(Count != 0 && "The number of element should not be zero.");
17112         SameEltTypeVT =
17113             EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
17114         Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
17115       }
17116       Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
17117                         DAG.getVectorIdxConstant(0, DL));
17118       return Val;
17119     }
17120   }
17121   return SDValue();
17122 }
17123 
17124 bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
17125   // When aggressively optimizing for code size, we prefer to use a div
17126   // instruction, as it is usually smaller than the alternative sequence.
17127   // TODO: Add vector division?
17128   bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
17129   return OptSize && !VT.isVector();
17130 }
17131 
17132 bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
17133   // Scalarize zero_ext and sign_ext might stop match to widening instruction in
17134   // some situation.
17135   unsigned Opc = N->getOpcode();
17136   if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
17137     return false;
17138   return true;
17139 }
17140 
17141 static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
17142   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
17143   Function *ThreadPointerFunc =
17144       Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
17145   return IRB.CreatePointerCast(
17146       IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
17147                              IRB.CreateCall(ThreadPointerFunc), Offset),
17148       IRB.getInt8PtrTy()->getPointerTo(0));
17149 }
17150 
17151 Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
17152   // Fuchsia provides a fixed TLS slot for the stack cookie.
17153   // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
17154   if (Subtarget.isTargetFuchsia())
17155     return useTpOffset(IRB, -0x10);
17156 
17157   return TargetLowering::getIRStackGuard(IRB);
17158 }
17159 
17160 bool RISCVTargetLowering::isLegalInterleavedAccessType(
17161     VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
17162     const DataLayout &DL) const {
17163   EVT VT = getValueType(DL, VTy);
17164   // Don't lower vlseg/vsseg for vector types that can't be split.
17165   if (!isTypeLegal(VT))
17166     return false;
17167 
17168   if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
17169       !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
17170                                       Alignment))
17171     return false;
17172 
17173   MVT ContainerVT = VT.getSimpleVT();
17174 
17175   if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
17176     if (!Subtarget.useRVVForFixedLengthVectors())
17177       return false;
17178     // Sometimes the interleaved access pass picks up splats as interleaves of
17179     // one element. Don't lower these.
17180     if (FVTy->getNumElements() < 2)
17181       return false;
17182 
17183     ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
17184   }
17185 
17186   // Need to make sure that EMUL * NFIELDS ≤ 8
17187   auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
17188   if (Fractional)
17189     return true;
17190   return Factor * LMUL <= 8;
17191 }
17192 
17193 bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
17194                                                   Align Alignment) const {
17195   if (!Subtarget.hasVInstructions())
17196     return false;
17197 
17198   // Only support fixed vectors if we know the minimum vector size.
17199   if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
17200     return false;
17201 
17202   EVT ScalarType = DataType.getScalarType();
17203   if (!isLegalElementTypeForRVV(ScalarType))
17204     return false;
17205 
17206   if (!Subtarget.enableUnalignedVectorMem() &&
17207       Alignment < ScalarType.getStoreSize())
17208     return false;
17209 
17210   return true;
17211 }
17212 
17213 static const Intrinsic::ID FixedVlsegIntrIds[] = {
17214     Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
17215     Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
17216     Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
17217     Intrinsic::riscv_seg8_load};
17218 
17219 /// Lower an interleaved load into a vlsegN intrinsic.
17220 ///
17221 /// E.g. Lower an interleaved load (Factor = 2):
17222 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
17223 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
17224 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
17225 ///
17226 /// Into:
17227 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
17228 ///                                        %ptr, i64 4)
17229 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
17230 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
17231 bool RISCVTargetLowering::lowerInterleavedLoad(
17232     LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
17233     ArrayRef<unsigned> Indices, unsigned Factor) const {
17234   IRBuilder<> Builder(LI);
17235 
17236   auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
17237   if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
17238                                     LI->getPointerAddressSpace(),
17239                                     LI->getModule()->getDataLayout()))
17240     return false;
17241 
17242   auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
17243 
17244   Function *VlsegNFunc =
17245       Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
17246                                 {VTy, LI->getPointerOperandType(), XLenTy});
17247 
17248   Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
17249 
17250   CallInst *VlsegN =
17251       Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
17252 
17253   for (unsigned i = 0; i < Shuffles.size(); i++) {
17254     Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
17255     Shuffles[i]->replaceAllUsesWith(SubVec);
17256   }
17257 
17258   return true;
17259 }
17260 
17261 static const Intrinsic::ID FixedVssegIntrIds[] = {
17262     Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
17263     Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
17264     Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
17265     Intrinsic::riscv_seg8_store};
17266 
17267 /// Lower an interleaved store into a vssegN intrinsic.
17268 ///
17269 /// E.g. Lower an interleaved store (Factor = 3):
17270 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
17271 ///                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
17272 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
17273 ///
17274 /// Into:
17275 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
17276 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
17277 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
17278 /// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
17279 ///                                              %ptr, i32 4)
17280 ///
17281 /// Note that the new shufflevectors will be removed and we'll only generate one
17282 /// vsseg3 instruction in CodeGen.
17283 bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
17284                                                 ShuffleVectorInst *SVI,
17285                                                 unsigned Factor) const {
17286   IRBuilder<> Builder(SI);
17287   auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
17288   // Given SVI : <n*factor x ty>, then VTy : <n x ty>
17289   auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
17290                                    ShuffleVTy->getNumElements() / Factor);
17291   if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
17292                                     SI->getPointerAddressSpace(),
17293                                     SI->getModule()->getDataLayout()))
17294     return false;
17295 
17296   auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
17297 
17298   Function *VssegNFunc =
17299       Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
17300                                 {VTy, SI->getPointerOperandType(), XLenTy});
17301 
17302   auto Mask = SVI->getShuffleMask();
17303   SmallVector<Value *, 10> Ops;
17304 
17305   for (unsigned i = 0; i < Factor; i++) {
17306     Value *Shuffle = Builder.CreateShuffleVector(
17307         SVI->getOperand(0), SVI->getOperand(1),
17308         createSequentialMask(Mask[i], VTy->getNumElements(), 0));
17309     Ops.push_back(Shuffle);
17310   }
17311   // This VL should be OK (should be executable in one vsseg instruction,
17312   // potentially under larger LMULs) because we checked that the fixed vector
17313   // type fits in isLegalInterleavedAccessType
17314   Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
17315   Ops.append({SI->getPointerOperand(), VL});
17316 
17317   Builder.CreateCall(VssegNFunc, Ops);
17318 
17319   return true;
17320 }
17321 
17322 bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
17323                                                            LoadInst *LI) const {
17324   assert(LI->isSimple());
17325   IRBuilder<> Builder(LI);
17326 
17327   // Only deinterleave2 supported at present.
17328   if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
17329     return false;
17330 
17331   unsigned Factor = 2;
17332 
17333   VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
17334   VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
17335 
17336   if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
17337                                     LI->getPointerAddressSpace(),
17338                                     LI->getModule()->getDataLayout()))
17339     return false;
17340 
17341   Function *VlsegNFunc;
17342   Value *VL;
17343   Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
17344   SmallVector<Value *, 10> Ops;
17345 
17346   if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
17347     VlsegNFunc = Intrinsic::getDeclaration(
17348         LI->getModule(), FixedVlsegIntrIds[Factor - 2],
17349         {ResVTy, LI->getPointerOperandType(), XLenTy});
17350     VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
17351   } else {
17352     static const Intrinsic::ID IntrIds[] = {
17353         Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
17354         Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
17355         Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
17356         Intrinsic::riscv_vlseg8};
17357 
17358     VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
17359                                            {ResVTy, XLenTy});
17360     VL = Constant::getAllOnesValue(XLenTy);
17361     Ops.append(Factor, PoisonValue::get(ResVTy));
17362   }
17363 
17364   Ops.append({LI->getPointerOperand(), VL});
17365 
17366   Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
17367   DI->replaceAllUsesWith(Vlseg);
17368 
17369   return true;
17370 }
17371 
17372 bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
17373                                                           StoreInst *SI) const {
17374   assert(SI->isSimple());
17375   IRBuilder<> Builder(SI);
17376 
17377   // Only interleave2 supported at present.
17378   if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
17379     return false;
17380 
17381   unsigned Factor = 2;
17382 
17383   VectorType *VTy = cast<VectorType>(II->getType());
17384   VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
17385 
17386   if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
17387                                     SI->getPointerAddressSpace(),
17388                                     SI->getModule()->getDataLayout()))
17389     return false;
17390 
17391   Function *VssegNFunc;
17392   Value *VL;
17393   Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
17394 
17395   if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
17396     VssegNFunc = Intrinsic::getDeclaration(
17397         SI->getModule(), FixedVssegIntrIds[Factor - 2],
17398         {InVTy, SI->getPointerOperandType(), XLenTy});
17399     VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
17400   } else {
17401     static const Intrinsic::ID IntrIds[] = {
17402         Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
17403         Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
17404         Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
17405         Intrinsic::riscv_vsseg8};
17406 
17407     VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
17408                                            {InVTy, XLenTy});
17409     VL = Constant::getAllOnesValue(XLenTy);
17410   }
17411 
17412   Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
17413                                   SI->getPointerOperand(), VL});
17414 
17415   return true;
17416 }
17417 
17418 MachineInstr *
17419 RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
17420                                    MachineBasicBlock::instr_iterator &MBBI,
17421                                    const TargetInstrInfo *TII) const {
17422   assert(MBBI->isCall() && MBBI->getCFIType() &&
17423          "Invalid call instruction for a KCFI check");
17424   assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
17425                       MBBI->getOpcode()));
17426 
17427   MachineOperand &Target = MBBI->getOperand(0);
17428   Target.setIsRenamable(false);
17429 
17430   return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
17431       .addReg(Target.getReg())
17432       .addImm(MBBI->getCFIType())
17433       .getInstr();
17434 }
17435 
17436 #define GET_REGISTER_MATCHER
17437 #include "RISCVGenAsmMatcher.inc"
17438 
17439 Register
17440 RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
17441                                        const MachineFunction &MF) const {
17442   Register Reg = MatchRegisterAltName(RegName);
17443   if (Reg == RISCV::NoRegister)
17444     Reg = MatchRegisterName(RegName);
17445   if (Reg == RISCV::NoRegister)
17446     report_fatal_error(
17447         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
17448   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
17449   if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
17450     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
17451                              StringRef(RegName) + "\"."));
17452   return Reg;
17453 }
17454 
17455 MachineMemOperand::Flags
17456 RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
17457   const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
17458 
17459   if (NontemporalInfo == nullptr)
17460     return MachineMemOperand::MONone;
17461 
17462   // 1 for default value work as __RISCV_NTLH_ALL
17463   // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
17464   // 3 -> __RISCV_NTLH_ALL_PRIVATE
17465   // 4 -> __RISCV_NTLH_INNERMOST_SHARED
17466   // 5 -> __RISCV_NTLH_ALL
17467   int NontemporalLevel = 5;
17468   const MDNode *RISCVNontemporalInfo =
17469       I.getMetadata("riscv-nontemporal-domain");
17470   if (RISCVNontemporalInfo != nullptr)
17471     NontemporalLevel =
17472         cast<ConstantInt>(
17473             cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
17474                 ->getValue())
17475             ->getZExtValue();
17476 
17477   assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
17478          "RISC-V target doesn't support this non-temporal domain.");
17479 
17480   NontemporalLevel -= 2;
17481   MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
17482   if (NontemporalLevel & 0b1)
17483     Flags |= MONontemporalBit0;
17484   if (NontemporalLevel & 0b10)
17485     Flags |= MONontemporalBit1;
17486 
17487   return Flags;
17488 }
17489 
17490 MachineMemOperand::Flags
17491 RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
17492 
17493   MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
17494   MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
17495   TargetFlags |= (NodeFlags & MONontemporalBit0);
17496   TargetFlags |= (NodeFlags & MONontemporalBit1);
17497 
17498   return TargetFlags;
17499 }
17500 
17501 bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
17502     const MemSDNode &NodeX, const MemSDNode &NodeY) const {
17503   return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
17504 }
17505 
17506 namespace llvm::RISCVVIntrinsicsTable {
17507 
17508 #define GET_RISCVVIntrinsicsTable_IMPL
17509 #include "RISCVGenSearchableTables.inc"
17510 
17511 } // namespace llvm::RISCVVIntrinsicsTable
17512