xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (revision a2464ee12761660f50d0b6f59f233949ebcacc87)
1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the RISCV target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "RISCVISelDAGToDAG.h"
14 #include "MCTargetDesc/RISCVMCTargetDesc.h"
15 #include "MCTargetDesc/RISCVMatInt.h"
16 #include "RISCVISelLowering.h"
17 #include "RISCVMachineFunctionInfo.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/IR/IntrinsicsRISCV.h"
20 #include "llvm/Support/Alignment.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/KnownBits.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "riscv-isel"
29 
30 namespace llvm {
31 namespace RISCV {
32 #define GET_RISCVVSSEGTable_IMPL
33 #define GET_RISCVVLSEGTable_IMPL
34 #define GET_RISCVVLXSEGTable_IMPL
35 #define GET_RISCVVSXSEGTable_IMPL
36 #define GET_RISCVVLETable_IMPL
37 #define GET_RISCVVSETable_IMPL
38 #define GET_RISCVVLXTable_IMPL
39 #define GET_RISCVVSXTable_IMPL
40 #include "RISCVGenSearchableTables.inc"
41 } // namespace RISCV
42 } // namespace llvm
43 
44 void RISCVDAGToDAGISel::PreprocessISelDAG() {
45   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
46                                        E = CurDAG->allnodes_end();
47        I != E;) {
48     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
49 
50     // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
51     // load. Done after lowering and combining so that we have a chance to
52     // optimize this to VMV_V_X_VL when the upper bits aren't needed.
53     if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
54       continue;
55 
56     assert(N->getNumOperands() == 3 && "Unexpected number of operands");
57     MVT VT = N->getSimpleValueType(0);
58     SDValue Lo = N->getOperand(0);
59     SDValue Hi = N->getOperand(1);
60     SDValue VL = N->getOperand(2);
61     assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
62            Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
63            "Unexpected VTs!");
64     MachineFunction &MF = CurDAG->getMachineFunction();
65     RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
66     SDLoc DL(N);
67 
68     // We use the same frame index we use for moving two i32s into 64-bit FPR.
69     // This is an analogous operation.
70     int FI = FuncInfo->getMoveF64FrameIndex(MF);
71     MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
72     const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
73     SDValue StackSlot =
74         CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
75 
76     SDValue Chain = CurDAG->getEntryNode();
77     Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
78 
79     SDValue OffsetSlot =
80         CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
81     Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
82                           Align(8));
83 
84     Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
85 
86     SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
87     SDValue IntID =
88         CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
89     SDValue Ops[] = {Chain,
90                      IntID,
91                      CurDAG->getUNDEF(VT),
92                      StackSlot,
93                      CurDAG->getRegister(RISCV::X0, MVT::i64),
94                      VL};
95 
96     SDValue Result = CurDAG->getMemIntrinsicNode(
97         ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
98         MachineMemOperand::MOLoad);
99 
100     // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the
101     // vlse we created.  This will cause general havok on the dag because
102     // anything below the conversion could be folded into other existing nodes.
103     // To avoid invalidating 'I', back it up to the convert node.
104     --I;
105     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
106 
107     // Now that we did that, the node is dead.  Increment the iterator to the
108     // next node to process, then delete N.
109     ++I;
110     CurDAG->DeleteNode(N);
111   }
112 }
113 
114 void RISCVDAGToDAGISel::PostprocessISelDAG() {
115   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
116 
117   bool MadeChange = false;
118   while (Position != CurDAG->allnodes_begin()) {
119     SDNode *N = &*--Position;
120     // Skip dead nodes and any non-machine opcodes.
121     if (N->use_empty() || !N->isMachineOpcode())
122       continue;
123 
124     MadeChange |= doPeepholeSExtW(N);
125     MadeChange |= doPeepholeLoadStoreADDI(N);
126   }
127 
128   if (MadeChange)
129     CurDAG->RemoveDeadNodes();
130 }
131 
132 static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
133                                          const MVT VT, int64_t Imm,
134                                          const RISCVSubtarget &Subtarget) {
135   assert(VT == MVT::i64 && "Expecting MVT::i64");
136   const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
137   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
138       ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
139   SDValue Addr = TLI->getAddr(CP, *CurDAG);
140   SDValue Offset = CurDAG->getTargetConstant(0, DL, VT);
141   // Since there is no data race, the chain can be the entry node.
142   SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
143                                         CurDAG->getEntryNode());
144   MachineFunction &MF = CurDAG->getMachineFunction();
145   MachineMemOperand *MemOp = MF.getMachineMemOperand(
146       MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
147       LLT(VT), CP->getAlign());
148   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
149   return Load;
150 }
151 
152 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
153                          int64_t Imm, const RISCVSubtarget &Subtarget) {
154   MVT XLenVT = Subtarget.getXLenVT();
155   RISCVMatInt::InstSeq Seq =
156       RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
157 
158   // If Imm is expensive to build, then we put it into constant pool.
159   if (Subtarget.useConstantPoolForLargeInts() &&
160       Seq.size() > Subtarget.getMaxBuildIntsCost())
161     return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);
162 
163   SDNode *Result = nullptr;
164   SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
165   for (RISCVMatInt::Inst &Inst : Seq) {
166     SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
167     if (Inst.Opc == RISCV::LUI)
168       Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
169     else if (Inst.Opc == RISCV::ADD_UW)
170       Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg,
171                                       CurDAG->getRegister(RISCV::X0, XLenVT));
172     else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
173              Inst.Opc == RISCV::SH3ADD)
174       Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg);
175     else
176       Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
177 
178     // Only the first instruction has X0 as its source.
179     SrcReg = SDValue(Result, 0);
180   }
181 
182   return Result;
183 }
184 
185 static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
186                                unsigned RegClassID, unsigned SubReg0) {
187   assert(Regs.size() >= 2 && Regs.size() <= 8);
188 
189   SDLoc DL(Regs[0]);
190   SmallVector<SDValue, 8> Ops;
191 
192   Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
193 
194   for (unsigned I = 0; I < Regs.size(); ++I) {
195     Ops.push_back(Regs[I]);
196     Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
197   }
198   SDNode *N =
199       CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
200   return SDValue(N, 0);
201 }
202 
203 static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
204                              unsigned NF) {
205   static const unsigned RegClassIDs[] = {
206       RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
207       RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
208       RISCV::VRN8M1RegClassID};
209 
210   return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0);
211 }
212 
213 static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
214                              unsigned NF) {
215   static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID,
216                                          RISCV::VRN3M2RegClassID,
217                                          RISCV::VRN4M2RegClassID};
218 
219   return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0);
220 }
221 
222 static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
223                              unsigned NF) {
224   return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID,
225                          RISCV::sub_vrm4_0);
226 }
227 
228 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
229                            unsigned NF, RISCVII::VLMUL LMUL) {
230   switch (LMUL) {
231   default:
232     llvm_unreachable("Invalid LMUL.");
233   case RISCVII::VLMUL::LMUL_F8:
234   case RISCVII::VLMUL::LMUL_F4:
235   case RISCVII::VLMUL::LMUL_F2:
236   case RISCVII::VLMUL::LMUL_1:
237     return createM1Tuple(CurDAG, Regs, NF);
238   case RISCVII::VLMUL::LMUL_2:
239     return createM2Tuple(CurDAG, Regs, NF);
240   case RISCVII::VLMUL::LMUL_4:
241     return createM4Tuple(CurDAG, Regs, NF);
242   }
243 }
244 
245 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
246     SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
247     bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
248     bool IsLoad, MVT *IndexVT) {
249   SDValue Chain = Node->getOperand(0);
250   SDValue Glue;
251 
252   SDValue Base;
253   SelectBaseAddr(Node->getOperand(CurOp++), Base);
254   Operands.push_back(Base); // Base pointer.
255 
256   if (IsStridedOrIndexed) {
257     Operands.push_back(Node->getOperand(CurOp++)); // Index.
258     if (IndexVT)
259       *IndexVT = Operands.back()->getSimpleValueType(0);
260   }
261 
262   if (IsMasked) {
263     // Mask needs to be copied to V0.
264     SDValue Mask = Node->getOperand(CurOp++);
265     Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
266     Glue = Chain.getValue(1);
267     Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
268   }
269   SDValue VL;
270   selectVLOp(Node->getOperand(CurOp++), VL);
271   Operands.push_back(VL);
272 
273   MVT XLenVT = Subtarget->getXLenVT();
274   SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
275   Operands.push_back(SEWOp);
276 
277   // Masked load has the tail policy argument.
278   if (IsMasked && IsLoad) {
279     // Policy must be a constant.
280     uint64_t Policy = Node->getConstantOperandVal(CurOp++);
281     SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
282     Operands.push_back(PolicyOp);
283   }
284 
285   Operands.push_back(Chain); // Chain.
286   if (Glue)
287     Operands.push_back(Glue);
288 }
289 
290 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
291                                     bool IsStrided) {
292   SDLoc DL(Node);
293   unsigned NF = Node->getNumValues() - 1;
294   MVT VT = Node->getSimpleValueType(0);
295   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
296   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
297 
298   unsigned CurOp = 2;
299   SmallVector<SDValue, 8> Operands;
300   if (IsMasked) {
301     SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
302                                  Node->op_begin() + CurOp + NF);
303     SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
304     Operands.push_back(MaskedOff);
305     CurOp += NF;
306   }
307 
308   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
309                              Operands, /*IsLoad=*/true);
310 
311   const RISCV::VLSEGPseudo *P =
312       RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
313                             static_cast<unsigned>(LMUL));
314   MachineSDNode *Load =
315       CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
316 
317   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
318     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
319 
320   SDValue SuperReg = SDValue(Load, 0);
321   for (unsigned I = 0; I < NF; ++I) {
322     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
323     ReplaceUses(SDValue(Node, I),
324                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
325   }
326 
327   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
328   CurDAG->RemoveDeadNode(Node);
329 }
330 
331 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
332   SDLoc DL(Node);
333   unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
334   MVT VT = Node->getSimpleValueType(0);
335   MVT XLenVT = Subtarget->getXLenVT();
336   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
337   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
338 
339   unsigned CurOp = 2;
340   SmallVector<SDValue, 7> Operands;
341   if (IsMasked) {
342     SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
343                                  Node->op_begin() + CurOp + NF);
344     SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
345     Operands.push_back(MaskedOff);
346     CurOp += NF;
347   }
348 
349   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
350                              /*IsStridedOrIndexed*/ false, Operands,
351                              /*IsLoad=*/true);
352 
353   const RISCV::VLSEGPseudo *P =
354       RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
355                             Log2SEW, static_cast<unsigned>(LMUL));
356   MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
357                                                MVT::Other, MVT::Glue, Operands);
358   SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
359                                           /*Glue*/ SDValue(Load, 2));
360 
361   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
362     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
363 
364   SDValue SuperReg = SDValue(Load, 0);
365   for (unsigned I = 0; I < NF; ++I) {
366     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
367     ReplaceUses(SDValue(Node, I),
368                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
369   }
370 
371   ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0));   // VL
372   ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain
373   CurDAG->RemoveDeadNode(Node);
374 }
375 
376 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
377                                      bool IsOrdered) {
378   SDLoc DL(Node);
379   unsigned NF = Node->getNumValues() - 1;
380   MVT VT = Node->getSimpleValueType(0);
381   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
382   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
383 
384   unsigned CurOp = 2;
385   SmallVector<SDValue, 8> Operands;
386   if (IsMasked) {
387     SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
388                                  Node->op_begin() + CurOp + NF);
389     SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
390     Operands.push_back(MaskedOff);
391     CurOp += NF;
392   }
393 
394   MVT IndexVT;
395   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
396                              /*IsStridedOrIndexed*/ true, Operands,
397                              /*IsLoad=*/true, &IndexVT);
398 
399   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
400          "Element count mismatch");
401 
402   RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
403   unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
404   if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
405     report_fatal_error("The V extension does not support EEW=64 for index "
406                        "values when XLEN=32");
407   }
408   const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
409       NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
410       static_cast<unsigned>(IndexLMUL));
411   MachineSDNode *Load =
412       CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
413 
414   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
415     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
416 
417   SDValue SuperReg = SDValue(Load, 0);
418   for (unsigned I = 0; I < NF; ++I) {
419     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
420     ReplaceUses(SDValue(Node, I),
421                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
422   }
423 
424   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
425   CurDAG->RemoveDeadNode(Node);
426 }
427 
428 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
429                                     bool IsStrided) {
430   SDLoc DL(Node);
431   unsigned NF = Node->getNumOperands() - 4;
432   if (IsStrided)
433     NF--;
434   if (IsMasked)
435     NF--;
436   MVT VT = Node->getOperand(2)->getSimpleValueType(0);
437   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
438   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
439   SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
440   SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
441 
442   SmallVector<SDValue, 8> Operands;
443   Operands.push_back(StoreVal);
444   unsigned CurOp = 2 + NF;
445 
446   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
447                              Operands);
448 
449   const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
450       NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
451   MachineSDNode *Store =
452       CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
453 
454   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
455     CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
456 
457   ReplaceNode(Node, Store);
458 }
459 
460 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
461                                      bool IsOrdered) {
462   SDLoc DL(Node);
463   unsigned NF = Node->getNumOperands() - 5;
464   if (IsMasked)
465     --NF;
466   MVT VT = Node->getOperand(2)->getSimpleValueType(0);
467   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
468   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
469   SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
470   SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
471 
472   SmallVector<SDValue, 8> Operands;
473   Operands.push_back(StoreVal);
474   unsigned CurOp = 2 + NF;
475 
476   MVT IndexVT;
477   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
478                              /*IsStridedOrIndexed*/ true, Operands,
479                              /*IsLoad=*/false, &IndexVT);
480 
481   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
482          "Element count mismatch");
483 
484   RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
485   unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
486   if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
487     report_fatal_error("The V extension does not support EEW=64 for index "
488                        "values when XLEN=32");
489   }
490   const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
491       NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
492       static_cast<unsigned>(IndexLMUL));
493   MachineSDNode *Store =
494       CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
495 
496   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
497     CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
498 
499   ReplaceNode(Node, Store);
500 }
501 
502 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
503   if (!Subtarget->hasVInstructions())
504     return;
505 
506   assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
507           Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
508          "Unexpected opcode");
509 
510   SDLoc DL(Node);
511   MVT XLenVT = Subtarget->getXLenVT();
512 
513   bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
514   unsigned IntNoOffset = HasChain ? 1 : 0;
515   unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);
516 
517   assert((IntNo == Intrinsic::riscv_vsetvli ||
518           IntNo == Intrinsic::riscv_vsetvlimax ||
519           IntNo == Intrinsic::riscv_vsetvli_opt ||
520           IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
521          "Unexpected vsetvli intrinsic");
522 
523   bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
524                IntNo == Intrinsic::riscv_vsetvlimax_opt;
525   unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);
526 
527   assert(Node->getNumOperands() == Offset + 2 &&
528          "Unexpected number of operands");
529 
530   unsigned SEW =
531       RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
532   RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
533       Node->getConstantOperandVal(Offset + 1) & 0x7);
534 
535   unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
536                                             /*MaskAgnostic*/ false);
537   SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
538 
539   SmallVector<EVT, 2> VTs = {XLenVT};
540   if (HasChain)
541     VTs.push_back(MVT::Other);
542 
543   SDValue VLOperand;
544   unsigned Opcode = RISCV::PseudoVSETVLI;
545   if (VLMax) {
546     VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
547     Opcode = RISCV::PseudoVSETVLIX0;
548   } else {
549     VLOperand = Node->getOperand(IntNoOffset + 1);
550 
551     if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
552       uint64_t AVL = C->getZExtValue();
553       if (isUInt<5>(AVL)) {
554         SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
555         SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
556         if (HasChain)
557           Ops.push_back(Node->getOperand(0));
558         ReplaceNode(
559             Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
560         return;
561       }
562     }
563   }
564 
565   SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
566   if (HasChain)
567     Ops.push_back(Node->getOperand(0));
568 
569   ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
570 }
571 
572 void RISCVDAGToDAGISel::Select(SDNode *Node) {
573   // If we have a custom node, we have already selected.
574   if (Node->isMachineOpcode()) {
575     LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
576     Node->setNodeId(-1);
577     return;
578   }
579 
580   // Instruction Selection not handled by the auto-generated tablegen selection
581   // should be handled here.
582   unsigned Opcode = Node->getOpcode();
583   MVT XLenVT = Subtarget->getXLenVT();
584   SDLoc DL(Node);
585   MVT VT = Node->getSimpleValueType(0);
586 
587   switch (Opcode) {
588   case ISD::Constant: {
589     auto *ConstNode = cast<ConstantSDNode>(Node);
590     if (VT == XLenVT && ConstNode->isZero()) {
591       SDValue New =
592           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
593       ReplaceNode(Node, New.getNode());
594       return;
595     }
596     int64_t Imm = ConstNode->getSExtValue();
597     // If the upper XLen-16 bits are not used, try to convert this to a simm12
598     // by sign extending bit 15.
599     if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) &&
600         hasAllHUsers(Node))
601       Imm = SignExtend64(Imm, 16);
602     // If the upper 32-bits are not used try to convert this into a simm32 by
603     // sign extending bit 32.
604     if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
605       Imm = SignExtend64(Imm, 32);
606 
607     ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
608     return;
609   }
610   case ISD::FrameIndex: {
611     SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
612     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
613     SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
614     ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
615     return;
616   }
617   case ISD::SRL: {
618     // Optimize (srl (and X, C2), C) ->
619     //          (srli (slli X, (XLen-C3), (XLen-C3) + C)
620     // Where C2 is a mask with C3 trailing ones.
621     // Taking into account that the C2 may have had lower bits unset by
622     // SimplifyDemandedBits. This avoids materializing the C2 immediate.
623     // This pattern occurs when type legalizing right shifts for types with
624     // less than XLen bits.
625     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
626     if (!N1C)
627       break;
628     SDValue N0 = Node->getOperand(0);
629     if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
630         !isa<ConstantSDNode>(N0.getOperand(1)))
631       break;
632     unsigned ShAmt = N1C->getZExtValue();
633     uint64_t Mask = N0.getConstantOperandVal(1);
634     Mask |= maskTrailingOnes<uint64_t>(ShAmt);
635     if (!isMask_64(Mask))
636       break;
637     unsigned TrailingOnes = countTrailingOnes(Mask);
638     // 32 trailing ones should use srliw via tablegen pattern.
639     if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
640       break;
641     unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
642     SDNode *SLLI =
643         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
644                                CurDAG->getTargetConstant(LShAmt, DL, VT));
645     SDNode *SRLI = CurDAG->getMachineNode(
646         RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
647         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
648     ReplaceNode(Node, SRLI);
649     return;
650   }
651   case ISD::SRA: {
652     // Optimize (sra (sext_inreg X, i16), C) ->
653     //          (srai (slli X, (XLen-16), (XLen-16) + C)
654     // And      (sra (sext_inreg X, i8), C) ->
655     //          (srai (slli X, (XLen-8), (XLen-8) + C)
656     // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
657     // This transform matches the code we get without Zbb. The shifts are more
658     // compressible, and this can help expose CSE opportunities in the sdiv by
659     // constant optimization.
660     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
661     if (!N1C)
662       break;
663     SDValue N0 = Node->getOperand(0);
664     if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
665       break;
666     unsigned ShAmt = N1C->getZExtValue();
667     unsigned ExtSize =
668         cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
669     // ExtSize of 32 should use sraiw via tablegen pattern.
670     if (ExtSize >= 32 || ShAmt >= ExtSize)
671       break;
672     unsigned LShAmt = Subtarget->getXLen() - ExtSize;
673     SDNode *SLLI =
674         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
675                                CurDAG->getTargetConstant(LShAmt, DL, VT));
676     SDNode *SRAI = CurDAG->getMachineNode(
677         RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
678         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
679     ReplaceNode(Node, SRAI);
680     return;
681   }
682   case ISD::AND: {
683     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
684     if (!N1C)
685       break;
686 
687     SDValue N0 = Node->getOperand(0);
688 
689     bool LeftShift = N0.getOpcode() == ISD::SHL;
690     if (!LeftShift && N0.getOpcode() != ISD::SRL)
691       break;
692 
693     auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
694     if (!C)
695       break;
696     uint64_t C2 = C->getZExtValue();
697     unsigned XLen = Subtarget->getXLen();
698     if (!C2 || C2 >= XLen)
699       break;
700 
701     uint64_t C1 = N1C->getZExtValue();
702 
703     // Keep track of whether this is a andi, zext.h, or zext.w.
704     bool ZExtOrANDI = isInt<12>(N1C->getSExtValue());
705     if (C1 == UINT64_C(0xFFFF) &&
706         (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()))
707       ZExtOrANDI = true;
708     if (C1 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())
709       ZExtOrANDI = true;
710 
711     // Clear irrelevant bits in the mask.
712     if (LeftShift)
713       C1 &= maskTrailingZeros<uint64_t>(C2);
714     else
715       C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
716 
717     // Some transforms should only be done if the shift has a single use or
718     // the AND would become (srli (slli X, 32), 32)
719     bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
720 
721     SDValue X = N0.getOperand(0);
722 
723     // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
724     // with c3 leading zeros.
725     if (!LeftShift && isMask_64(C1)) {
726       uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
727       if (C2 < C3) {
728         // If the number of leading zeros is C2+32 this can be SRLIW.
729         if (C2 + 32 == C3) {
730           SDNode *SRLIW =
731               CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X,
732                                      CurDAG->getTargetConstant(C2, DL, XLenVT));
733           ReplaceNode(Node, SRLIW);
734           return;
735         }
736 
737         // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if
738         // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
739         //
740         // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
741         // legalized and goes through DAG combine.
742         SDValue Y;
743         if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
744             selectSExti32(X, Y)) {
745           SDNode *SRAIW =
746               CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, Y,
747                                      CurDAG->getTargetConstant(31, DL, XLenVT));
748           SDNode *SRLIW = CurDAG->getMachineNode(
749               RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0),
750               CurDAG->getTargetConstant(C3 - 32, DL, XLenVT));
751           ReplaceNode(Node, SRLIW);
752           return;
753         }
754 
755         // (srli (slli x, c3-c2), c3).
756         if (OneUseOrZExtW && !ZExtOrANDI) {
757           SDNode *SLLI = CurDAG->getMachineNode(
758               RISCV::SLLI, DL, XLenVT, X,
759               CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
760           SDNode *SRLI =
761               CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
762                                      CurDAG->getTargetConstant(C3, DL, XLenVT));
763           ReplaceNode(Node, SRLI);
764           return;
765         }
766       }
767     }
768 
769     // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
770     // shifted by c2 bits with c3 leading zeros.
771     if (LeftShift && isShiftedMask_64(C1)) {
772       uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
773 
774       if (C2 + C3 < XLen &&
775           C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
776         // Use slli.uw when possible.
777         if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
778           SDNode *SLLI_UW =
779               CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X,
780                                      CurDAG->getTargetConstant(C2, DL, XLenVT));
781           ReplaceNode(Node, SLLI_UW);
782           return;
783         }
784 
785         // (srli (slli x, c2+c3), c3)
786         if (OneUseOrZExtW && !ZExtOrANDI) {
787           SDNode *SLLI = CurDAG->getMachineNode(
788               RISCV::SLLI, DL, XLenVT, X,
789               CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
790           SDNode *SRLI =
791               CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
792                                      CurDAG->getTargetConstant(C3, DL, XLenVT));
793           ReplaceNode(Node, SRLI);
794           return;
795         }
796       }
797     }
798 
799     // Turn (and (srl x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
800     // shifted mask with c2 leading zeros and c3 trailing zeros.
801     if (!LeftShift && isShiftedMask_64(C1)) {
802       uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
803       uint64_t C3 = countTrailingZeros(C1);
804       if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !ZExtOrANDI) {
805         SDNode *SRLI = CurDAG->getMachineNode(
806             RISCV::SRLI, DL, XLenVT, X,
807             CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
808         SDNode *SLLI =
809             CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
810                                    CurDAG->getTargetConstant(C3, DL, XLenVT));
811         ReplaceNode(Node, SLLI);
812         return;
813       }
814       // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
815       if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
816           OneUseOrZExtW && !ZExtOrANDI) {
817         SDNode *SRLIW = CurDAG->getMachineNode(
818             RISCV::SRLIW, DL, XLenVT, X,
819             CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
820         SDNode *SLLI =
821             CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
822                                    CurDAG->getTargetConstant(C3, DL, XLenVT));
823         ReplaceNode(Node, SLLI);
824         return;
825       }
826     }
827 
828     // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
829     // shifted mask with no leading zeros and c3 trailing zeros.
830     if (LeftShift && isShiftedMask_64(C1)) {
831       uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
832       uint64_t C3 = countTrailingZeros(C1);
833       if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !ZExtOrANDI) {
834         SDNode *SRLI = CurDAG->getMachineNode(
835             RISCV::SRLI, DL, XLenVT, X,
836             CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
837         SDNode *SLLI =
838             CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
839                                    CurDAG->getTargetConstant(C3, DL, XLenVT));
840         ReplaceNode(Node, SLLI);
841         return;
842       }
843       // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
844       if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) {
845         SDNode *SRLIW = CurDAG->getMachineNode(
846             RISCV::SRLIW, DL, XLenVT, X,
847             CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
848         SDNode *SLLI =
849             CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
850                                    CurDAG->getTargetConstant(C3, DL, XLenVT));
851         ReplaceNode(Node, SLLI);
852         return;
853       }
854     }
855 
856     break;
857   }
858   case ISD::MUL: {
859     // Special case for calculating (mul (and X, C2), C1) where the full product
860     // fits in XLen bits. We can shift X left by the number of leading zeros in
861     // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
862     // product has XLen trailing zeros, putting it in the output of MULHU. This
863     // can avoid materializing a constant in a register for C2.
864 
865     // RHS should be a constant.
866     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
867     if (!N1C || !N1C->hasOneUse())
868       break;
869 
870     // LHS should be an AND with constant.
871     SDValue N0 = Node->getOperand(0);
872     if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
873       break;
874 
875     uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
876 
877     // Constant should be a mask.
878     if (!isMask_64(C2))
879       break;
880 
881     // This should be the only use of the AND unless we will use
882     // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND
883     // constants.
884     if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF))
885       break;
886 
887     // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this
888     // optimization.
889     if (isInt<12>(C2) ||
890         (C2 == UINT64_C(0xFFFF) &&
891          (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) ||
892         (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()))
893       break;
894 
895     // We need to shift left the AND input and C1 by a total of XLen bits.
896 
897     // How far left do we need to shift the AND input?
898     unsigned XLen = Subtarget->getXLen();
899     unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2));
900 
901     // The constant gets shifted by the remaining amount unless that would
902     // shift bits out.
903     uint64_t C1 = N1C->getZExtValue();
904     unsigned ConstantShift = XLen - LeadingZeros;
905     if (ConstantShift > (XLen - (64 - countLeadingZeros(C1))))
906       break;
907 
908     uint64_t ShiftedC1 = C1 << ConstantShift;
909     // If this is RV32, we need to sign extend the constant.
910     if (XLen == 32)
911       ShiftedC1 = SignExtend64(ShiftedC1, 32);
912 
913     // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
914     SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
915     SDNode *SLLI =
916         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
917                                CurDAG->getTargetConstant(LeadingZeros, DL, VT));
918     SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
919                                            SDValue(SLLI, 0), SDValue(Imm, 0));
920     ReplaceNode(Node, MULHU);
921     return;
922   }
923   case ISD::INTRINSIC_WO_CHAIN: {
924     unsigned IntNo = Node->getConstantOperandVal(0);
925     switch (IntNo) {
926       // By default we do not custom select any intrinsic.
927     default:
928       break;
929     case Intrinsic::riscv_vmsgeu:
930     case Intrinsic::riscv_vmsge: {
931       SDValue Src1 = Node->getOperand(1);
932       SDValue Src2 = Node->getOperand(2);
933       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
934       bool IsCmpUnsignedZero = false;
935       // Only custom select scalar second operand.
936       if (Src2.getValueType() != XLenVT)
937         break;
938       // Small constants are handled with patterns.
939       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
940         int64_t CVal = C->getSExtValue();
941         if (CVal >= -15 && CVal <= 16) {
942           if (!IsUnsigned || CVal != 0)
943             break;
944           IsCmpUnsignedZero = true;
945         }
946       }
947       MVT Src1VT = Src1.getSimpleValueType();
948       unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
949       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
950       default:
951         llvm_unreachable("Unexpected LMUL!");
952 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)            \
953   case RISCVII::VLMUL::lmulenum:                                               \
954     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
955                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
956     VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
957     VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
958     break;
959         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
960         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
961         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
962         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
963         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
964         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
965         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
966 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES
967       }
968       SDValue SEW = CurDAG->getTargetConstant(
969           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
970       SDValue VL;
971       selectVLOp(Node->getOperand(3), VL);
972 
973       // If vmsgeu with 0 immediate, expand it to vmset.
974       if (IsCmpUnsignedZero) {
975         ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
976         return;
977       }
978 
979       // Expand to
980       // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
981       SDValue Cmp = SDValue(
982           CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
983           0);
984       ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
985                                                {Cmp, Cmp, VL, SEW}));
986       return;
987     }
988     case Intrinsic::riscv_vmsgeu_mask:
989     case Intrinsic::riscv_vmsge_mask: {
990       SDValue Src1 = Node->getOperand(2);
991       SDValue Src2 = Node->getOperand(3);
992       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
993       bool IsCmpUnsignedZero = false;
994       // Only custom select scalar second operand.
995       if (Src2.getValueType() != XLenVT)
996         break;
997       // Small constants are handled with patterns.
998       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
999         int64_t CVal = C->getSExtValue();
1000         if (CVal >= -15 && CVal <= 16) {
1001           if (!IsUnsigned || CVal != 0)
1002             break;
1003           IsCmpUnsignedZero = true;
1004         }
1005       }
1006       MVT Src1VT = Src1.getSimpleValueType();
1007       unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1008           VMSetOpcode, VMANDOpcode;
1009       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1010       default:
1011         llvm_unreachable("Unexpected LMUL!");
1012 #define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b)                   \
1013   case RISCVII::VLMUL::lmulenum:                                               \
1014     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1015                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
1016     VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
1017                                  : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
1018     VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
1019     break;
1020         CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1)
1021         CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2)
1022         CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4)
1023         CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8)
1024         CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16)
1025         CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32)
1026         CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64)
1027 #undef CASE_VMSLT_VMSET_OPCODES
1028       }
1029       // Mask operations use the LMUL from the mask type.
1030       switch (RISCVTargetLowering::getLMUL(VT)) {
1031       default:
1032         llvm_unreachable("Unexpected LMUL!");
1033 #define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix)                       \
1034   case RISCVII::VLMUL::lmulenum:                                               \
1035     VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
1036     VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
1037     VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix;                              \
1038     break;
1039         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8)
1040         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4)
1041         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2)
1042         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1)
1043         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2)
1044         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4)
1045         CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8)
1046 #undef CASE_VMXOR_VMANDN_VMAND_OPCODES
1047       }
1048       SDValue SEW = CurDAG->getTargetConstant(
1049           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1050       SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1051       SDValue VL;
1052       selectVLOp(Node->getOperand(5), VL);
1053       SDValue MaskedOff = Node->getOperand(1);
1054       SDValue Mask = Node->getOperand(4);
1055 
1056       // If vmsgeu_mask with 0 immediate, expand it to {vmset, vmand}.
1057       if (IsCmpUnsignedZero) {
1058         SDValue VMSet =
1059             SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0);
1060         ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT,
1061                                                  {Mask, VMSet, VL, MaskSEW}));
1062         return;
1063       }
1064 
1065       // If the MaskedOff value and the Mask are the same value use
1066       // vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt
1067       // This avoids needing to copy v0 to vd before starting the next sequence.
1068       if (Mask == MaskedOff) {
1069         SDValue Cmp = SDValue(
1070             CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1071             0);
1072         ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1073                                                  {Mask, Cmp, VL, MaskSEW}));
1074         return;
1075       }
1076 
1077       // Mask needs to be copied to V0.
1078       SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1079                                            RISCV::V0, Mask, SDValue());
1080       SDValue Glue = Chain.getValue(1);
1081       SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1082 
1083       // Otherwise use
1084       // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1085       SDValue Cmp = SDValue(
1086           CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1087                                  {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1088           0);
1089       ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1090                                                {Cmp, Mask, VL, MaskSEW}));
1091       return;
1092     }
1093     case Intrinsic::riscv_vsetvli_opt:
1094     case Intrinsic::riscv_vsetvlimax_opt:
1095       return selectVSETVLI(Node);
1096     }
1097     break;
1098   }
1099   case ISD::INTRINSIC_W_CHAIN: {
1100     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1101     switch (IntNo) {
1102       // By default we do not custom select any intrinsic.
1103     default:
1104       break;
1105     case Intrinsic::riscv_vsetvli:
1106     case Intrinsic::riscv_vsetvlimax:
1107       return selectVSETVLI(Node);
1108     case Intrinsic::riscv_vlseg2:
1109     case Intrinsic::riscv_vlseg3:
1110     case Intrinsic::riscv_vlseg4:
1111     case Intrinsic::riscv_vlseg5:
1112     case Intrinsic::riscv_vlseg6:
1113     case Intrinsic::riscv_vlseg7:
1114     case Intrinsic::riscv_vlseg8: {
1115       selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1116       return;
1117     }
1118     case Intrinsic::riscv_vlseg2_mask:
1119     case Intrinsic::riscv_vlseg3_mask:
1120     case Intrinsic::riscv_vlseg4_mask:
1121     case Intrinsic::riscv_vlseg5_mask:
1122     case Intrinsic::riscv_vlseg6_mask:
1123     case Intrinsic::riscv_vlseg7_mask:
1124     case Intrinsic::riscv_vlseg8_mask: {
1125       selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1126       return;
1127     }
1128     case Intrinsic::riscv_vlsseg2:
1129     case Intrinsic::riscv_vlsseg3:
1130     case Intrinsic::riscv_vlsseg4:
1131     case Intrinsic::riscv_vlsseg5:
1132     case Intrinsic::riscv_vlsseg6:
1133     case Intrinsic::riscv_vlsseg7:
1134     case Intrinsic::riscv_vlsseg8: {
1135       selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1136       return;
1137     }
1138     case Intrinsic::riscv_vlsseg2_mask:
1139     case Intrinsic::riscv_vlsseg3_mask:
1140     case Intrinsic::riscv_vlsseg4_mask:
1141     case Intrinsic::riscv_vlsseg5_mask:
1142     case Intrinsic::riscv_vlsseg6_mask:
1143     case Intrinsic::riscv_vlsseg7_mask:
1144     case Intrinsic::riscv_vlsseg8_mask: {
1145       selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1146       return;
1147     }
1148     case Intrinsic::riscv_vloxseg2:
1149     case Intrinsic::riscv_vloxseg3:
1150     case Intrinsic::riscv_vloxseg4:
1151     case Intrinsic::riscv_vloxseg5:
1152     case Intrinsic::riscv_vloxseg6:
1153     case Intrinsic::riscv_vloxseg7:
1154     case Intrinsic::riscv_vloxseg8:
1155       selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1156       return;
1157     case Intrinsic::riscv_vluxseg2:
1158     case Intrinsic::riscv_vluxseg3:
1159     case Intrinsic::riscv_vluxseg4:
1160     case Intrinsic::riscv_vluxseg5:
1161     case Intrinsic::riscv_vluxseg6:
1162     case Intrinsic::riscv_vluxseg7:
1163     case Intrinsic::riscv_vluxseg8:
1164       selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1165       return;
1166     case Intrinsic::riscv_vloxseg2_mask:
1167     case Intrinsic::riscv_vloxseg3_mask:
1168     case Intrinsic::riscv_vloxseg4_mask:
1169     case Intrinsic::riscv_vloxseg5_mask:
1170     case Intrinsic::riscv_vloxseg6_mask:
1171     case Intrinsic::riscv_vloxseg7_mask:
1172     case Intrinsic::riscv_vloxseg8_mask:
1173       selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1174       return;
1175     case Intrinsic::riscv_vluxseg2_mask:
1176     case Intrinsic::riscv_vluxseg3_mask:
1177     case Intrinsic::riscv_vluxseg4_mask:
1178     case Intrinsic::riscv_vluxseg5_mask:
1179     case Intrinsic::riscv_vluxseg6_mask:
1180     case Intrinsic::riscv_vluxseg7_mask:
1181     case Intrinsic::riscv_vluxseg8_mask:
1182       selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1183       return;
1184     case Intrinsic::riscv_vlseg8ff:
1185     case Intrinsic::riscv_vlseg7ff:
1186     case Intrinsic::riscv_vlseg6ff:
1187     case Intrinsic::riscv_vlseg5ff:
1188     case Intrinsic::riscv_vlseg4ff:
1189     case Intrinsic::riscv_vlseg3ff:
1190     case Intrinsic::riscv_vlseg2ff: {
1191       selectVLSEGFF(Node, /*IsMasked*/ false);
1192       return;
1193     }
1194     case Intrinsic::riscv_vlseg8ff_mask:
1195     case Intrinsic::riscv_vlseg7ff_mask:
1196     case Intrinsic::riscv_vlseg6ff_mask:
1197     case Intrinsic::riscv_vlseg5ff_mask:
1198     case Intrinsic::riscv_vlseg4ff_mask:
1199     case Intrinsic::riscv_vlseg3ff_mask:
1200     case Intrinsic::riscv_vlseg2ff_mask: {
1201       selectVLSEGFF(Node, /*IsMasked*/ true);
1202       return;
1203     }
1204     case Intrinsic::riscv_vloxei:
1205     case Intrinsic::riscv_vloxei_mask:
1206     case Intrinsic::riscv_vluxei:
1207     case Intrinsic::riscv_vluxei_mask: {
1208       bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1209                       IntNo == Intrinsic::riscv_vluxei_mask;
1210       bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1211                        IntNo == Intrinsic::riscv_vloxei_mask;
1212 
1213       MVT VT = Node->getSimpleValueType(0);
1214       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1215 
1216       unsigned CurOp = 2;
1217       // Masked intrinsics only have TU version pseudo instructions.
1218       bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
1219       SmallVector<SDValue, 8> Operands;
1220       if (IsTU)
1221         Operands.push_back(Node->getOperand(CurOp++));
1222       else
1223         // Skip the undef passthru operand for nomask TA version pseudo
1224         CurOp++;
1225 
1226       MVT IndexVT;
1227       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1228                                  /*IsStridedOrIndexed*/ true, Operands,
1229                                  /*IsLoad=*/true, &IndexVT);
1230 
1231       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1232              "Element count mismatch");
1233 
1234       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1235       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1236       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1237       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1238         report_fatal_error("The V extension does not support EEW=64 for index "
1239                            "values when XLEN=32");
1240       }
1241       const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1242           IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1243           static_cast<unsigned>(IndexLMUL));
1244       MachineSDNode *Load =
1245           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1246 
1247       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1248         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1249 
1250       ReplaceNode(Node, Load);
1251       return;
1252     }
1253     case Intrinsic::riscv_vlm:
1254     case Intrinsic::riscv_vle:
1255     case Intrinsic::riscv_vle_mask:
1256     case Intrinsic::riscv_vlse:
1257     case Intrinsic::riscv_vlse_mask: {
1258       bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1259                       IntNo == Intrinsic::riscv_vlse_mask;
1260       bool IsStrided =
1261           IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1262 
1263       MVT VT = Node->getSimpleValueType(0);
1264       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1265 
1266       unsigned CurOp = 2;
1267       // The riscv_vlm intrinsic is always tail agnostic with no passthru operand.
1268       bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1269       // Masked intrinsics only have TU version pseudo instructions.
1270       bool IsTU =
1271           HasPassthruOperand &&
1272           ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked);
1273       SmallVector<SDValue, 8> Operands;
1274       if (IsTU)
1275         Operands.push_back(Node->getOperand(CurOp++));
1276       else if (HasPassthruOperand)
1277         // Skip the undef passthru operand for nomask TA version pseudo
1278         CurOp++;
1279 
1280       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1281                                  Operands, /*IsLoad=*/true);
1282 
1283       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1284       const RISCV::VLEPseudo *P =
1285           RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
1286                               static_cast<unsigned>(LMUL));
1287       MachineSDNode *Load =
1288           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1289 
1290       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1291         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1292 
1293       ReplaceNode(Node, Load);
1294       return;
1295     }
1296     case Intrinsic::riscv_vleff:
1297     case Intrinsic::riscv_vleff_mask: {
1298       bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1299 
1300       MVT VT = Node->getSimpleValueType(0);
1301       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1302 
1303       unsigned CurOp = 2;
1304       // Masked intrinsics only have TU version pseudo instructions.
1305       bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
1306       SmallVector<SDValue, 7> Operands;
1307       if (IsTU)
1308         Operands.push_back(Node->getOperand(CurOp++));
1309       else
1310         // Skip the undef passthru operand for nomask TA version pseudo
1311         CurOp++;
1312 
1313       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1314                                  /*IsStridedOrIndexed*/ false, Operands,
1315                                  /*IsLoad=*/true);
1316 
1317       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1318       const RISCV::VLEPseudo *P =
1319           RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
1320                               Log2SEW, static_cast<unsigned>(LMUL));
1321       MachineSDNode *Load =
1322           CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0),
1323                                  MVT::Other, MVT::Glue, Operands);
1324       SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
1325                                               /*Glue*/ SDValue(Load, 2));
1326 
1327       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1328         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1329 
1330       ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
1331       ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL
1332       ReplaceUses(SDValue(Node, 2), SDValue(Load, 1));   // Chain
1333       CurDAG->RemoveDeadNode(Node);
1334       return;
1335     }
1336     }
1337     break;
1338   }
1339   case ISD::INTRINSIC_VOID: {
1340     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1341     switch (IntNo) {
1342     case Intrinsic::riscv_vsseg2:
1343     case Intrinsic::riscv_vsseg3:
1344     case Intrinsic::riscv_vsseg4:
1345     case Intrinsic::riscv_vsseg5:
1346     case Intrinsic::riscv_vsseg6:
1347     case Intrinsic::riscv_vsseg7:
1348     case Intrinsic::riscv_vsseg8: {
1349       selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1350       return;
1351     }
1352     case Intrinsic::riscv_vsseg2_mask:
1353     case Intrinsic::riscv_vsseg3_mask:
1354     case Intrinsic::riscv_vsseg4_mask:
1355     case Intrinsic::riscv_vsseg5_mask:
1356     case Intrinsic::riscv_vsseg6_mask:
1357     case Intrinsic::riscv_vsseg7_mask:
1358     case Intrinsic::riscv_vsseg8_mask: {
1359       selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1360       return;
1361     }
1362     case Intrinsic::riscv_vssseg2:
1363     case Intrinsic::riscv_vssseg3:
1364     case Intrinsic::riscv_vssseg4:
1365     case Intrinsic::riscv_vssseg5:
1366     case Intrinsic::riscv_vssseg6:
1367     case Intrinsic::riscv_vssseg7:
1368     case Intrinsic::riscv_vssseg8: {
1369       selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1370       return;
1371     }
1372     case Intrinsic::riscv_vssseg2_mask:
1373     case Intrinsic::riscv_vssseg3_mask:
1374     case Intrinsic::riscv_vssseg4_mask:
1375     case Intrinsic::riscv_vssseg5_mask:
1376     case Intrinsic::riscv_vssseg6_mask:
1377     case Intrinsic::riscv_vssseg7_mask:
1378     case Intrinsic::riscv_vssseg8_mask: {
1379       selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1380       return;
1381     }
1382     case Intrinsic::riscv_vsoxseg2:
1383     case Intrinsic::riscv_vsoxseg3:
1384     case Intrinsic::riscv_vsoxseg4:
1385     case Intrinsic::riscv_vsoxseg5:
1386     case Intrinsic::riscv_vsoxseg6:
1387     case Intrinsic::riscv_vsoxseg7:
1388     case Intrinsic::riscv_vsoxseg8:
1389       selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1390       return;
1391     case Intrinsic::riscv_vsuxseg2:
1392     case Intrinsic::riscv_vsuxseg3:
1393     case Intrinsic::riscv_vsuxseg4:
1394     case Intrinsic::riscv_vsuxseg5:
1395     case Intrinsic::riscv_vsuxseg6:
1396     case Intrinsic::riscv_vsuxseg7:
1397     case Intrinsic::riscv_vsuxseg8:
1398       selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1399       return;
1400     case Intrinsic::riscv_vsoxseg2_mask:
1401     case Intrinsic::riscv_vsoxseg3_mask:
1402     case Intrinsic::riscv_vsoxseg4_mask:
1403     case Intrinsic::riscv_vsoxseg5_mask:
1404     case Intrinsic::riscv_vsoxseg6_mask:
1405     case Intrinsic::riscv_vsoxseg7_mask:
1406     case Intrinsic::riscv_vsoxseg8_mask:
1407       selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1408       return;
1409     case Intrinsic::riscv_vsuxseg2_mask:
1410     case Intrinsic::riscv_vsuxseg3_mask:
1411     case Intrinsic::riscv_vsuxseg4_mask:
1412     case Intrinsic::riscv_vsuxseg5_mask:
1413     case Intrinsic::riscv_vsuxseg6_mask:
1414     case Intrinsic::riscv_vsuxseg7_mask:
1415     case Intrinsic::riscv_vsuxseg8_mask:
1416       selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1417       return;
1418     case Intrinsic::riscv_vsoxei:
1419     case Intrinsic::riscv_vsoxei_mask:
1420     case Intrinsic::riscv_vsuxei:
1421     case Intrinsic::riscv_vsuxei_mask: {
1422       bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
1423                       IntNo == Intrinsic::riscv_vsuxei_mask;
1424       bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
1425                        IntNo == Intrinsic::riscv_vsoxei_mask;
1426 
1427       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1428       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1429 
1430       unsigned CurOp = 2;
1431       SmallVector<SDValue, 8> Operands;
1432       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1433 
1434       MVT IndexVT;
1435       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1436                                  /*IsStridedOrIndexed*/ true, Operands,
1437                                  /*IsLoad=*/false, &IndexVT);
1438 
1439       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1440              "Element count mismatch");
1441 
1442       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1443       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1444       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1445       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1446         report_fatal_error("The V extension does not support EEW=64 for index "
1447                            "values when XLEN=32");
1448       }
1449       const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
1450           IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW,
1451           static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
1452       MachineSDNode *Store =
1453           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1454 
1455       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1456         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1457 
1458       ReplaceNode(Node, Store);
1459       return;
1460     }
1461     case Intrinsic::riscv_vsm:
1462     case Intrinsic::riscv_vse:
1463     case Intrinsic::riscv_vse_mask:
1464     case Intrinsic::riscv_vsse:
1465     case Intrinsic::riscv_vsse_mask: {
1466       bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
1467                       IntNo == Intrinsic::riscv_vsse_mask;
1468       bool IsStrided =
1469           IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
1470 
1471       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1472       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1473 
1474       unsigned CurOp = 2;
1475       SmallVector<SDValue, 8> Operands;
1476       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1477 
1478       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1479                                  Operands);
1480 
1481       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1482       const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
1483           IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
1484       MachineSDNode *Store =
1485           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1486       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1487         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1488 
1489       ReplaceNode(Node, Store);
1490       return;
1491     }
1492     }
1493     break;
1494   }
1495   case ISD::BITCAST: {
1496     MVT SrcVT = Node->getOperand(0).getSimpleValueType();
1497     // Just drop bitcasts between vectors if both are fixed or both are
1498     // scalable.
1499     if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
1500         (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
1501       ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
1502       CurDAG->RemoveDeadNode(Node);
1503       return;
1504     }
1505     break;
1506   }
1507   case ISD::INSERT_SUBVECTOR: {
1508     SDValue V = Node->getOperand(0);
1509     SDValue SubV = Node->getOperand(1);
1510     SDLoc DL(SubV);
1511     auto Idx = Node->getConstantOperandVal(2);
1512     MVT SubVecVT = SubV.getSimpleValueType();
1513 
1514     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
1515     MVT SubVecContainerVT = SubVecVT;
1516     // Establish the correct scalable-vector types for any fixed-length type.
1517     if (SubVecVT.isFixedLengthVector())
1518       SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
1519     if (VT.isFixedLengthVector())
1520       VT = TLI.getContainerForFixedLengthVector(VT);
1521 
1522     const auto *TRI = Subtarget->getRegisterInfo();
1523     unsigned SubRegIdx;
1524     std::tie(SubRegIdx, Idx) =
1525         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1526             VT, SubVecContainerVT, Idx, TRI);
1527 
1528     // If the Idx hasn't been completely eliminated then this is a subvector
1529     // insert which doesn't naturally align to a vector register. These must
1530     // be handled using instructions to manipulate the vector registers.
1531     if (Idx != 0)
1532       break;
1533 
1534     RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
1535     bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
1536                            SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
1537                            SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
1538     (void)IsSubVecPartReg; // Silence unused variable warning without asserts.
1539     assert((!IsSubVecPartReg || V.isUndef()) &&
1540            "Expecting lowering to have created legal INSERT_SUBVECTORs when "
1541            "the subvector is smaller than a full-sized register");
1542 
1543     // If we haven't set a SubRegIdx, then we must be going between
1544     // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
1545     if (SubRegIdx == RISCV::NoSubRegister) {
1546       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT);
1547       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
1548                  InRegClassID &&
1549              "Unexpected subvector extraction");
1550       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
1551       SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
1552                                                DL, VT, SubV, RC);
1553       ReplaceNode(Node, NewNode);
1554       return;
1555     }
1556 
1557     SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
1558     ReplaceNode(Node, Insert.getNode());
1559     return;
1560   }
1561   case ISD::EXTRACT_SUBVECTOR: {
1562     SDValue V = Node->getOperand(0);
1563     auto Idx = Node->getConstantOperandVal(1);
1564     MVT InVT = V.getSimpleValueType();
1565     SDLoc DL(V);
1566 
1567     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
1568     MVT SubVecContainerVT = VT;
1569     // Establish the correct scalable-vector types for any fixed-length type.
1570     if (VT.isFixedLengthVector())
1571       SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
1572     if (InVT.isFixedLengthVector())
1573       InVT = TLI.getContainerForFixedLengthVector(InVT);
1574 
1575     const auto *TRI = Subtarget->getRegisterInfo();
1576     unsigned SubRegIdx;
1577     std::tie(SubRegIdx, Idx) =
1578         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1579             InVT, SubVecContainerVT, Idx, TRI);
1580 
1581     // If the Idx hasn't been completely eliminated then this is a subvector
1582     // extract which doesn't naturally align to a vector register. These must
1583     // be handled using instructions to manipulate the vector registers.
1584     if (Idx != 0)
1585       break;
1586 
1587     // If we haven't set a SubRegIdx, then we must be going between
1588     // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
1589     if (SubRegIdx == RISCV::NoSubRegister) {
1590       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
1591       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
1592                  InRegClassID &&
1593              "Unexpected subvector extraction");
1594       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
1595       SDNode *NewNode =
1596           CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
1597       ReplaceNode(Node, NewNode);
1598       return;
1599     }
1600 
1601     SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
1602     ReplaceNode(Node, Extract.getNode());
1603     return;
1604   }
1605   case ISD::SPLAT_VECTOR:
1606   case RISCVISD::VMV_S_X_VL:
1607   case RISCVISD::VFMV_S_F_VL:
1608   case RISCVISD::VMV_V_X_VL:
1609   case RISCVISD::VFMV_V_F_VL: {
1610     // Try to match splat of a scalar load to a strided load with stride of x0.
1611     bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
1612                         Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
1613     if (IsScalarMove && !Node->getOperand(0).isUndef())
1614       break;
1615     SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0);
1616     auto *Ld = dyn_cast<LoadSDNode>(Src);
1617     if (!Ld)
1618       break;
1619     EVT MemVT = Ld->getMemoryVT();
1620     // The memory VT should be the same size as the element type.
1621     if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
1622       break;
1623     if (!IsProfitableToFold(Src, Node, Node) ||
1624         !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
1625       break;
1626 
1627     SDValue VL;
1628     if (Node->getOpcode() == ISD::SPLAT_VECTOR)
1629       VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
1630     else if (IsScalarMove) {
1631       // We could deal with more VL if we update the VSETVLI insert pass to
1632       // avoid introducing more VSETVLI.
1633       if (!isOneConstant(Node->getOperand(2)))
1634         break;
1635       selectVLOp(Node->getOperand(2), VL);
1636     } else
1637       selectVLOp(Node->getOperand(1), VL);
1638 
1639     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1640     SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
1641 
1642     SDValue Operands[] = {Ld->getBasePtr(),
1643                           CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW,
1644                           Ld->getChain()};
1645 
1646     RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1647     const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
1648         /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
1649         Log2SEW, static_cast<unsigned>(LMUL));
1650     MachineSDNode *Load =
1651         CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1652 
1653     if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1654       CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1655 
1656     ReplaceNode(Node, Load);
1657     return;
1658   }
1659   }
1660 
1661   // Select the default instruction.
1662   SelectCode(Node);
1663 }
1664 
1665 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
1666     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
1667   switch (ConstraintID) {
1668   case InlineAsm::Constraint_m:
1669     // We just support simple memory operands that have a single address
1670     // operand and need no special handling.
1671     OutOps.push_back(Op);
1672     return false;
1673   case InlineAsm::Constraint_A:
1674     OutOps.push_back(Op);
1675     return false;
1676   default:
1677     break;
1678   }
1679 
1680   return true;
1681 }
1682 
1683 bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
1684   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
1685     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
1686     return true;
1687   }
1688   return false;
1689 }
1690 
1691 bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
1692   // If this is FrameIndex, select it directly. Otherwise just let it get
1693   // selected to a register independently.
1694   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr))
1695     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
1696   else
1697     Base = Addr;
1698   return true;
1699 }
1700 
1701 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
1702                                         SDValue &ShAmt) {
1703   // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
1704   // amount. If there is an AND on the shift amount, we can bypass it if it
1705   // doesn't affect any of those bits.
1706   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
1707     const APInt &AndMask = N->getConstantOperandAPInt(1);
1708 
1709     // Since the max shift amount is a power of 2 we can subtract 1 to make a
1710     // mask that covers the bits needed to represent all shift amounts.
1711     assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
1712     APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
1713 
1714     if (ShMask.isSubsetOf(AndMask)) {
1715       ShAmt = N.getOperand(0);
1716       return true;
1717     }
1718 
1719     // SimplifyDemandedBits may have optimized the mask so try restoring any
1720     // bits that are known zero.
1721     KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0));
1722     if (ShMask.isSubsetOf(AndMask | Known.Zero)) {
1723       ShAmt = N.getOperand(0);
1724       return true;
1725     }
1726   }
1727 
1728   ShAmt = N;
1729   return true;
1730 }
1731 
1732 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) {
1733   if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1734       cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
1735     Val = N.getOperand(0);
1736     return true;
1737   }
1738   MVT VT = N.getSimpleValueType();
1739   if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) {
1740     Val = N;
1741     return true;
1742   }
1743 
1744   return false;
1745 }
1746 
1747 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
1748   if (N.getOpcode() == ISD::AND) {
1749     auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
1750     if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) {
1751       Val = N.getOperand(0);
1752       return true;
1753     }
1754   }
1755   MVT VT = N.getSimpleValueType();
1756   APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32);
1757   if (CurDAG->MaskedValueIsZero(N, Mask)) {
1758     Val = N;
1759     return true;
1760   }
1761 
1762   return false;
1763 }
1764 
1765 // Return true if all users of this SDNode* only consume the lower \p Bits.
1766 // This can be used to form W instructions for add/sub/mul/shl even when the
1767 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
1768 // SimplifyDemandedBits has made it so some users see a sext_inreg and some
1769 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
1770 // the add/sub/mul/shl to become non-W instructions. By checking the users we
1771 // may be able to use a W instruction and CSE with the other instruction if
1772 // this has happened. We could try to detect that the CSE opportunity exists
1773 // before doing this, but that would be more complicated.
1774 // TODO: Does this need to look through AND/OR/XOR to their users to find more
1775 // opportunities.
1776 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
1777   assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
1778           Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
1779           Node->getOpcode() == ISD::SRL ||
1780           Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
1781           isa<ConstantSDNode>(Node)) &&
1782          "Unexpected opcode");
1783 
1784   for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
1785     SDNode *User = *UI;
1786     // Users of this node should have already been instruction selected
1787     if (!User->isMachineOpcode())
1788       return false;
1789 
1790     // TODO: Add more opcodes?
1791     switch (User->getMachineOpcode()) {
1792     default:
1793       return false;
1794     case RISCV::ADDW:
1795     case RISCV::ADDIW:
1796     case RISCV::SUBW:
1797     case RISCV::MULW:
1798     case RISCV::SLLW:
1799     case RISCV::SLLIW:
1800     case RISCV::SRAW:
1801     case RISCV::SRAIW:
1802     case RISCV::SRLW:
1803     case RISCV::SRLIW:
1804     case RISCV::DIVW:
1805     case RISCV::DIVUW:
1806     case RISCV::REMW:
1807     case RISCV::REMUW:
1808     case RISCV::ROLW:
1809     case RISCV::RORW:
1810     case RISCV::RORIW:
1811     case RISCV::CLZW:
1812     case RISCV::CTZW:
1813     case RISCV::CPOPW:
1814     case RISCV::SLLI_UW:
1815     case RISCV::FCVT_H_W:
1816     case RISCV::FCVT_H_WU:
1817     case RISCV::FCVT_S_W:
1818     case RISCV::FCVT_S_WU:
1819     case RISCV::FCVT_D_W:
1820     case RISCV::FCVT_D_WU:
1821       if (Bits < 32)
1822         return false;
1823       break;
1824     case RISCV::SLLI:
1825       // SLLI only uses the lower (XLen - ShAmt) bits.
1826       if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
1827         return false;
1828       break;
1829     case RISCV::ANDI:
1830       if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
1831         return false;
1832       break;
1833     case RISCV::SEXT_B:
1834       if (Bits < 8)
1835         return false;
1836       break;
1837     case RISCV::SEXT_H:
1838     case RISCV::ZEXT_H_RV32:
1839     case RISCV::ZEXT_H_RV64:
1840       if (Bits < 16)
1841         return false;
1842       break;
1843     case RISCV::ADD_UW:
1844     case RISCV::SH1ADD_UW:
1845     case RISCV::SH2ADD_UW:
1846     case RISCV::SH3ADD_UW:
1847       // The first operand to add.uw/shXadd.uw is implicitly zero extended from
1848       // 32 bits.
1849       if (UI.getOperandNo() != 0 || Bits < 32)
1850         return false;
1851       break;
1852     case RISCV::SB:
1853       if (UI.getOperandNo() != 0 || Bits < 8)
1854         return false;
1855       break;
1856     case RISCV::SH:
1857       if (UI.getOperandNo() != 0 || Bits < 16)
1858         return false;
1859       break;
1860     case RISCV::SW:
1861       if (UI.getOperandNo() != 0 || Bits < 32)
1862         return false;
1863       break;
1864     }
1865   }
1866 
1867   return true;
1868 }
1869 
1870 // Select VL as a 5 bit immediate or a value that will become a register. This
1871 // allows us to choose betwen VSETIVLI or VSETVLI later.
1872 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
1873   auto *C = dyn_cast<ConstantSDNode>(N);
1874   if (C && (isUInt<5>(C->getZExtValue()) ||
1875             C->getSExtValue() == RISCV::VLMaxSentinel))
1876     VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
1877                                    N->getValueType(0));
1878   else
1879     VL = N;
1880 
1881   return true;
1882 }
1883 
1884 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
1885   if (N.getOpcode() != ISD::SPLAT_VECTOR &&
1886       N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
1887       N.getOpcode() != RISCVISD::VMV_V_X_VL)
1888     return false;
1889   SplatVal = N.getOperand(0);
1890   return true;
1891 }
1892 
1893 using ValidateFn = bool (*)(int64_t);
1894 
1895 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
1896                                    SelectionDAG &DAG,
1897                                    const RISCVSubtarget &Subtarget,
1898                                    ValidateFn ValidateImm) {
1899   if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
1900        N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
1901        N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
1902       !isa<ConstantSDNode>(N.getOperand(0)))
1903     return false;
1904 
1905   int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
1906 
1907   // ISD::SPLAT_VECTOR, RISCVISD::SPLAT_VECTOR_I64 and RISCVISD::VMV_V_X_VL
1908   // share semantics when the operand type is wider than the resulting vector
1909   // element type: an implicit truncation first takes place. Therefore, perform
1910   // a manual truncation/sign-extension in order to ignore any truncated bits
1911   // and catch any zero-extended immediate.
1912   // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
1913   // sign-extending to (XLenVT -1).
1914   MVT XLenVT = Subtarget.getXLenVT();
1915   assert(XLenVT == N.getOperand(0).getSimpleValueType() &&
1916          "Unexpected splat operand type");
1917   MVT EltVT = N.getSimpleValueType().getVectorElementType();
1918   if (EltVT.bitsLT(XLenVT))
1919     SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());
1920 
1921   if (!ValidateImm(SplatImm))
1922     return false;
1923 
1924   SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
1925   return true;
1926 }
1927 
1928 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
1929   return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
1930                                 [](int64_t Imm) { return isInt<5>(Imm); });
1931 }
1932 
1933 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
1934   return selectVSplatSimmHelper(
1935       N, SplatVal, *CurDAG, *Subtarget,
1936       [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
1937 }
1938 
1939 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
1940                                                       SDValue &SplatVal) {
1941   return selectVSplatSimmHelper(
1942       N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
1943         return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
1944       });
1945 }
1946 
1947 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
1948   if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
1949        N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
1950        N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
1951       !isa<ConstantSDNode>(N.getOperand(0)))
1952     return false;
1953 
1954   int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
1955 
1956   if (!isUInt<5>(SplatImm))
1957     return false;
1958 
1959   SplatVal =
1960       CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());
1961 
1962   return true;
1963 }
1964 
1965 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
1966                                        SDValue &Imm) {
1967   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
1968     int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
1969 
1970     if (!isInt<5>(ImmVal))
1971       return false;
1972 
1973     Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
1974     return true;
1975   }
1976 
1977   return false;
1978 }
1979 
1980 // Merge an ADDI into the offset of a load/store instruction where possible.
1981 // (load (addi base, off1), off2) -> (load base, off1+off2)
1982 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
1983 // This is possible when off1+off2 fits a 12-bit immediate.
1984 bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
1985   int OffsetOpIdx;
1986   int BaseOpIdx;
1987 
1988   // Only attempt this optimisation for I-type loads and S-type stores.
1989   switch (N->getMachineOpcode()) {
1990   default:
1991     return false;
1992   case RISCV::LB:
1993   case RISCV::LH:
1994   case RISCV::LW:
1995   case RISCV::LBU:
1996   case RISCV::LHU:
1997   case RISCV::LWU:
1998   case RISCV::LD:
1999   case RISCV::FLH:
2000   case RISCV::FLW:
2001   case RISCV::FLD:
2002     BaseOpIdx = 0;
2003     OffsetOpIdx = 1;
2004     break;
2005   case RISCV::SB:
2006   case RISCV::SH:
2007   case RISCV::SW:
2008   case RISCV::SD:
2009   case RISCV::FSH:
2010   case RISCV::FSW:
2011   case RISCV::FSD:
2012     BaseOpIdx = 1;
2013     OffsetOpIdx = 2;
2014     break;
2015   }
2016 
2017   if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
2018     return false;
2019 
2020   SDValue Base = N->getOperand(BaseOpIdx);
2021 
2022   // If the base is an ADDI, we can merge it in to the load/store.
2023   if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
2024     return false;
2025 
2026   SDValue ImmOperand = Base.getOperand(1);
2027   uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
2028 
2029   if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
2030     int64_t Offset1 = Const->getSExtValue();
2031     int64_t CombinedOffset = Offset1 + Offset2;
2032     if (!isInt<12>(CombinedOffset))
2033       return false;
2034     ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
2035                                            ImmOperand.getValueType());
2036   } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
2037     // If the off1 in (addi base, off1) is a global variable's address (its
2038     // low part, really), then we can rely on the alignment of that variable
2039     // to provide a margin of safety before off1 can overflow the 12 bits.
2040     // Check if off2 falls within that margin; if so off1+off2 can't overflow.
2041     const DataLayout &DL = CurDAG->getDataLayout();
2042     Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
2043     if (Offset2 != 0 && Alignment <= Offset2)
2044       return false;
2045     int64_t Offset1 = GA->getOffset();
2046     int64_t CombinedOffset = Offset1 + Offset2;
2047     ImmOperand = CurDAG->getTargetGlobalAddress(
2048         GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
2049         CombinedOffset, GA->getTargetFlags());
2050   } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
2051     // Ditto.
2052     Align Alignment = CP->getAlign();
2053     if (Offset2 != 0 && Alignment <= Offset2)
2054       return false;
2055     int64_t Offset1 = CP->getOffset();
2056     int64_t CombinedOffset = Offset1 + Offset2;
2057     ImmOperand = CurDAG->getTargetConstantPool(
2058         CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
2059         CombinedOffset, CP->getTargetFlags());
2060   } else {
2061     return false;
2062   }
2063 
2064   LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
2065   LLVM_DEBUG(Base->dump(CurDAG));
2066   LLVM_DEBUG(dbgs() << "\nN: ");
2067   LLVM_DEBUG(N->dump(CurDAG));
2068   LLVM_DEBUG(dbgs() << "\n");
2069 
2070   // Modify the offset operand of the load/store.
2071   if (BaseOpIdx == 0) // Load
2072     CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
2073                                N->getOperand(2));
2074   else // Store
2075     CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
2076                                ImmOperand, N->getOperand(3));
2077 
2078   return true;
2079 }
2080 
2081 // Try to remove sext.w if the input is a W instruction or can be made into
2082 // a W instruction cheaply.
2083 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
2084   // Look for the sext.w pattern, addiw rd, rs1, 0.
2085   if (N->getMachineOpcode() != RISCV::ADDIW ||
2086       !isNullConstant(N->getOperand(1)))
2087     return false;
2088 
2089   SDValue N0 = N->getOperand(0);
2090   if (!N0.isMachineOpcode())
2091     return false;
2092 
2093   switch (N0.getMachineOpcode()) {
2094   default:
2095     break;
2096   case RISCV::ADD:
2097   case RISCV::ADDI:
2098   case RISCV::SUB:
2099   case RISCV::MUL:
2100   case RISCV::SLLI: {
2101     // Convert sext.w+add/sub/mul to their W instructions. This will create
2102     // a new independent instruction. This improves latency.
2103     unsigned Opc;
2104     switch (N0.getMachineOpcode()) {
2105     default:
2106       llvm_unreachable("Unexpected opcode!");
2107     case RISCV::ADD:  Opc = RISCV::ADDW;  break;
2108     case RISCV::ADDI: Opc = RISCV::ADDIW; break;
2109     case RISCV::SUB:  Opc = RISCV::SUBW;  break;
2110     case RISCV::MUL:  Opc = RISCV::MULW;  break;
2111     case RISCV::SLLI: Opc = RISCV::SLLIW; break;
2112     }
2113 
2114     SDValue N00 = N0.getOperand(0);
2115     SDValue N01 = N0.getOperand(1);
2116 
2117     // Shift amount needs to be uimm5.
2118     if (N0.getMachineOpcode() == RISCV::SLLI &&
2119         !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
2120       break;
2121 
2122     SDNode *Result =
2123         CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
2124                                N00, N01);
2125     ReplaceUses(N, Result);
2126     return true;
2127   }
2128   case RISCV::ADDW:
2129   case RISCV::ADDIW:
2130   case RISCV::SUBW:
2131   case RISCV::MULW:
2132   case RISCV::SLLIW:
2133     // Result is already sign extended just remove the sext.w.
2134     // NOTE: We only handle the nodes that are selected with hasAllWUsers.
2135     ReplaceUses(N, N0.getNode());
2136     return true;
2137   }
2138 
2139   return false;
2140 }
2141 
2142 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
2143 // for instruction scheduling.
2144 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
2145   return new RISCVDAGToDAGISel(TM);
2146 }
2147